/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"

enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)

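/* The pass keeps one block_info_def per basic block: the records are
   allocated into bb->aux by alloc_aux_for_blocks at the start of
   move_or_delete_vzeroupper below and released again with
   free_aux_for_blocks once the scan is finished.  */
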
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

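/* One of these values is encoded as the first operand of every
   vzeroupper UNSPEC_VOLATILE pattern; the scan below reads it back
   with INTVAL (XVECEXP (pat, 0, 0)) to decide whether a vzeroupper can
   be deleted outright or has to stay because the callee receives or
   returns a 256bit AVX register.  */
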
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}

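/* check_avx256_stores has the (rtx, const_rtx, void *) signature that
   note_stores expects of its callback; the basic-block scan below
   calls note_stores (pat, check_avx256_stores, &state) so that any
   store into a 256bit AVX register, or any SET whose source is one,
   flips STATE to `used'.  */
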
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;

      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  break;
	}
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

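  /* At this point the entry successors have been seeded with `used'
     if our own caller passes a 256bit AVX argument, since the upper
     128bits are then live on function entry, and with `unused'
     otherwise.  */
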
  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

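  /* bb_order[] now maps each block index to its position in the
     reverse completion order; it is used below as the fibonacci heap
     key, so blocks are extracted roughly in control-flow order and
     the data-flow converges quickly.  */
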
  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

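  /* The iteration below runs in rounds: `worklist' holds the blocks of
     the current round, keyed by bb_order, while blocks whose state may
     still change are queued on `pending' for the next round.  A new
     round is started only while some call of move_or_delete_vzeroupper_2
     has set rescan_vzeroupper_p.  */
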
  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)

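/* For example, ix86_cost->mult_init[MODE_INDEX (SImode)] selects the
   SImode entry of the multiply-start costs below; index 4 ("other")
   covers every remaining mode.  */
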
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}

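/* A stringop_algs initializer names the algorithm for blocks of unknown
   size, followed by {max_size, algorithm} pairs for known sizes, where
   a max_size of -1 means "any larger size".  DUMMY_STRINGOP_ALGS thus
   simply falls back to a library call; it fills the half of each table
   (32bit or 64bit) that a given tuning never exercises.  */
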
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*				 HI */
   COSTS_N_BYTES (3),			/*				 SI */
   COSTS_N_BYTES (3),			/*				 DI */
   COSTS_N_BYTES (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*			    HI */
   COSTS_N_BYTES (3),			/*			    SI */
   COSTS_N_BYTES (3),			/*			    DI */
   COSTS_N_BYTES (5)},			/*			 other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

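/* When tuning for size every entry above is a byte count rather than a
   latency, which is why the table is built entirely from COSTS_N_BYTES
   and small constants: the optimizers then compare alternatives by
   code size instead of speed.  */
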
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*				 HI */
   COSTS_N_INSNS (6),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (23),			/*			    SI */
   COSTS_N_INSNS (23),			/*			    DI */
   COSTS_N_INSNS (23)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*				 HI */
   COSTS_N_INSNS (12),			/*				 SI */
   COSTS_N_INSNS (12),			/*				 DI */
   COSTS_N_INSNS (12)},			/*			      other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*			    HI */
   COSTS_N_INSNS (40),			/*			    SI */
   COSTS_N_INSNS (40),			/*			    DI */
   COSTS_N_INSNS (40)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*				 HI */
   COSTS_N_INSNS (11),			/*				 SI */
   COSTS_N_INSNS (11),			/*				 DI */
   COSTS_N_INSNS (11)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*			    HI */
   COSTS_N_INSNS (25),			/*			    SI */
   COSTS_N_INSNS (25),			/*			    DI */
   COSTS_N_INSNS (25)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (4)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*			    HI */
   COSTS_N_INSNS (17),			/*			    SI */
   COSTS_N_INSNS (17),			/*			    DI */
   COSTS_N_INSNS (17)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (7),			/*				 SI */
   COSTS_N_INSNS (7),			/*				 DI */
   COSTS_N_INSNS (7)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (39),			/*			    SI */
   COSTS_N_INSNS (39),			/*			    DI */
   COSTS_N_INSNS (39)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (3),			/*				 DI */
   COSTS_N_INSNS (3)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*			    HI */
   COSTS_N_INSNS (18),			/*			    SI */
   COSTS_N_INSNS (18),			/*			    DI */
   COSTS_N_INSNS (18)},			/*			 other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*				 HI */
   COSTS_N_INSNS (5),			/*				 SI */
   COSTS_N_INSNS (5),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* BTVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*				 HI */
   COSTS_N_INSNS (15),			/*				 SI */
   COSTS_N_INSNS (15),			/*				 DI */
   COSTS_N_INSNS (15)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*			    HI */
   COSTS_N_INSNS (56),			/*			    SI */
   COSTS_N_INSNS (56),			/*			    DI */
   COSTS_N_INSNS (56)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/*				 HI */
   COSTS_N_INSNS (10),			/*				 SI */
   COSTS_N_INSNS (10),			/*				 DI */
   COSTS_N_INSNS (10)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/*			    HI */
   COSTS_N_INSNS (66),			/*			    SI */
   COSTS_N_INSNS (66),			/*			    DI */
   COSTS_N_INSNS (66)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

1639 /* Generic64 should produce code tuned for Nocona and K8. */
1641 struct processor_costs generic64_cost = {
1642 COSTS_N_INSNS (1), /* cost of an add instruction */
1643 /* On all chips taken into consideration lea is 2 cycles and more. With
1644 this cost however our current implementation of synth_mult results in
1645 use of unnecessary temporary registers causing regression on several
1646 SPECfp benchmarks. */
1647 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1648 COSTS_N_INSNS (1), /* variable shift costs */
1649 COSTS_N_INSNS (1), /* constant shift costs */
1650 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1651 COSTS_N_INSNS (4), /* HI */
1652 COSTS_N_INSNS (3), /* SI */
1653 COSTS_N_INSNS (4), /* DI */
1654 COSTS_N_INSNS (2)}, /* other */
1655 0, /* cost of multiply per each bit set */
1656 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1657 COSTS_N_INSNS (26), /* HI */
1658 COSTS_N_INSNS (42), /* SI */
1659 COSTS_N_INSNS (74), /* DI */
1660 COSTS_N_INSNS (74)}, /* other */
1661 COSTS_N_INSNS (1), /* cost of movsx */
1662 COSTS_N_INSNS (1), /* cost of movzx */
1663 8, /* "large" insn */
1664 17, /* MOVE_RATIO */
1665 4, /* cost for loading QImode using movzbl */
1666 {4, 4, 4}, /* cost of loading integer registers
1667 in QImode, HImode and SImode.
1668 Relative to reg-reg move (2). */
1669 {4, 4, 4}, /* cost of storing integer registers */
1670 4, /* cost of reg,reg fld/fst */
1671 {12, 12, 12}, /* cost of loading fp registers
1672 in SFmode, DFmode and XFmode */
1673 {6, 6, 8}, /* cost of storing fp registers
1674 in SFmode, DFmode and XFmode */
1675 2, /* cost of moving MMX register */
1676 {8, 8}, /* cost of loading MMX registers
1677 in SImode and DImode */
1678 {8, 8}, /* cost of storing MMX registers
1679 in SImode and DImode */
1680 2, /* cost of moving SSE register */
1681 {8, 8, 8}, /* cost of loading SSE registers
1682 in SImode, DImode and TImode */
1683 {8, 8, 8}, /* cost of storing SSE registers
1684 in SImode, DImode and TImode */
1685 5, /* MMX or SSE register to integer */
1686 32, /* size of l1 cache. */
1687 512, /* size of l2 cache. */
1688 64, /* size of prefetch block */
1689 6, /* number of parallel prefetches */
1690 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1691    value is increased to the perhaps more appropriate value of 5.  */
1692 3, /* Branch cost */
1693 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1694 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1695 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1696 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1697 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1698 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1699 {DUMMY_STRINGOP_ALGS,
1700 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1701 {DUMMY_STRINGOP_ALGS,
1702 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1703 1, /* scalar_stmt_cost. */
1704 1, /* scalar load_cost. */
1705 1, /* scalar_store_cost. */
1706 1, /* vec_stmt_cost. */
1707 1, /* vec_to_scalar_cost. */
1708 1, /* scalar_to_vec_cost. */
1709 1, /* vec_align_load_cost. */
1710 2, /* vec_unalign_load_cost. */
1711 1, /* vec_store_cost. */
1712 3, /* cond_taken_branch_cost. */
1713 1, /* cond_not_taken_branch_cost. */
1716 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona, Core 2 and K8.  */
1719 struct processor_costs generic32_cost = {
1720 COSTS_N_INSNS (1), /* cost of an add instruction */
1721 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1722 COSTS_N_INSNS (1), /* variable shift costs */
1723 COSTS_N_INSNS (1), /* constant shift costs */
1724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1725 COSTS_N_INSNS (4), /* HI */
1726 COSTS_N_INSNS (3), /* SI */
1727 COSTS_N_INSNS (4), /* DI */
1728 COSTS_N_INSNS (2)}, /* other */
1729 0, /* cost of multiply per each bit set */
1730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1731 COSTS_N_INSNS (26), /* HI */
1732 COSTS_N_INSNS (42), /* SI */
1733 COSTS_N_INSNS (74), /* DI */
1734 COSTS_N_INSNS (74)}, /* other */
1735 COSTS_N_INSNS (1), /* cost of movsx */
1736 COSTS_N_INSNS (1), /* cost of movzx */
1737 8, /* "large" insn */
1738 17, /* MOVE_RATIO */
1739 4, /* cost for loading QImode using movzbl */
1740 {4, 4, 4}, /* cost of loading integer registers
1741 in QImode, HImode and SImode.
1742 Relative to reg-reg move (2). */
1743 {4, 4, 4}, /* cost of storing integer registers */
1744 4, /* cost of reg,reg fld/fst */
1745 {12, 12, 12}, /* cost of loading fp registers
1746 in SFmode, DFmode and XFmode */
1747 {6, 6, 8}, /* cost of storing fp registers
1748 in SFmode, DFmode and XFmode */
1749 2, /* cost of moving MMX register */
1750 {8, 8}, /* cost of loading MMX registers
1751 in SImode and DImode */
1752 {8, 8}, /* cost of storing MMX registers
1753 in SImode and DImode */
1754 2, /* cost of moving SSE register */
1755 {8, 8, 8}, /* cost of loading SSE registers
1756 in SImode, DImode and TImode */
1757 {8, 8, 8}, /* cost of storing SSE registers
1758 in SImode, DImode and TImode */
1759 5, /* MMX or SSE register to integer */
1760 32, /* size of l1 cache. */
1761 256, /* size of l2 cache. */
1762 64, /* size of prefetch block */
1763 6, /* number of parallel prefetches */
1764 3, /* Branch cost */
1765 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1766 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1767 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1768 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1769 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1770 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1772 DUMMY_STRINGOP_ALGS},
1773 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1774 DUMMY_STRINGOP_ALGS},
1775 1, /* scalar_stmt_cost. */
1776 1, /* scalar load_cost. */
1777 1, /* scalar_store_cost. */
1778 1, /* vec_stmt_cost. */
1779 1, /* vec_to_scalar_cost. */
1780 1, /* scalar_to_vec_cost. */
1781 1, /* vec_align_load_cost. */
1782 2, /* vec_unalign_load_cost. */
1783 1, /* vec_store_cost. */
1784 3, /* cond_taken_branch_cost. */
1785 1, /* cond_not_taken_branch_cost. */
1788 const struct processor_costs *ix86_cost = &pentium_cost;
1790 /* Processor feature/optimization bitmasks. */
1791 #define m_386 (1<<PROCESSOR_I386)
1792 #define m_486 (1<<PROCESSOR_I486)
1793 #define m_PENT (1<<PROCESSOR_PENTIUM)
1794 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1795 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1796 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1797 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1798 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1799 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1800 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1801 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1802 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1803 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1804 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1805 #define m_ATOM (1<<PROCESSOR_ATOM)
1807 #define m_GEODE (1<<PROCESSOR_GEODE)
1808 #define m_K6 (1<<PROCESSOR_K6)
1809 #define m_K6_GEODE (m_K6 | m_GEODE)
1810 #define m_K8 (1<<PROCESSOR_K8)
1811 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1812 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1813 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1814 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1815 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1816 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1818 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1819 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1821 /* Generic instruction choice should be a common subset of supported CPUs
1822    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1823 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1825 /* Feature tests against the various tunings. */
1826 unsigned char ix86_tune_features[X86_TUNE_LAST];
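/* Editorial sketch (illustrative, mirroring what ix86_option_override_internal
   does later in this file; not part of the build): each entry in the initial_*
   tables below is a bitmask over processors, and the per-feature byte array is
   derived from it roughly as

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   so a tuning knob is enabled exactly when the bit of the processor being
   tuned for is set in its entry.  ix86_arch_features is filled the same way
   from initial_ix86_arch_features and ix86_arch.  */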
1828 /* Feature tests against the various tunings used to create ix86_tune_features
1829 based on the processor mask. */
1830 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1831 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1832    negatively, so enabling it for Generic64 seems like a good code-size
1833    tradeoff.  We can't enable it for 32bit generic because it does not
1834    work well with PPro-based chips.  */
1835 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1837 /* X86_TUNE_PUSH_MEMORY */
1838 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1839 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1841 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1844 /* X86_TUNE_UNROLL_STRLEN */
1845 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1846 | m_CORE2I7 | m_GENERIC,
1848 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1849 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1850 | m_CORE2I7 | m_GENERIC,
1852 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1853 on simulation result. But after P4 was made, no performance benefit
1854    was observed with branch hints.  They also increase the code size.
1855 As a result, icc never generates branch hints. */
1858 /* X86_TUNE_DOUBLE_WITH_ADD */
1861 /* X86_TUNE_USE_SAHF */
1862 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1863 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1865 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1866 partial dependencies. */
1867 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1868 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1870 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1871    register stalls on the Generic32 compilation setting as well.  However,
1872    in the current implementation the partial register stalls are not eliminated
1873    very well - they can be introduced via subregs synthesized by combine
1874    and can happen in caller/callee saving sequences.  Because this option
1875    pays back little on PPro-based chips and conflicts with the partial-reg
1876    dependencies used by Athlon/P4 based chips, it is better to leave it off
1877    for generic32 for now.  */
1880 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1881 m_CORE2I7 | m_GENERIC,
1883 /* X86_TUNE_USE_HIMODE_FIOP */
1884 m_386 | m_486 | m_K6_GEODE,
1886 /* X86_TUNE_USE_SIMODE_FIOP */
1887 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1889 /* X86_TUNE_USE_MOV0 */
1892 /* X86_TUNE_USE_CLTD */
1893 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1895 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1898 /* X86_TUNE_SPLIT_LONG_MOVES */
1901 /* X86_TUNE_READ_MODIFY_WRITE */
1904 /* X86_TUNE_READ_MODIFY */
1907 /* X86_TUNE_PROMOTE_QIMODE */
1908 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1909 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1911 /* X86_TUNE_FAST_PREFIX */
1912 ~(m_PENT | m_486 | m_386),
1914 /* X86_TUNE_SINGLE_STRINGOP */
1915 m_386 | m_PENT4 | m_NOCONA,
1917 /* X86_TUNE_QIMODE_MATH */
1920 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1921 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1922 might be considered for Generic32 if our scheme for avoiding partial
1923    stalls were more effective.  */
1926 /* X86_TUNE_PROMOTE_QI_REGS */
1929 /* X86_TUNE_PROMOTE_HI_REGS */
1932 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1933 over esp addition. */
1934 m_386 | m_486 | m_PENT | m_PPRO,
1936 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1937 over esp addition. */
1940 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1941 over esp subtraction. */
1942 m_386 | m_486 | m_PENT | m_K6_GEODE,
1944 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1945 over esp subtraction. */
1946 m_PENT | m_K6_GEODE,
1948 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1949 for DFmode copies */
1950 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1951 | m_GENERIC | m_GEODE),
1953 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1954 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1956 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1957    conflict between PPro/Pentium4 based chips that treat 128bit
1958    SSE registers as single units and K8 based chips that divide SSE
1959    registers into two 64bit halves.  This knob promotes all store destinations
1960    to be 128bit to allow register renaming on 128bit SSE units, but usually
1961    results in one extra micro-op on 64bit SSE units.  Experimental results
1962    show that disabling this option on P4 brings over 20% SPECfp regression,
1963    while enabling it on K8 brings a roughly 2.4% regression that can be partly
1964    masked by careful scheduling of moves.  */
1965 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1966 | m_AMDFAM10 | m_BDVER1,
1968 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1969 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1971 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1972 m_BDVER1 | m_COREI7,
1974 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1977 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1978 are resolved on SSE register parts instead of whole registers, so we may
1979    maintain just the lower part of scalar values in the proper format, leaving the
1980 upper part undefined. */
1983 /* X86_TUNE_SSE_TYPELESS_STORES */
1986 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1987 m_PPRO | m_PENT4 | m_NOCONA,
1989 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1990 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1992 /* X86_TUNE_PROLOGUE_USING_MOVE */
1993 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1995 /* X86_TUNE_EPILOGUE_USING_MOVE */
1996 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1998 /* X86_TUNE_SHIFT1 */
2001 /* X86_TUNE_USE_FFREEP */
2004 /* X86_TUNE_INTER_UNIT_MOVES */
2005 ~(m_AMD_MULTIPLE | m_GENERIC),
2007 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2008 ~(m_AMDFAM10 | m_BDVER1),
2010 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2011 than 4 branch instructions in the 16 byte window. */
2012 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2015 /* X86_TUNE_SCHEDULE */
2016 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2019 /* X86_TUNE_USE_BT */
2020 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2022 /* X86_TUNE_USE_INCDEC */
2023 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2025 /* X86_TUNE_PAD_RETURNS */
2026 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2028 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
2031 /* X86_TUNE_EXT_80387_CONSTANTS */
2032 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2033 | m_CORE2I7 | m_GENERIC,
2035 /* X86_TUNE_SHORTEN_X87_SSE */
2038 /* X86_TUNE_AVOID_VECTOR_DECODE */
2039 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2041 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2042 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2045 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2046 vector path on AMD machines. */
2047 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2049 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD machines.  */
2051 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2053 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a MOV.  */
2057 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2058 but one byte longer. */
2061 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2062 operand that cannot be represented using a modRM byte. The XOR
2063 replacement is long decoded, so this split helps here as well. */
2066 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP.  */
2068 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2070 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2071 from integer to FP. */
2074 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2075 with a subsequent conditional jump instruction into a single
2076 compare-and-branch uop. */
2079 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2080 will impact LEA instruction selection. */
2083 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector manipulations.  */
2087 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2088    at -O3.  For the moment, the prefetching seems badly tuned for Intel chips.  */
2090 m_K6_GEODE | m_AMD_MULTIPLE
2093 /* Feature tests against the various architecture variations. */
2094 unsigned char ix86_arch_features[X86_ARCH_LAST];
2096 /* Feature tests against the various architecture variations, used to create
2097 ix86_arch_features based on the processor mask. */
2098 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2099 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2100 ~(m_386 | m_486 | m_PENT | m_K6),
2102 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2105 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2108 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2111 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2115 static const unsigned int x86_accumulate_outgoing_args
2116 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2119 static const unsigned int x86_arch_always_fancy_math_387
2120 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2121 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2123 static enum stringop_alg stringop_alg = no_stringop;
2125 /* In case the average insn count for single function invocation is
2126    lower than this constant, emit fast (but longer) prologue and epilogue code.  */
2128 #define FAST_PROLOGUE_INSN_COUNT 20
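/* Hedged sketch of how this threshold is consumed (the real logic lives in
   ix86_compute_frame_layout and weights it by the register count; details
   may differ):

     count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
     cfun->machine->use_fast_prologue_epilogue = !expensive_function_p (count);

   i.e. the move-based fast prologue is used only when the function is cheap
   enough relative to the number of registers it must save.  */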
2130 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2131 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2132 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2133 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2135 /* Array of the smallest class containing reg number REGNO, indexed by
2136 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2138 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2140 /* ax, dx, cx, bx */
2141 AREG, DREG, CREG, BREG,
2142 /* si, di, bp, sp */
2143 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2145 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2146 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2149 /* flags, fpsr, fpcr, frame */
2150 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2158 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2159 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2160 /* SSE REX registers */
2161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2165 /* The "default" register map used in 32bit mode. */
2167 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2169 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2170 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2171 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2172 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2173 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2174 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2175 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2178 /* The "default" register map used in 64bit mode. */
2180 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2182 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2183 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2184 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2185 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2186 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2187 8,9,10,11,12,13,14,15, /* extended integer registers */
2188 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2191 /* Define the register numbers to be used in Dwarf debugging information.
2192 The SVR4 reference port C compiler uses the following register numbers
2193 in its Dwarf output code:
2194 0 for %eax (gcc regno = 0)
2195 1 for %ecx (gcc regno = 2)
2196 2 for %edx (gcc regno = 1)
2197 3 for %ebx (gcc regno = 3)
2198 4 for %esp (gcc regno = 7)
2199 5 for %ebp (gcc regno = 6)
2200 6 for %esi (gcc regno = 4)
2201 7 for %edi (gcc regno = 5)
2202 The following three DWARF register numbers are never generated by
2203 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2204 believes these numbers have these meanings.
2205 8 for %eip (no gcc equivalent)
2206 9 for %eflags (gcc regno = 17)
2207 10 for %trapno (no gcc equivalent)
2208 It is not at all clear how we should number the FP stack registers
2209 for the x86 architecture. If the version of SDB on x86/svr4 were
2210 a bit less brain dead with respect to floating-point then we would
2211 have a precedent to follow with respect to DWARF register numbers
2212 for x86 FP registers, but the SDB on x86/svr4 is so completely
2213 broken with respect to FP registers that it is hardly worth thinking
2214 of it as something to strive for compatibility with.
2215 The version of x86/svr4 SDB I have at the moment does (partially)
2216 seem to believe that DWARF register number 11 is associated with
2217 the x86 register %st(0), but that's about all. Higher DWARF
2218 register numbers don't seem to be associated with anything in
2219 particular, and even for DWARF regno 11, SDB only seems to under-
2220 stand that it should say that a variable lives in %st(0) (when
2221 asked via an `=' command) if we said it was in DWARF regno 11,
2222 but SDB still prints garbage when asked for the value of the
2223 variable in question (via a `/' command).
2224 (Also note that the labels SDB prints for various FP stack regs
2225 when doing an `x' command are all wrong.)
2226 Note that these problems generally don't affect the native SVR4
2227 C compiler because it doesn't allow the use of -O with -g and
2228 because when it is *not* optimizing, it allocates a memory
2229 location for each floating-point variable, and the memory
2230 location is what gets described in the DWARF AT_location
2231 attribute for the variable in question.
2232 Regardless of the severe mental illness of the x86/svr4 SDB, we
2233 do something sensible here and we use the following DWARF
2234    register numbers.  Note that these are all stack-top-relative numbers:
2236 11 for %st(0) (gcc regno = 8)
2237 12 for %st(1) (gcc regno = 9)
2238 13 for %st(2) (gcc regno = 10)
2239 14 for %st(3) (gcc regno = 11)
2240 15 for %st(4) (gcc regno = 12)
2241 16 for %st(5) (gcc regno = 13)
2242 17 for %st(6) (gcc regno = 14)
2243 18 for %st(7) (gcc regno = 15)
2245 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2247 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2248 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2249 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2250 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2251 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2256 /* Define parameter passing and return registers. */
2258 static int const x86_64_int_parameter_registers[6] =
2260 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2263 static int const x86_64_ms_abi_int_parameter_registers[4] =
2265 CX_REG, DX_REG, R8_REG, R9_REG
2268 static int const x86_64_int_return_registers[4] =
2270 AX_REG, DX_REG, DI_REG, SI_REG
2273 /* Define the structure for the machine field in struct function. */
2275 struct GTY(()) stack_local_entry {
2276 unsigned short mode;
2279 struct stack_local_entry *next;
2282 /* Structure describing stack frame layout.
2283 Stack grows downward:
2289 saved static chain if ix86_static_chain_on_stack
2291 saved frame pointer if frame_pointer_needed
2292 <- HARD_FRAME_POINTER
2298 <- sse_regs_save_offset
2301 [va_arg registers] |
2305 [padding2] | = to_allocate
2314 int outgoing_arguments_size;
2315 HOST_WIDE_INT frame;
2317 /* The offsets relative to ARG_POINTER. */
2318 HOST_WIDE_INT frame_pointer_offset;
2319 HOST_WIDE_INT hard_frame_pointer_offset;
2320 HOST_WIDE_INT stack_pointer_offset;
2321 HOST_WIDE_INT hfp_save_offset;
2322 HOST_WIDE_INT reg_save_offset;
2323 HOST_WIDE_INT sse_reg_save_offset;
2325 /* When save_regs_using_mov is set, emit prologue using
2326 move instead of push instructions. */
2327 bool save_regs_using_mov;
2330 /* Code model option. */
2331 enum cmodel ix86_cmodel;
2333 enum asm_dialect ix86_asm_dialect = ASM_ATT;
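/* For illustration: AT&T dialect writes "movl $42, %eax" (source first),
   while Intel dialect writes "mov eax, 42" (destination first); -masm=
   selects between the two when emitting assembly.  */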
2335 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2337 /* Which unit we are generating floating point math for. */
2338 enum fpmath_unit ix86_fpmath;
2340 /* Which CPU we are scheduling for.  */
2341 enum attr_cpu ix86_schedule;
2343 /* Which CPU we are optimizing for.  */
2344 enum processor_type ix86_tune;
2346 /* Which instruction set architecture to use. */
2347 enum processor_type ix86_arch;
2349 /* True if the SSE prefetch instruction is not a NOP.  */
2350 int x86_prefetch_sse;
2352 /* ix86_regparm_string as a number */
2353 static int ix86_regparm;
2355 /* -mstackrealign option */
2356 static const char ix86_force_align_arg_pointer_string[]
2357 = "force_align_arg_pointer";
2359 static rtx (*ix86_gen_leave) (void);
2360 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2361 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2362 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2363 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2364 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2365 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2367 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2373 /* Alignment for incoming stack boundary in bits specified at
2375 static unsigned int ix86_user_incoming_stack_boundary;
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2383 /* The abi used by target. */
2384 enum calling_abi ix86_abi;
2386 /* Values 1-5: see jump.c */
2387 int ix86_branch_cost;
2389 /* Calling abi specific va_list type nodes. */
2390 static GTY(()) tree sysv_va_list_type_node;
2391 static GTY(()) tree ms_va_list_type_node;
2393 /* Variables which are this size or smaller are put in the data/bss
2394 or ldata/lbss sections. */
2396 int ix86_section_threshold = 65536;
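/* For example, under -mcmodel=medium an object bigger than this threshold
   (tunable with -mlarge-data-threshold) is expected to land in .ldata or
   .lbss, which must be addressed with full 64-bit offsets.  */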
2398 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2399 char internal_label_prefix[16];
2400 int internal_label_prefix_len;
2402 /* Fence to use after loop using movnt. */
2405 /* Register class used for passing a given 64bit part of the argument.
2406    These represent classes as documented by the PS ABI, with the exception
2407    of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2408    uses an SF or DFmode move instead of DImode to avoid reformatting penalties.
2410    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2411    whenever possible (the upper half does contain padding).  */
2412 enum x86_64_reg_class
2415 X86_64_INTEGER_CLASS,
2416 X86_64_INTEGERSI_CLASS,
2423 X86_64_COMPLEX_X87_CLASS,
2427 #define MAX_CLASSES 4
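/* Illustrative classification under the scheme described above (per the
   x86-64 psABI; classify_argument computes this): for

     struct s { double d; int i; };

   the two eightbytes classify as { X86_64_SSEDF_CLASS,
   X86_64_INTEGERSI_CLASS }, so D travels in an SSE register via a DFmode
   move and I in a general register via a cheap SImode move.  */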
2429 /* Table of constants used by fldpi, fldln2, etc.  */
2430 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2431 static bool ext_80387_constants_init = 0;
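/* Presumably one slot per non-trivial x87 constant-load insn: fldl2t,
   fldl2e, fldpi, fldlg2 and fldln2 (fld1 and fldz need no table entry).  */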
2434 static struct machine_function * ix86_init_machine_status (void);
2435 static rtx ix86_function_value (const_tree, const_tree, bool);
2436 static bool ix86_function_value_regno_p (const unsigned int);
2437 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2439 static rtx ix86_static_chain (const_tree, bool);
2440 static int ix86_function_regparm (const_tree, const_tree);
2441 static void ix86_compute_frame_layout (struct ix86_frame *);
2442 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2444 static void ix86_add_new_builtins (int);
2445 static rtx ix86_expand_vec_perm_builtin (tree);
2446 static tree ix86_canonical_va_list_type (tree);
2447 static void predict_jump (int);
2448 static unsigned int split_stack_prologue_scratch_regno (void);
2449 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2451 enum ix86_function_specific_strings
2453 IX86_FUNCTION_SPECIFIC_ARCH,
2454 IX86_FUNCTION_SPECIFIC_TUNE,
2455 IX86_FUNCTION_SPECIFIC_FPMATH,
2456 IX86_FUNCTION_SPECIFIC_MAX
2459 static char *ix86_target_string (int, int, const char *, const char *,
2460 const char *, bool);
2461 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2462 static void ix86_function_specific_save (struct cl_target_option *);
2463 static void ix86_function_specific_restore (struct cl_target_option *);
2464 static void ix86_function_specific_print (FILE *, int,
2465 struct cl_target_option *);
2466 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2467 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2468 static bool ix86_can_inline_p (tree, tree);
2469 static void ix86_set_current_function (tree);
2470 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2472 static enum calling_abi ix86_function_abi (const_tree);
2475 #ifndef SUBTARGET32_DEFAULT_CPU
2476 #define SUBTARGET32_DEFAULT_CPU "i386"
2479 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
2481 #ifndef DEFAULT_PCC_STRUCT_RETURN
2482 #define DEFAULT_PCC_STRUCT_RETURN 1
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Define a set of ISAs which are available when a given ISA is
2490 enabled. MMX and SSE ISAs are handled separately. */
2492 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2493 #define OPTION_MASK_ISA_3DNOW_SET \
2494 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2496 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2497 #define OPTION_MASK_ISA_SSE2_SET \
2498 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2499 #define OPTION_MASK_ISA_SSE3_SET \
2500 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2501 #define OPTION_MASK_ISA_SSSE3_SET \
2502 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2503 #define OPTION_MASK_ISA_SSE4_1_SET \
2504 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2505 #define OPTION_MASK_ISA_SSE4_2_SET \
2506 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2507 #define OPTION_MASK_ISA_AVX_SET \
2508 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2509 #define OPTION_MASK_ISA_FMA_SET \
2510 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2512 /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same as -msse4.2.  */
2514 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2516 #define OPTION_MASK_ISA_SSE4A_SET \
2517 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2518 #define OPTION_MASK_ISA_FMA4_SET \
2519 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2520 | OPTION_MASK_ISA_AVX_SET)
2521 #define OPTION_MASK_ISA_XOP_SET \
2522 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2523 #define OPTION_MASK_ISA_LWP_SET \
2526 /* AES and PCLMUL need SSE2 because they use xmm registers */
2527 #define OPTION_MASK_ISA_AES_SET \
2528 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2529 #define OPTION_MASK_ISA_PCLMUL_SET \
2530 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2532 #define OPTION_MASK_ISA_ABM_SET \
2533 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2535 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2536 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2537 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2538 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2539 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2540 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2541 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2543 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2544 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2545 #define OPTION_MASK_ISA_F16C_SET \
2546 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
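/* Worked expansion of the chain above: OPTION_MASK_ISA_SSE4_1_SET unfolds to

     OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3
     | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE

   so a single -msse4.1 transitively enables every SSE generation it
   depends on.  */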
2548 /* Define a set of ISAs which aren't available when a given ISA is
2549 disabled. MMX and SSE ISAs are handled separately. */
2551 #define OPTION_MASK_ISA_MMX_UNSET \
2552 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2553 #define OPTION_MASK_ISA_3DNOW_UNSET \
2554 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2555 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2557 #define OPTION_MASK_ISA_SSE_UNSET \
2558 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2559 #define OPTION_MASK_ISA_SSE2_UNSET \
2560 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2561 #define OPTION_MASK_ISA_SSE3_UNSET \
2562 (OPTION_MASK_ISA_SSE3 \
2563 | OPTION_MASK_ISA_SSSE3_UNSET \
2564 | OPTION_MASK_ISA_SSE4A_UNSET )
2565 #define OPTION_MASK_ISA_SSSE3_UNSET \
2566 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2567 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2568 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2569 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2570 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2571 #define OPTION_MASK_ISA_AVX_UNSET \
2572 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2573 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2574 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2576 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same as -mno-sse4.1.  */
2578 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2580 #define OPTION_MASK_ISA_SSE4A_UNSET \
2581 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2583 #define OPTION_MASK_ISA_FMA4_UNSET \
2584 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2585 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2586 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2588 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2589 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2590 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2591 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2592 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2593 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2594 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2595 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2596 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2597 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2599 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2600 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2601 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
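/* Worked expansion of the UNSET chain: -mno-sse3 applies
   OPTION_MASK_ISA_SSE3_UNSET, which unfolds to SSE3 | SSSE3 | SSE4_1
   | SSE4_2 | AVX | FMA | F16C | SSE4A | FMA4 | XOP - i.e. everything that
   directly or indirectly requires SSE3 is cleared as well.  */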
2603 /* Vectorization library interface and handlers. */
2604 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2606 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2607 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2609 /* Processor target table, indexed by processor number */
2612 const struct processor_costs *cost; /* Processor costs */
2613 const int align_loop; /* Default alignments. */
2614 const int align_loop_max_skip;
2615 const int align_jump;
2616 const int align_jump_max_skip;
2617 const int align_func;
2620 static const struct ptt processor_target_table[PROCESSOR_max] =
2622 {&i386_cost, 4, 3, 4, 3, 4},
2623 {&i486_cost, 16, 15, 16, 15, 16},
2624 {&pentium_cost, 16, 7, 16, 7, 16},
2625 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2626 {&geode_cost, 0, 0, 0, 0, 0},
2627 {&k6_cost, 32, 7, 32, 7, 32},
2628 {&athlon_cost, 16, 7, 16, 7, 16},
2629 {&pentium4_cost, 0, 0, 0, 0, 0},
2630 {&k8_cost, 16, 7, 16, 7, 16},
2631 {&nocona_cost, 0, 0, 0, 0, 0},
2632 /* Core 2 32-bit. */
2633 {&generic32_cost, 16, 10, 16, 10, 16},
2634 /* Core 2 64-bit. */
2635 {&generic64_cost, 16, 10, 16, 10, 16},
2636 /* Core i7 32-bit. */
2637 {&generic32_cost, 16, 10, 16, 10, 16},
2638 /* Core i7 64-bit. */
2639 {&generic64_cost, 16, 10, 16, 10, 16},
2640 {&generic32_cost, 16, 7, 16, 7, 16},
2641 {&generic64_cost, 16, 10, 16, 10, 16},
2642 {&amdfam10_cost, 32, 24, 32, 7, 32},
2643 {&bdver1_cost, 32, 24, 32, 7, 32},
2644 {&btver1_cost, 32, 24, 32, 7, 32},
2645 {&atom_cost, 16, 7, 16, 7, 16}
2648 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2677 /* Return true if a red-zone is in use. */
2680 ix86_using_red_zone (void)
2682 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
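/* Background note: the red zone is the 128-byte area below the stack
   pointer that the SysV x86-64 ABI guarantees is not clobbered by signal
   or interrupt handlers, so leaf code may spill there without adjusting
   %rsp; the Microsoft x64 ABI provides no such area, hence the check.  */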
2685 /* Implement TARGET_HANDLE_OPTION. */
2688 ix86_handle_option (struct gcc_options *opts,
2689 struct gcc_options *opts_set ATTRIBUTE_UNUSED,
2690 const struct cl_decoded_option *decoded,
2691 location_t loc ATTRIBUTE_UNUSED)
2693 size_t code = decoded->opt_index;
2694 int value = decoded->value;
2701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2702 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2706 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2707 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2714 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2715 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2719 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2720 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2731 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2735 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2736 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2744 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2748 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2749 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2756 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2757 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2761 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2762 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2770 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2774 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2775 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2783 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2787 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2788 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2796 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2800 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2801 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2808 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2809 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2813 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2814 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2822 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2826 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2827 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2832 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2833 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2837 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2838 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2844 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2845 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2849 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2850 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2857 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2858 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2862 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2863 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2871 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2875 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2876 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2883 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2884 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2888 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2889 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2896 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2897 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2901 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2902 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2909 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2910 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2914 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2915 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2922 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2923 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2927 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2928 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2935 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2936 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2940 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2941 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2948 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2949 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2953 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2954 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2961 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2962 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2966 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2967 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2974 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2975 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2979 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2980 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2987 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2988 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2992 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2993 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
3000 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
3001 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
3005 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
3006 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
3013 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
3014 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
3018 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
3019 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
3026 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3027 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3031 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3032 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3039 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3040 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3044 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3045 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3052 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3053 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3057 opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3058 opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3067 /* Return a string that documents the current -m options. The caller is
3068 responsible for freeing the string. */
3071 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3072 const char *fpmath, bool add_nl_p)
3074 struct ix86_target_opts
3076 const char *option; /* option string */
3077 int mask; /* isa mask options */
3080 /* This table is ordered so that options like -msse4.2, which imply
3081    preceding options, are matched first.  */
3082 static struct ix86_target_opts isa_opts[] =
3084 { "-m64", OPTION_MASK_ISA_64BIT },
3085 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3086 { "-mfma", OPTION_MASK_ISA_FMA },
3087 { "-mxop", OPTION_MASK_ISA_XOP },
3088 { "-mlwp", OPTION_MASK_ISA_LWP },
3089 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3090 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3091 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3092 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3093 { "-msse3", OPTION_MASK_ISA_SSE3 },
3094 { "-msse2", OPTION_MASK_ISA_SSE2 },
3095 { "-msse", OPTION_MASK_ISA_SSE },
3096 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3097 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3098 { "-mmmx", OPTION_MASK_ISA_MMX },
3099 { "-mabm", OPTION_MASK_ISA_ABM },
3100 { "-mbmi", OPTION_MASK_ISA_BMI },
3101 { "-mtbm", OPTION_MASK_ISA_TBM },
3102 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3103 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3104 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3105 { "-maes", OPTION_MASK_ISA_AES },
3106 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3107 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3108 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3109 { "-mf16c", OPTION_MASK_ISA_F16C },
3113 static struct ix86_target_opts flag_opts[] =
3115 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3116 { "-m80387", MASK_80387 },
3117 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3118 { "-malign-double", MASK_ALIGN_DOUBLE },
3119 { "-mcld", MASK_CLD },
3120 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3121 { "-mieee-fp", MASK_IEEE_FP },
3122 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3123 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3124 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3125 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3126 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3127 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3128 { "-mno-red-zone", MASK_NO_RED_ZONE },
3129 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3130 { "-mrecip", MASK_RECIP },
3131 { "-mrtd", MASK_RTD },
3132 { "-msseregparm", MASK_SSEREGPARM },
3133 { "-mstack-arg-probe", MASK_STACK_PROBE },
3134 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3135 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3136 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3137 { "-mvzeroupper", MASK_VZEROUPPER },
3138 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3139 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3142 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3145 char target_other[40];
3154 memset (opts, '\0', sizeof (opts));
3156 /* Add -march= option. */
3159 opts[num][0] = "-march=";
3160 opts[num++][1] = arch;
3163 /* Add -mtune= option. */
3166 opts[num][0] = "-mtune=";
3167 opts[num++][1] = tune;
3170   /* Pick out the ISA options.  */
3171 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3173 if ((isa & isa_opts[i].mask) != 0)
3175 opts[num++][0] = isa_opts[i].option;
3176 isa &= ~ isa_opts[i].mask;
3180 if (isa && add_nl_p)
3182 opts[num++][0] = isa_other;
3183 sprintf (isa_other, "(other isa: %#x)", isa);
3186 /* Add flag options. */
3187 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3189 if ((flags & flag_opts[i].mask) != 0)
3191 opts[num++][0] = flag_opts[i].option;
3192 flags &= ~ flag_opts[i].mask;
3196 if (flags && add_nl_p)
3198 opts[num++][0] = target_other;
3199 sprintf (target_other, "(other flags: %#x)", flags);
3202 /* Add -fpmath= option. */
3205 opts[num][0] = "-mfpmath=";
3206 opts[num++][1] = fpmath;
3213 gcc_assert (num < ARRAY_SIZE (opts));
3215 /* Size the string. */
3217 sep_len = (add_nl_p) ? 3 : 1;
3218 for (i = 0; i < num; i++)
3221 for (j = 0; j < 2; j++)
3223 len += strlen (opts[i][j]);
3226 /* Build the string. */
3227 ret = ptr = (char *) xmalloc (len);
3230 for (i = 0; i < num; i++)
3234 for (j = 0; j < 2; j++)
3235 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3242 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3250 for (j = 0; j < 2; j++)
3253 memcpy (ptr, opts[i][j], len2[j]);
3255 line_len += len2[j];
3260 gcc_assert (ret + len >= ptr);
3265 /* Return true if profiling code should be emitted before the prologue,
3266    i.e. when -mfentry is in use; false otherwise.  Note: for x86 "hotfix"
3267    (ms_hook_prologue) functions, this combination is rejected with sorry ().  */
3269 ix86_profile_before_prologue (void)
3271 return flag_fentry != 0;
3274 /* Function that is callable from the debugger to print the current options.  */
3277 ix86_debug_options (void)
3279 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3280 ix86_arch_string, ix86_tune_string,
3281 ix86_fpmath_string, true);
3285 fprintf (stderr, "%s\n\n", opts);
3289 fputs ("<no options>\n\n", stderr);
3294 /* Override various settings based on options. If MAIN_ARGS_P, the
3295    options are from the command line, otherwise they are from attribute(target).  */
3299 ix86_option_override_internal (bool main_args_p)
3302 unsigned int ix86_arch_mask, ix86_tune_mask;
3303 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3308 /* Comes from final.c -- no real reason to change it. */
3309 #define MAX_CODE_ALIGN 16
3317 PTA_PREFETCH_SSE = 1 << 4,
3319 PTA_3DNOW_A = 1 << 6,
3323 PTA_POPCNT = 1 << 10,
3325 PTA_SSE4A = 1 << 12,
3326 PTA_NO_SAHF = 1 << 13,
3327 PTA_SSE4_1 = 1 << 14,
3328 PTA_SSE4_2 = 1 << 15,
3330 PTA_PCLMUL = 1 << 17,
3333 PTA_MOVBE = 1 << 20,
3337 PTA_FSGSBASE = 1 << 24,
3338 PTA_RDRND = 1 << 25,
3342   /* If this reaches 32, we need to widen the struct pta flags field below.  */
3347 const char *const name; /* processor name or nickname. */
3348 const enum processor_type processor;
3349 const enum attr_cpu schedule;
3350 const unsigned /*enum pta_flags*/ flags;
3352 const processor_alias_table[] =
3354 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3355 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3356 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3357 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3358 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3359 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3360 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3361 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3362 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3363 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3364 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3365 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3366 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3368 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3370 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3371 PTA_MMX | PTA_SSE | PTA_SSE2},
3372 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3373        PTA_MMX | PTA_SSE | PTA_SSE2},
3374 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3375 PTA_MMX | PTA_SSE | PTA_SSE2},
3376 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3377 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3378 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3379 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3380 | PTA_CX16 | PTA_NO_SAHF},
3381 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3382 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3383 | PTA_SSSE3 | PTA_CX16},
3384 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3385 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3386 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3387 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3388 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3389 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3390 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3391 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3392 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3393 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3394 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3395        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3396 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3397 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3398 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3399 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3400 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3401 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3402 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3403 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3404 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3405 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3406 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3407 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3408 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3409 {"x86-64", PROCESSOR_K8, CPU_K8,
3410 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3411 {"k8", PROCESSOR_K8, CPU_K8,
3412 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3413 | PTA_SSE2 | PTA_NO_SAHF},
3414 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3415 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3416 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3417 {"opteron", PROCESSOR_K8, CPU_K8,
3418 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3419 | PTA_SSE2 | PTA_NO_SAHF},
3420 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3421 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3422 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3423 {"athlon64", PROCESSOR_K8, CPU_K8,
3424 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3425 | PTA_SSE2 | PTA_NO_SAHF},
3426 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3427 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3428 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3429 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3430 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3431 | PTA_SSE2 | PTA_NO_SAHF},
3432 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3433 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3434 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3435 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3436 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3437 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3438 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3439 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3440 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3441 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3442 | PTA_XOP | PTA_LWP},
3443 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3444 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3445        | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3446 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3447 0 /* flags are only used for -march switch. */ },
3448 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3449 PTA_64BIT /* flags are only used for -march switch. */ },
3452 int const pta_size = ARRAY_SIZE (processor_alias_table);
3454 /* Set up prefix/suffix so the error messages refer to either the command
3455 line argument, or the attribute(target). */
3464 prefix = "option(\"";
3469 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3470 SUBTARGET_OVERRIDE_OPTIONS;
3473 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3474 SUBSUBTARGET_OVERRIDE_OPTIONS;
3477 /* -fPIC is the default for x86_64. */
3478 if (TARGET_MACHO && TARGET_64BIT)
3481 /* Need to check -mtune=generic first. */
3482 if (ix86_tune_string)
3484 if (!strcmp (ix86_tune_string, "generic")
3485 || !strcmp (ix86_tune_string, "i686")
3486 /* As special support for cross compilers we read -mtune=native
3487 as -mtune=generic. With native compilers we won't see the
3488 -mtune=native, as it was changed by the driver. */
3489 || !strcmp (ix86_tune_string, "native"))
3492 ix86_tune_string = "generic64";
3494 ix86_tune_string = "generic32";
3496 /* If this call is for setting the option attribute, allow the
3497 generic32/generic64 that was previously set. */
3498 else if (!main_args_p
3499 && (!strcmp (ix86_tune_string, "generic32")
3500 || !strcmp (ix86_tune_string, "generic64")))
3502 else if (!strncmp (ix86_tune_string, "generic", 7))
3503 error ("bad value (%s) for %stune=%s %s",
3504 ix86_tune_string, prefix, suffix, sw);
3505 else if (!strcmp (ix86_tune_string, "x86-64"))
3506 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3507 "%stune=k8%s or %stune=generic%s instead as appropriate",
3508 prefix, suffix, prefix, suffix, prefix, suffix);
3512 if (ix86_arch_string)
3513 ix86_tune_string = ix86_arch_string;
3514 if (!ix86_tune_string)
3516 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3517 ix86_tune_defaulted = 1;
3520 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3521 need to use a sensible tune option. */
3522 if (!strcmp (ix86_tune_string, "generic")
3523 || !strcmp (ix86_tune_string, "x86-64")
3524 || !strcmp (ix86_tune_string, "i686"))
3527 ix86_tune_string = "generic64";
3529 ix86_tune_string = "generic32";
3533 if (ix86_stringop_string)
3535 if (!strcmp (ix86_stringop_string, "rep_byte"))
3536 stringop_alg = rep_prefix_1_byte;
3537 else if (!strcmp (ix86_stringop_string, "libcall"))
3538 stringop_alg = libcall;
3539 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3540 stringop_alg = rep_prefix_4_byte;
3541 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3543 /* rep; movq isn't available in 32-bit code. */
3544 stringop_alg = rep_prefix_8_byte;
3545 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3546 stringop_alg = loop_1_byte;
3547 else if (!strcmp (ix86_stringop_string, "loop"))
3548 stringop_alg = loop;
3549 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3550 stringop_alg = unrolled_loop;
3552 error ("bad value (%s) for %sstringop-strategy=%s %s",
3553 ix86_stringop_string, prefix, suffix, sw);
3556 if (!ix86_arch_string)
3557 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3559 ix86_arch_specified = 1;
3561 /* Validate -mabi= value. */
3562 if (ix86_abi_string)
3564 if (strcmp (ix86_abi_string, "sysv") == 0)
3565 ix86_abi = SYSV_ABI;
3566 else if (strcmp (ix86_abi_string, "ms") == 0)
3569 error ("unknown ABI (%s) for %sabi=%s %s",
3570 ix86_abi_string, prefix, suffix, sw);
3573 ix86_abi = DEFAULT_ABI;
3575 if (ix86_cmodel_string != 0)
3577 if (!strcmp (ix86_cmodel_string, "small"))
3578 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3579 else if (!strcmp (ix86_cmodel_string, "medium"))
3580 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3581 else if (!strcmp (ix86_cmodel_string, "large"))
3582 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3584 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3585 else if (!strcmp (ix86_cmodel_string, "32"))
3586 ix86_cmodel = CM_32;
3587 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3588 ix86_cmodel = CM_KERNEL;
3590 error ("bad value (%s) for %scmodel=%s %s",
3591 ix86_cmodel_string, prefix, suffix, sw);
3595 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3596 use of rip-relative addressing. This eliminates fixups that
3597 would otherwise be needed if this object is to be placed in a
3598 DLL, and is essentially just as efficient as direct addressing. */
3599 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3600 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3601 else if (TARGET_64BIT)
3602 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3604 ix86_cmodel = CM_32;
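/* E.g. "-mcmodel=medium -fPIC" yields CM_MEDIUM_PIC and plain
   "-mcmodel=medium" yields CM_MEDIUM; with no -mcmodel given, the 64-bit
   MS ABI forces CM_SMALL_PIC with PIC enabled, other 64-bit targets
   default to CM_SMALL (CM_SMALL_PIC under -fPIC), and 32-bit code gets
   CM_32.  */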
3606 if (ix86_asm_string != 0)
3609 && !strcmp (ix86_asm_string, "intel"))
3610 ix86_asm_dialect = ASM_INTEL;
3611 else if (!strcmp (ix86_asm_string, "att"))
3612 ix86_asm_dialect = ASM_ATT;
3614 error ("bad value (%s) for %sasm=%s %s",
3615 ix86_asm_string, prefix, suffix, sw);
3617 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3618 error ("code model %qs not supported in the %s-bit mode",
3619 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3620 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3621 sorry ("%i-bit mode not compiled in",
3622 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3624 for (i = 0; i < pta_size; i++)
3625 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3627 ix86_schedule = processor_alias_table[i].schedule;
3628 ix86_arch = processor_alias_table[i].processor;
3629 /* Default cpu tuning to the architecture. */
3630 ix86_tune = ix86_arch;
3632 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3633 error ("CPU you selected does not support x86-64 "
3636 if (processor_alias_table[i].flags & PTA_MMX
3637 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3638 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3639 if (processor_alias_table[i].flags & PTA_3DNOW
3640 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3641 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3642 if (processor_alias_table[i].flags & PTA_3DNOW_A
3643 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3644 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3645 if (processor_alias_table[i].flags & PTA_SSE
3646 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3647 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3648 if (processor_alias_table[i].flags & PTA_SSE2
3649 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3650 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3651 if (processor_alias_table[i].flags & PTA_SSE3
3652 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3653 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3654 if (processor_alias_table[i].flags & PTA_SSSE3
3655 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3656 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3657 if (processor_alias_table[i].flags & PTA_SSE4_1
3658 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3659 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3660 if (processor_alias_table[i].flags & PTA_SSE4_2
3661 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3662 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3663 if (processor_alias_table[i].flags & PTA_AVX
3664 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3665 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3666 if (processor_alias_table[i].flags & PTA_FMA
3667 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3668 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3669 if (processor_alias_table[i].flags & PTA_SSE4A
3670 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3671 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3672 if (processor_alias_table[i].flags & PTA_FMA4
3673 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3674 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3675 if (processor_alias_table[i].flags & PTA_XOP
3676 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3677 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3678 if (processor_alias_table[i].flags & PTA_LWP
3679 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3680 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3681 if (processor_alias_table[i].flags & PTA_ABM
3682 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3683 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3684 if (processor_alias_table[i].flags & PTA_BMI
3685 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3686 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3687 if (processor_alias_table[i].flags & PTA_TBM
3688 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3689 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3690 if (processor_alias_table[i].flags & PTA_CX16
3691 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3692 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3693 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3694 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3695 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3696 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3697 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3698 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3699 if (processor_alias_table[i].flags & PTA_MOVBE
3700 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3701 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3702 if (processor_alias_table[i].flags & PTA_AES
3703 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3704 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3705 if (processor_alias_table[i].flags & PTA_PCLMUL
3706 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3707 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3708 if (processor_alias_table[i].flags & PTA_FSGSBASE
3709 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3710 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3711 if (processor_alias_table[i].flags & PTA_RDRND
3712 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3713 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3714 if (processor_alias_table[i].flags & PTA_F16C
3715 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3716 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3717 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3718 x86_prefetch_sse = true;
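/* E.g. -march=core2 turns on MMX, SSE, SSE2, SSE3, SSSE3 and CX16 via
   its PTA_* bits, but each OPTION_MASK_ISA_* bit is only ORed in when
   the user didn't set it explicitly (the ix86_isa_flags_explicit tests
   above).  */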
3723 if (!strcmp (ix86_arch_string, "generic"))
3724 error ("generic CPU can be used only for %stune=%s %s",
3725 prefix, suffix, sw);
3726 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3727 error ("bad value (%s) for %sarch=%s %s",
3728 ix86_arch_string, prefix, suffix, sw);
3730 ix86_arch_mask = 1u << ix86_arch;
3731 for (i = 0; i < X86_ARCH_LAST; ++i)
3732 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3734 for (i = 0; i < pta_size; i++)
3735 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3737 ix86_schedule = processor_alias_table[i].schedule;
3738 ix86_tune = processor_alias_table[i].processor;
3741 if (!(processor_alias_table[i].flags & PTA_64BIT))
3743 if (ix86_tune_defaulted)
3745 ix86_tune_string = "x86-64";
3746 for (i = 0; i < pta_size; i++)
3747 if (! strcmp (ix86_tune_string,
3748 processor_alias_table[i].name))
3750 ix86_schedule = processor_alias_table[i].schedule;
3751 ix86_tune = processor_alias_table[i].processor;
3754 error ("CPU you selected does not support x86-64 "
3760 /* Adjust tuning when compiling for 32-bit ABI. */
3763 case PROCESSOR_GENERIC64:
3764 ix86_tune = PROCESSOR_GENERIC32;
3765 ix86_schedule = CPU_PENTIUMPRO;
3768 case PROCESSOR_CORE2_64:
3769 ix86_tune = PROCESSOR_CORE2_32;
3772 case PROCESSOR_COREI7_64:
3773 ix86_tune = PROCESSOR_COREI7_32;
3780 /* Intel CPUs have always interpreted SSE prefetch instructions as
3781 NOPs; so, we can enable SSE prefetch instructions even when
3782 -mtune (rather than -march) points us to a processor that has them.
3783 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3784 higher processors. */
3786 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3787 x86_prefetch_sse = true;
3791 if (ix86_tune_specified && i == pta_size)
3792 error ("bad value (%s) for %stune=%s %s",
3793 ix86_tune_string, prefix, suffix, sw);
3795 ix86_tune_mask = 1u << ix86_tune;
3796 for (i = 0; i < X86_TUNE_LAST; ++i)
3797 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
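/* Worked example of the masking: if ix86_tune happens to be 3, then
   ix86_tune_mask == 1u << 3 == 0x8, and each tune feature is enabled
   exactly when bit 3 is set in its initial_ix86_tune_features entry;
   the !! collapses the tested bit to 0 or 1.  */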
3799 #ifndef USE_IX86_FRAME_POINTER
3800 #define USE_IX86_FRAME_POINTER 0
3803 #ifndef USE_X86_64_FRAME_POINTER
3804 #define USE_X86_64_FRAME_POINTER 0
3807 /* Set the default values for switches whose default depends on TARGET_64BIT
3808 in case they weren't overridden by command-line options. */
3811 if (optimize > 1 && !global_options_set.x_flag_zee)
3813 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3814 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3815 if (flag_asynchronous_unwind_tables == 2)
3816 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3817 if (flag_pcc_struct_return == 2)
3818 flag_pcc_struct_return = 0;
3822 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3823 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3824 if (flag_asynchronous_unwind_tables == 2)
3825 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3826 if (flag_pcc_struct_return == 2)
3827 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3831 ix86_cost = &ix86_size_cost;
3833 ix86_cost = processor_target_table[ix86_tune].cost;
3835 /* Arrange to set up i386_stack_locals for all functions. */
3836 init_machine_status = ix86_init_machine_status;
3838 /* Validate -mregparm= value. */
3839 if (ix86_regparm_string)
3842 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3843 i = atoi (ix86_regparm_string);
3844 if (i < 0 || i > REGPARM_MAX)
3845 error ("%sregparm=%d%s is not between 0 and %d",
3846 prefix, i, suffix, REGPARM_MAX);
3851 ix86_regparm = REGPARM_MAX;
3853 /* If the user has provided any of the -malign-* options,
3854 warn and use that value only if -falign-* is not set.
3855 Remove this code in GCC 3.2 or later. */
3856 if (ix86_align_loops_string)
3858 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3859 prefix, suffix, suffix);
3860 if (align_loops == 0)
3862 i = atoi (ix86_align_loops_string);
3863 if (i < 0 || i > MAX_CODE_ALIGN)
3864 error ("%salign-loops=%d%s is not between 0 and %d",
3865 prefix, i, suffix, MAX_CODE_ALIGN);
3867 align_loops = 1 << i;
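/* E.g. -malign-loops=4 (with -falign-loops unset) warns that the option
   is obsolete and then sets align_loops = 1 << 4 = 16, i.e. a 16-byte
   loop alignment.  */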
3871 if (ix86_align_jumps_string)
3873 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3874 prefix, suffix, suffix);
3875 if (align_jumps == 0)
3877 i = atoi (ix86_align_jumps_string);
3878 if (i < 0 || i > MAX_CODE_ALIGN)
3879 error ("%salign-jumps=%d%s is not between 0 and %d",
3880 prefix, i, suffix, MAX_CODE_ALIGN);
3882 align_jumps = 1 << i;
3886 if (ix86_align_funcs_string)
3888 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3889 prefix, suffix, suffix);
3890 if (align_functions == 0)
3892 i = atoi (ix86_align_funcs_string);
3893 if (i < 0 || i > MAX_CODE_ALIGN)
3894 error ("%salign-functions=%d%s is not between 0 and %d",
3895 prefix, i, suffix, MAX_CODE_ALIGN);
3897 align_functions = 1 << i;
3901 /* Default align_* from the processor table. */
3902 if (align_loops == 0)
3904 align_loops = processor_target_table[ix86_tune].align_loop;
3905 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3907 if (align_jumps == 0)
3909 align_jumps = processor_target_table[ix86_tune].align_jump;
3910 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3912 if (align_functions == 0)
3914 align_functions = processor_target_table[ix86_tune].align_func;
3917 /* Validate -mbranch-cost= value, or provide default. */
3918 ix86_branch_cost = ix86_cost->branch_cost;
3919 if (ix86_branch_cost_string)
3921 i = atoi (ix86_branch_cost_string);
3923 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3925 ix86_branch_cost = i;
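/* E.g. -mbranch-cost=3 overrides the tuned default taken from
   ix86_cost->branch_cost; an out-of-range value is diagnosed above and
   leaves the default in place.  */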
3927 if (ix86_section_threshold_string)
3929 i = atoi (ix86_section_threshold_string);
3931 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3933 ix86_section_threshold = i;
3936 if (ix86_tls_dialect_string)
3938 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3939 ix86_tls_dialect = TLS_DIALECT_GNU;
3940 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3941 ix86_tls_dialect = TLS_DIALECT_GNU2;
3943 error ("bad value (%s) for %stls-dialect=%s %s",
3944 ix86_tls_dialect_string, prefix, suffix, sw);
3947 if (ix87_precision_string)
3949 i = atoi (ix87_precision_string);
3950 if (i != 32 && i != 64 && i != 80)
3951 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3956 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3958 /* Enable by default the SSE and MMX builtins. Do allow the user to
3959 explicitly disable any of these. In particular, disabling SSE and
3960 MMX for kernel code is extremely useful. */
3961 if (!ix86_arch_specified)
3963 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3964 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3967 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3971 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3973 if (!ix86_arch_specified)
3975 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3977 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3978 when the programmer takes care to keep the stack from being destroyed. */
3979 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3980 target_flags |= MASK_NO_RED_ZONE;
3983 /* Keep nonleaf frame pointers. */
3984 if (flag_omit_frame_pointer)
3985 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3986 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3987 flag_omit_frame_pointer = 1;
3989 /* If we're doing fast math, we don't care about comparison order
3990 wrt NaNs. This lets us use a shorter comparison sequence. */
3991 if (flag_finite_math_only)
3992 target_flags &= ~MASK_IEEE_FP;
3994 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3995 since the insns won't need emulation. */
3996 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3997 target_flags &= ~MASK_NO_FANCY_MATH_387;
3999 /* Likewise, if the target doesn't have a 387, or we've specified
4000 software floating point, don't use 387 inline intrinsics. */
4002 target_flags |= MASK_NO_FANCY_MATH_387;
4004 /* Turn on MMX builtins for -msse. */
4007 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
4008 x86_prefetch_sse = true;
4011 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
4012 if (TARGET_SSE4_2 || TARGET_ABM)
4013 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
4015 /* Validate -mpreferred-stack-boundary= value or default it to
4016 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4017 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4018 if (ix86_preferred_stack_boundary_string)
4020 int min = (TARGET_64BIT ? 4 : 2);
4021 int max = (TARGET_SEH ? 4 : 12);
4023 i = atoi (ix86_preferred_stack_boundary_string);
4024 if (i < min || i > max)
4027 error ("%spreferred-stack-boundary%s is not supported "
4028 "for this target", prefix, suffix);
4030 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
4031 prefix, i, suffix, min, max);
4034 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
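/* Worked example: -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. a 16-byte-aligned
   stack.  */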
4037 /* Set the default value for -mstackrealign. */
4038 if (ix86_force_align_arg_pointer == -1)
4039 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4041 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4043 /* Validate -mincoming-stack-boundary= value or default it to
4044 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4045 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4046 if (ix86_incoming_stack_boundary_string)
4048 i = atoi (ix86_incoming_stack_boundary_string);
4049 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
4050 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4051 i, TARGET_64BIT ? 4 : 2);
4054 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
4055 ix86_incoming_stack_boundary
4056 = ix86_user_incoming_stack_boundary;
4060 /* Accept -msseregparm only if at least SSE support is enabled. */
4061 if (TARGET_SSEREGPARM
4063 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4065 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4066 if (ix86_fpmath_string != 0)
4068 if (! strcmp (ix86_fpmath_string, "387"))
4069 ix86_fpmath = FPMATH_387;
4070 else if (! strcmp (ix86_fpmath_string, "sse"))
4074 warning (0, "SSE instruction set disabled, using 387 arithmetic");
4075 ix86_fpmath = FPMATH_387;
4078 ix86_fpmath = FPMATH_SSE;
4080 else if (! strcmp (ix86_fpmath_string, "387,sse")
4081 || ! strcmp (ix86_fpmath_string, "387+sse")
4082 || ! strcmp (ix86_fpmath_string, "sse,387")
4083 || ! strcmp (ix86_fpmath_string, "sse+387")
4084 || ! strcmp (ix86_fpmath_string, "both"))
4088 warning (0, "SSE instruction set disabled, using 387 arithmetic");
4089 ix86_fpmath = FPMATH_387;
4091 else if (!TARGET_80387)
4093 warning (0, "387 instruction set disabled, using SSE arithmetic");
4094 ix86_fpmath = FPMATH_SSE;
4097 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4100 error ("bad value (%s) for %sfpmath=%s %s",
4101 ix86_fpmath_string, prefix, suffix, sw);
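/* E.g. -mfpmath=sse yields FPMATH_SSE when SSE is enabled, and
   -mfpmath=sse,387 (or "both") yields FPMATH_SSE | FPMATH_387 when both
   units are available; if a requested unit is disabled, the warnings
   above fall back to the other one.  */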
4104 /* If the i387 is disabled, then do not return values in it. */
4106 target_flags &= ~MASK_FLOAT_RETURNS;
4108 /* Use an external vectorized library when vectorizing intrinsics. */
4109 if (ix86_veclibabi_string)
4111 if (strcmp (ix86_veclibabi_string, "svml") == 0)
4112 ix86_veclib_handler = ix86_veclibabi_svml;
4113 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
4114 ix86_veclib_handler = ix86_veclibabi_acml;
4116 error ("unknown vectorization library ABI type (%s) for "
4117 "%sveclibabi=%s %s", ix86_veclibabi_string,
4118 prefix, suffix, sw);
4121 if ((!USE_IX86_FRAME_POINTER
4122 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4123 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4125 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4127 /* ??? Unwind info is not correct around the CFG unless either a frame
4128 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4129 unwind info generation to be aware of the CFG and propagating states around edges. */
4131 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4132 || flag_exceptions || flag_non_call_exceptions)
4133 && flag_omit_frame_pointer
4134 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4136 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4137 warning (0, "unwind tables currently require either a frame pointer "
4138 "or %saccumulate-outgoing-args%s for correctness",
4140 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4143 /* If stack probes are required, the space used for large function
4144 arguments on the stack must also be probed, so enable
4145 -maccumulate-outgoing-args so this happens in the prologue. */
4146 if (TARGET_STACK_PROBE
4147 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4149 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4150 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4151 "for correctness", prefix, suffix);
4152 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4155 /* For sane SSE instruction set generation we need the fcomi instruction.
4156 It is safe to enable all CMOVE instructions. */
4160 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4163 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4164 p = strchr (internal_label_prefix, 'X');
4165 internal_label_prefix_len = p - internal_label_prefix;
4169 /* When a scheduling description is not available, disable the scheduler
4170 pass so it won't slow down compilation and make x87 code slower. */
4171 if (!TARGET_SCHEDULE)
4172 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4174 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4175 ix86_cost->simultaneous_prefetches,
4176 global_options.x_param_values,
4177 global_options_set.x_param_values);
4178 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4179 global_options.x_param_values,
4180 global_options_set.x_param_values);
4181 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4182 global_options.x_param_values,
4183 global_options_set.x_param_values);
4184 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4185 global_options.x_param_values,
4186 global_options_set.x_param_values);
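/* These calls only seed tuning-derived defaults: maybe_set_param_value
   leaves a param alone when the user set it, so e.g. an explicit
   --param l1-cache-size=32 on the command line still wins.  */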
4188 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
4189 if (flag_prefetch_loop_arrays < 0
4192 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4193 flag_prefetch_loop_arrays = 1;
4195 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4196 can be optimized to ap = __builtin_next_arg (0). */
4197 if (!TARGET_64BIT && !flag_split_stack)
4198 targetm.expand_builtin_va_start = NULL;
4202 ix86_gen_leave = gen_leave_rex64;
4203 ix86_gen_add3 = gen_adddi3;
4204 ix86_gen_sub3 = gen_subdi3;
4205 ix86_gen_sub3_carry = gen_subdi3_carry;
4206 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4207 ix86_gen_monitor = gen_sse3_monitor64;
4208 ix86_gen_andsp = gen_anddi3;
4209 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4210 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4211 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4215 ix86_gen_leave = gen_leave;
4216 ix86_gen_add3 = gen_addsi3;
4217 ix86_gen_sub3 = gen_subsi3;
4218 ix86_gen_sub3_carry = gen_subsi3_carry;
4219 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4220 ix86_gen_monitor = gen_sse3_monitor;
4221 ix86_gen_andsp = gen_andsi3;
4222 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4223 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4224 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4228 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4230 target_flags |= MASK_CLD & ~target_flags_explicit;
4233 if (!TARGET_64BIT && flag_pic)
4235 if (flag_fentry > 0)
4236 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4240 else if (TARGET_SEH)
4242 if (flag_fentry == 0)
4243 sorry ("-mno-fentry isn%'t compatible with SEH");
4246 else if (flag_fentry < 0)
4248 #if defined(PROFILE_BEFORE_PROLOGUE)
4255 /* Save the initial options in case the user uses function-specific options. */
4257 target_option_default_node = target_option_current_node
4258 = build_target_option_node ();
4262 /* When not optimizing for size, enable the vzeroupper optimization for
4263 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
4264 AVX unaligned loads/stores. */
4267 if (flag_expensive_optimizations
4268 && !(target_flags_explicit & MASK_VZEROUPPER))
4269 target_flags |= MASK_VZEROUPPER;
4270 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4271 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4272 if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4273 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
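/* With the split masks set, a 32-byte unaligned access is expanded as
   two 16-byte halves, conceptually (illustrative asm, not emitted here
   verbatim):
       vmovups     (%rax), %xmm0
       vinsertf128 $1, 16(%rax), %ymm0, %ymm0
   instead of a single 32-byte vmovups.  */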
4278 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4279 target_flags &= ~MASK_VZEROUPPER;
4283 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4286 function_pass_avx256_p (const_rtx val)
4291 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4294 if (GET_CODE (val) == PARALLEL)
4299 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4301 r = XVECEXP (val, 0, i);
4302 if (GET_CODE (r) == EXPR_LIST
4304 && REG_P (XEXP (r, 0))
4305 && (GET_MODE (XEXP (r, 0)) == OImode
4306 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
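/* Illustrative RTL shape matched here: a value returned partly in an
   AVX register looks like
       (parallel [(expr_list (reg:V8SF 23) (const_int 0))])
   and it is each expr_list's register operand whose mode is checked.
   (Register number invented for the sketch.)  */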
4314 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4317 ix86_option_override (void)
4319 ix86_option_override_internal (true);
4322 /* Update register usage after having seen the compiler flags. */
4325 ix86_conditional_register_usage (void)
4330 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4332 if (fixed_regs[i] > 1)
4333 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4334 if (call_used_regs[i] > 1)
4335 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4338 /* The PIC register, if it exists, is fixed. */
4339 j = PIC_OFFSET_TABLE_REGNUM;
4340 if (j != INVALID_REGNUM)
4341 fixed_regs[j] = call_used_regs[j] = 1;
4343 /* The 64-bit MS_ABI changes the set of call-used registers. */
4344 if (TARGET_64BIT_MS_ABI)
4346 call_used_regs[SI_REG] = 0;
4347 call_used_regs[DI_REG] = 0;
4348 call_used_regs[XMM6_REG] = 0;
4349 call_used_regs[XMM7_REG] = 0;
4350 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4351 call_used_regs[i] = 0;
4354 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4355 other call-clobbered regs for 64-bit. */
4358 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4360 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4361 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4362 && call_used_regs[i])
4363 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4366 /* If MMX is disabled, squash the registers. */
4368 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4369 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4370 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4372 /* If SSE is disabled, squash the registers. */
4374 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4375 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4376 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4378 /* If the FPU is disabled, squash the registers. */
4379 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4380 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4381 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4382 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4384 /* If 32-bit, squash the 64-bit registers. */
4387 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4389 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4395 /* Save the current options */
4398 ix86_function_specific_save (struct cl_target_option *ptr)
4400 ptr->arch = ix86_arch;
4401 ptr->schedule = ix86_schedule;
4402 ptr->tune = ix86_tune;
4403 ptr->fpmath = ix86_fpmath;
4404 ptr->branch_cost = ix86_branch_cost;
4405 ptr->tune_defaulted = ix86_tune_defaulted;
4406 ptr->arch_specified = ix86_arch_specified;
4407 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4408 ptr->ix86_target_flags_explicit = target_flags_explicit;
4410 /* The fields are char but the variables are not; make sure the
4411 values fit in the fields. */
4412 gcc_assert (ptr->arch == ix86_arch);
4413 gcc_assert (ptr->schedule == ix86_schedule);
4414 gcc_assert (ptr->tune == ix86_tune);
4415 gcc_assert (ptr->fpmath == ix86_fpmath);
4416 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4419 /* Restore the current options */
4422 ix86_function_specific_restore (struct cl_target_option *ptr)
4424 enum processor_type old_tune = ix86_tune;
4425 enum processor_type old_arch = ix86_arch;
4426 unsigned int ix86_arch_mask, ix86_tune_mask;
4429 ix86_arch = (enum processor_type) ptr->arch;
4430 ix86_schedule = (enum attr_cpu) ptr->schedule;
4431 ix86_tune = (enum processor_type) ptr->tune;
4432 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4433 ix86_branch_cost = ptr->branch_cost;
4434 ix86_tune_defaulted = ptr->tune_defaulted;
4435 ix86_arch_specified = ptr->arch_specified;
4436 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4437 target_flags_explicit = ptr->ix86_target_flags_explicit;
4439 /* Recreate the arch feature tests if the arch changed */
4440 if (old_arch != ix86_arch)
4442 ix86_arch_mask = 1u << ix86_arch;
4443 for (i = 0; i < X86_ARCH_LAST; ++i)
4444 ix86_arch_features[i]
4445 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4448 /* Recreate the tune optimization tests */
4449 if (old_tune != ix86_tune)
4451 ix86_tune_mask = 1u << ix86_tune;
4452 for (i = 0; i < X86_TUNE_LAST; ++i)
4453 ix86_tune_features[i]
4454 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4458 /* Print the current options */
4461 ix86_function_specific_print (FILE *file, int indent,
4462 struct cl_target_option *ptr)
4465 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4466 NULL, NULL, NULL, false);
4468 fprintf (file, "%*sarch = %d (%s)\n",
4471 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4472 ? cpu_names[ptr->arch]
4475 fprintf (file, "%*stune = %d (%s)\n",
4478 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4479 ? cpu_names[ptr->tune]
4482 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4483 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4484 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4485 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4489 fprintf (file, "%*s%s\n", indent, "", target_string);
4490 free (target_string);
4495 /* Inner function to process the attribute((target(...))), take an argument and
4496 set the current options from the argument. If we have a list, recursively go over the list. */
4500 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4505 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4506 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4507 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4508 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4523 enum ix86_opt_type type;
4528 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4529 IX86_ATTR_ISA ("abm", OPT_mabm),
4530 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4531 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4532 IX86_ATTR_ISA ("aes", OPT_maes),
4533 IX86_ATTR_ISA ("avx", OPT_mavx),
4534 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4535 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4536 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4537 IX86_ATTR_ISA ("sse", OPT_msse),
4538 IX86_ATTR_ISA ("sse2", OPT_msse2),
4539 IX86_ATTR_ISA ("sse3", OPT_msse3),
4540 IX86_ATTR_ISA ("sse4", OPT_msse4),
4541 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4542 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4543 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4544 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4545 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4546 IX86_ATTR_ISA ("xop", OPT_mxop),
4547 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4548 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4549 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4550 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4552 /* string options */
4553 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4554 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4555 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4558 IX86_ATTR_YES ("cld",
4562 IX86_ATTR_NO ("fancy-math-387",
4563 OPT_mfancy_math_387,
4564 MASK_NO_FANCY_MATH_387),
4566 IX86_ATTR_YES ("ieee-fp",
4570 IX86_ATTR_YES ("inline-all-stringops",
4571 OPT_minline_all_stringops,
4572 MASK_INLINE_ALL_STRINGOPS),
4574 IX86_ATTR_YES ("inline-stringops-dynamically",
4575 OPT_minline_stringops_dynamically,
4576 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4578 IX86_ATTR_NO ("align-stringops",
4579 OPT_mno_align_stringops,
4580 MASK_NO_ALIGN_STRINGOPS),
4582 IX86_ATTR_YES ("recip",
4588 /* If this is a list, recurse to get the options. */
4589 if (TREE_CODE (args) == TREE_LIST)
4593 for (; args; args = TREE_CHAIN (args))
4594 if (TREE_VALUE (args)
4595 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4601 else if (TREE_CODE (args) != STRING_CST)
4604 /* Handle multiple arguments separated by commas. */
4605 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4607 while (next_optstr && *next_optstr != '\0')
4609 char *p = next_optstr;
4611 char *comma = strchr (next_optstr, ',');
4612 const char *opt_string;
4613 size_t len, opt_len;
4618 enum ix86_opt_type type = ix86_opt_unknown;
4624 len = comma - next_optstr;
4625 next_optstr = comma + 1;
4633 /* Recognize no-xxx. */
4634 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4643 /* Find the option. */
4646 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4648 type = attrs[i].type;
4649 opt_len = attrs[i].len;
4650 if (ch == attrs[i].string[0]
4651 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4652 && memcmp (p, attrs[i].string, opt_len) == 0)
4655 mask = attrs[i].mask;
4656 opt_string = attrs[i].string;
4661 /* Process the option. */
4664 error ("attribute(target(\"%s\")) is unknown", orig_p);
4668 else if (type == ix86_opt_isa)
4670 struct cl_decoded_option decoded;
4672 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4673 ix86_handle_option (&global_options, &global_options_set,
4674 &decoded, input_location);
4677 else if (type == ix86_opt_yes || type == ix86_opt_no)
4679 if (type == ix86_opt_no)
4680 opt_set_p = !opt_set_p;
4683 target_flags |= mask;
4685 target_flags &= ~mask;
4688 else if (type == ix86_opt_str)
4692 error ("option(\"%s\") was already specified", opt_string);
4696 p_strings[opt] = xstrdup (p + opt_len);
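/* Typical input this parser handles (hypothetical declaration):
       __attribute__((target ("sse4.2,no-3dnow,arch=core2")))
       int foo (void);
   "sse4.2" and "no-3dnow" go through the isa and yes/no cases above,
   while "arch=core2" is a string option stashed in p_strings for
   ix86_valid_target_attribute_tree.  */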
4706 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4709 ix86_valid_target_attribute_tree (tree args)
4711 const char *orig_arch_string = ix86_arch_string;
4712 const char *orig_tune_string = ix86_tune_string;
4713 const char *orig_fpmath_string = ix86_fpmath_string;
4714 int orig_tune_defaulted = ix86_tune_defaulted;
4715 int orig_arch_specified = ix86_arch_specified;
4716 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4719 struct cl_target_option *def
4720 = TREE_TARGET_OPTION (target_option_default_node);
4722 /* Process each of the options on the chain. */
4723 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4726 /* If the changed options are different from the default, rerun
4727 ix86_option_override_internal, and then save the options away.
4728 The string options are attribute options, and will be undone
4729 when we copy the save structure. */
4730 if (ix86_isa_flags != def->x_ix86_isa_flags
4731 || target_flags != def->x_target_flags
4732 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4733 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4734 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4736 /* If we are using the default tune= or arch=, undo the string assigned,
4737 and use the default. */
4738 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4739 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4740 else if (!orig_arch_specified)
4741 ix86_arch_string = NULL;
4743 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4744 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4745 else if (orig_tune_defaulted)
4746 ix86_tune_string = NULL;
4748 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4749 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4750 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4751 else if (!TARGET_64BIT && TARGET_SSE)
4752 ix86_fpmath_string = "sse,387";
4754 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4755 ix86_option_override_internal (false);
4757 /* Add any builtin functions with the new isa if any. */
4758 ix86_add_new_builtins (ix86_isa_flags);
4760 /* Save the current options unless we are validating options for
4762 t = build_target_option_node ();
4764 ix86_arch_string = orig_arch_string;
4765 ix86_tune_string = orig_tune_string;
4766 ix86_fpmath_string = orig_fpmath_string;
4768 /* Free up memory allocated to hold the strings */
4769 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4770 free (option_strings[i]);
4776 /* Hook to validate attribute((target("string"))). */
4779 ix86_valid_target_attribute_p (tree fndecl,
4780 tree ARG_UNUSED (name),
4782 int ARG_UNUSED (flags))
4784 struct cl_target_option cur_target;
4786 tree old_optimize = build_optimization_node ();
4787 tree new_target, new_optimize;
4788 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4790 /* If the function changed the optimization levels as well as setting target
4791 options, start with the optimizations specified. */
4792 if (func_optimize && func_optimize != old_optimize)
4793 cl_optimization_restore (&global_options,
4794 TREE_OPTIMIZATION (func_optimize));
4796 /* The target attributes may also change some optimization flags, so update
4797 the optimization options if necessary. */
4798 cl_target_option_save (&cur_target, &global_options);
4799 new_target = ix86_valid_target_attribute_tree (args);
4800 new_optimize = build_optimization_node ();
4807 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4809 if (old_optimize != new_optimize)
4810 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4813 cl_target_option_restore (&global_options, &cur_target);
4815 if (old_optimize != new_optimize)
4816 cl_optimization_restore (&global_options,
4817 TREE_OPTIMIZATION (old_optimize));
4823 /* Hook to determine if one function can safely inline another. */
4826 ix86_can_inline_p (tree caller, tree callee)
4829 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4830 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4832 /* If callee has no option attributes, then it is ok to inline. */
4836 /* If caller has no option attributes, but callee does, then it is not ok to inline. */
4838 else if (!caller_tree)
4843 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4844 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4846 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4847 can inline an SSE2 function, but an SSE2 function can't inline an SSE4 function. */
4849 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4850 != callee_opts->x_ix86_isa_flags)
4853 /* See if we have the same non-isa options. */
4854 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4857 /* See if arch, tune, etc. are the same. */
4858 else if (caller_opts->arch != callee_opts->arch)
4861 else if (caller_opts->tune != callee_opts->tune)
4864 else if (caller_opts->fpmath != callee_opts->fpmath)
4867 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4878 /* Remember the last target of ix86_set_current_function. */
4879 static GTY(()) tree ix86_previous_fndecl;
4881 /* Establish appropriate back-end context for processing the function
4882 FNDECL. The argument might be NULL to indicate processing at top
4883 level, outside of any function scope. */
4885 ix86_set_current_function (tree fndecl)
4887 /* Only change the context if the function changes. This hook is called
4888 several times in the course of compiling a function, and we don't want to
4889 slow things down too much or call target_reinit when it isn't safe. */
4890 if (fndecl && fndecl != ix86_previous_fndecl)
4892 tree old_tree = (ix86_previous_fndecl
4893 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4896 tree new_tree = (fndecl
4897 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4900 ix86_previous_fndecl = fndecl;
4901 if (old_tree == new_tree)
4906 cl_target_option_restore (&global_options,
4907 TREE_TARGET_OPTION (new_tree));
4913 struct cl_target_option *def
4914 = TREE_TARGET_OPTION (target_option_current_node);
4916 cl_target_option_restore (&global_options, def);
4923 /* Return true if this goes in large data/bss. */
4926 ix86_in_large_data_p (tree exp)
4928 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4931 /* Functions are never large data. */
4932 if (TREE_CODE (exp) == FUNCTION_DECL)
4935 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4937 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4938 if (strcmp (section, ".ldata") == 0
4939 || strcmp (section, ".lbss") == 0)
4945 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4947 /* If this is an incomplete type with size 0, then we can't put it
4948 in data because it might be too big when completed. */
4949 if (!size || size > ix86_section_threshold)
4956 /* Switch to the appropriate section for output of DECL.
4957 DECL is either a `VAR_DECL' node or a constant of some sort.
4958 RELOC indicates whether forming the initial value of DECL requires
4959 link-time relocations. */
4961 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4965 x86_64_elf_select_section (tree decl, int reloc,
4966 unsigned HOST_WIDE_INT align)
4968 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4969 && ix86_in_large_data_p (decl))
4971 const char *sname = NULL;
4972 unsigned int flags = SECTION_WRITE;
4973 switch (categorize_decl_for_section (decl, reloc))
4978 case SECCAT_DATA_REL:
4979 sname = ".ldata.rel";
4981 case SECCAT_DATA_REL_LOCAL:
4982 sname = ".ldata.rel.local";
4984 case SECCAT_DATA_REL_RO:
4985 sname = ".ldata.rel.ro";
4987 case SECCAT_DATA_REL_RO_LOCAL:
4988 sname = ".ldata.rel.ro.local";
4992 flags |= SECTION_BSS;
4995 case SECCAT_RODATA_MERGE_STR:
4996 case SECCAT_RODATA_MERGE_STR_INIT:
4997 case SECCAT_RODATA_MERGE_CONST:
5001 case SECCAT_SRODATA:
5008 /* We don't split these for the medium model. Place them into
5009 default sections and hope for the best. */
5014 /* We might get called with string constants, but get_named_section
5015 doesn't like them as they are not DECLs. Also, we need to set
5016 flags in that case. */
5018 return get_section (sname, flags, NULL);
5019 return get_named_section (decl, sname, reloc);
5022 return default_elf_select_section (decl, reloc, align);
5025 /* Build up a unique section name, expressed as a
5026 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5027 RELOC indicates whether the initial value of EXP requires
5028 link-time relocations. */
5030 static void ATTRIBUTE_UNUSED
5031 x86_64_elf_unique_section (tree decl, int reloc)
5033 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5034 && ix86_in_large_data_p (decl))
5036 const char *prefix = NULL;
5037 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5038 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5040 switch (categorize_decl_for_section (decl, reloc))
5043 case SECCAT_DATA_REL:
5044 case SECCAT_DATA_REL_LOCAL:
5045 case SECCAT_DATA_REL_RO:
5046 case SECCAT_DATA_REL_RO_LOCAL:
5047 prefix = one_only ? ".ld" : ".ldata";
5050 prefix = one_only ? ".lb" : ".lbss";
5053 case SECCAT_RODATA_MERGE_STR:
5054 case SECCAT_RODATA_MERGE_STR_INIT:
5055 case SECCAT_RODATA_MERGE_CONST:
5056 prefix = one_only ? ".lr" : ".lrodata";
5058 case SECCAT_SRODATA:
5065 /* We don't split these for the medium model. Place them into
5066 default sections and hope for the best. */
5071 const char *name, *linkonce;
5074 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5075 name = targetm.strip_name_encoding (name);
5077 /* If we're using one_only, then there needs to be a .gnu.linkonce
5078 prefix to the section name. */
5079 linkonce = one_only ? ".gnu.linkonce" : "";
5081 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5083 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5087 default_unique_section (decl, reloc);
5090 #ifdef COMMON_ASM_OP
5091 /* This says how to output assembler code to declare an
5092 uninitialized external linkage data object.
5094 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
5097 x86_elf_aligned_common (FILE *file,
5098 const char *name, unsigned HOST_WIDE_INT size,
5101 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5102 && size > (unsigned int)ix86_section_threshold)
5103 fputs (".largecomm\t", file);
5105 fputs (COMMON_ASM_OP, file);
5106 assemble_name (file, name);
5107 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5108 size, align / BITS_PER_UNIT);
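/* E.g. a 64-byte object "buf" with 32-byte alignment in the medium
   model, when above the large-data threshold, comes out as
       .largecomm buf,64,32
   (a sketch; the alignment argument is align / BITS_PER_UNIT).  */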
5112 /* Utility function for targets to use in implementing
5113 ASM_OUTPUT_ALIGNED_BSS. */
5116 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5117 const char *name, unsigned HOST_WIDE_INT size,
5120 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5121 && size > (unsigned int)ix86_section_threshold)
5122 switch_to_section (get_named_section (decl, ".lbss", 0));
5124 switch_to_section (bss_section);
5125 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5126 #ifdef ASM_DECLARE_OBJECT_NAME
5127 last_assemble_variable_decl = decl;
5128 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5130 /* The standard thing is to just output a label for the object. */
5131 ASM_OUTPUT_LABEL (file, name);
5132 #endif /* ASM_DECLARE_OBJECT_NAME */
5133 ASM_OUTPUT_SKIP (file, size ? size : 1);
5136 static const struct default_options ix86_option_optimization_table[] =
5138 /* Turn off -fschedule-insns by default. It tends to make the
5139 problem with not enough registers even worse. */
5140 #ifdef INSN_SCHEDULING
5141 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5144 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5145 SUBTARGET_OPTIMIZATION_OPTIONS,
5147 { OPT_LEVELS_NONE, 0, NULL, 0 }
5150 /* Implement TARGET_OPTION_INIT_STRUCT. */
5153 ix86_option_init_struct (struct gcc_options *opts)
5156 /* The Darwin libraries never set errno, so we might as well
5157 avoid calling them when that's the only reason we would. */
5158 opts->x_flag_errno_math = 0;
5160 opts->x_flag_pcc_struct_return = 2;
5161 opts->x_flag_asynchronous_unwind_tables = 2;
5162 opts->x_flag_vect_cost_model = 1;
5165 /* Decide whether we must probe the stack before any space allocation
5166 on this target. It's essentially TARGET_STACK_PROBE except when
5167 -fstack-check causes the stack to be already probed differently. */
5170 ix86_target_stack_probe (void)
5172 /* Do not probe the stack twice if static stack checking is enabled. */
5173 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5176 return TARGET_STACK_PROBE;
5179 /* Decide whether we can make a sibling call to a function. DECL is the
5180 declaration of the function being targeted by the call and EXP is the
5181 CALL_EXPR representing the call. */
5184 ix86_function_ok_for_sibcall (tree decl, tree exp)
5186 tree type, decl_or_type;
5189 /* If we are generating position-independent code, we cannot sibcall
5190 optimize any indirect call, or a direct call to a global function,
5191 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5195 && (!decl || !targetm.binds_local_p (decl)))
5198 /* If we need to align the outgoing stack, then sibcalling would
5199 unalign the stack, which may break the called function. */
5200 if (ix86_minimum_incoming_stack_boundary (true)
5201 < PREFERRED_STACK_BOUNDARY)
5206 decl_or_type = decl;
5207 type = TREE_TYPE (decl);
5211 /* We're looking at the CALL_EXPR, we need the type of the function. */
5212 type = CALL_EXPR_FN (exp); /* pointer expression */
5213 type = TREE_TYPE (type); /* pointer type */
5214 type = TREE_TYPE (type); /* function type */
5215 decl_or_type = type;
5218 /* Check that the return value locations are the same. Like
5219 if we are returning floats on the 80387 register stack, we cannot
5220 make a sibcall from a function that doesn't return a float to a
5221 function that does or, conversely, from a function that does return
5222 a float to a function that doesn't; the necessary stack adjustment
5223 would not be executed. This is also the place we notice
5224 differences in the return value ABI. Note that it is ok for one
5225 of the functions to have void return type as long as the return
5226 value of the other is passed in a register. */
5227 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5228 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5230 if (STACK_REG_P (a) || STACK_REG_P (b))
5232 if (!rtx_equal_p (a, b))
5235 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5237 /* Disable sibcall if we need to generate vzeroupper after the sibcall. */
5239 if (TARGET_VZEROUPPER
5240 && cfun->machine->callee_return_avx256_p
5241 && !cfun->machine->caller_return_avx256_p)
5244 else if (!rtx_equal_p (a, b))
5249 /* The SYSV ABI has more call-clobbered registers;
5250 disallow sibcalls from MS to SYSV. */
5251 if (cfun->machine->call_abi == MS_ABI
5252 && ix86_function_type_abi (type) == SYSV_ABI)
5257 /* If this call is indirect, we'll need to be able to use a
5258 call-clobbered register for the address of the target function.
5259 Make sure that all such registers are not used for passing
5260 parameters. Note that DLLIMPORT functions are indirect. */
5262 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5264 if (ix86_function_regparm (type, NULL) >= 3)
5266 /* ??? Need to count the actual number of registers to be used,
5267 not the possible number of registers. Fix later. */
5273 /* Otherwise okay. That also includes certain types of indirect calls. */
5277 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5278 and "sseregparm" calling convention attributes;
5279 arguments as in struct attribute_spec.handler. */
5282 ix86_handle_cconv_attribute (tree *node, tree name,
5284 int flags ATTRIBUTE_UNUSED,
5287 if (TREE_CODE (*node) != FUNCTION_TYPE
5288 && TREE_CODE (*node) != METHOD_TYPE
5289 && TREE_CODE (*node) != FIELD_DECL
5290 && TREE_CODE (*node) != TYPE_DECL)
5292 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5294 *no_add_attrs = true;
5298 /* Can combine regparm with all attributes but fastcall and thiscall. */
5299 if (is_attribute_p ("regparm", name))
5303 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5305 error ("fastcall and regparm attributes are not compatible");
5308 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5310 error ("regparm and thiscall attributes are not compatible");
5313 cst = TREE_VALUE (args);
5314 if (TREE_CODE (cst) != INTEGER_CST)
5316 warning (OPT_Wattributes,
5317 "%qE attribute requires an integer constant argument",
5319 *no_add_attrs = true;
5321 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5323 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5325 *no_add_attrs = true;
5333 /* Do not warn when emulating the MS ABI. */
5334 if ((TREE_CODE (*node) != FUNCTION_TYPE
5335 && TREE_CODE (*node) != METHOD_TYPE)
5336 || ix86_function_type_abi (*node) != MS_ABI)
5337 warning (OPT_Wattributes, "%qE attribute ignored",
5339 *no_add_attrs = true;
5343 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5344 if (is_attribute_p ("fastcall", name))
5346 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5348 error ("fastcall and cdecl attributes are not compatible");
5350 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5352 error ("fastcall and stdcall attributes are not compatible");
5354 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5356 error ("fastcall and regparm attributes are not compatible");
5358 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5360 error ("fastcall and thiscall attributes are not compatible");
5364 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
5366 else if (is_attribute_p ("stdcall", name))
5368 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5370 error ("stdcall and cdecl attributes are not compatible");
5372 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5374 error ("stdcall and fastcall attributes are not compatible");
5376 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5378 error ("stdcall and thiscall attributes are not compatible");
5382 /* Can combine cdecl with regparm and sseregparm. */
5383 else if (is_attribute_p ("cdecl", name))
5385 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5387 error ("stdcall and cdecl attributes are not compatible");
5389 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5391 error ("fastcall and cdecl attributes are not compatible");
5393 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5395 error ("cdecl and thiscall attributes are not compatible");
5398 else if (is_attribute_p ("thiscall", name))
5400 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5401 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5403 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5405 error ("stdcall and thiscall attributes are not compatible");
5407 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5409 error ("fastcall and thiscall attributes are not compatible");
5411 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5413 error ("cdecl and thiscall attributes are not compatible");
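/* For reference, a combination rejected by the checks above
   (hypothetical declaration):
       __attribute__((fastcall, stdcall)) void f (void);   (error)
   while regparm plus cdecl, for instance, is accepted.  */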
5417 /* Can combine sseregparm with all attributes. */
5422 /* This function determines from TYPE the calling-convention. */
5425 ix86_get_callcvt (const_tree type)
5427 unsigned int ret = 0;
5432 return IX86_CALLCVT_CDECL;
5434 attrs = TYPE_ATTRIBUTES (type);
5435 if (attrs != NULL_TREE)
5437 if (lookup_attribute ("cdecl", attrs))
5438 ret |= IX86_CALLCVT_CDECL;
5439 else if (lookup_attribute ("stdcall", attrs))
5440 ret |= IX86_CALLCVT_STDCALL;
5441 else if (lookup_attribute ("fastcall", attrs))
5442 ret |= IX86_CALLCVT_FASTCALL;
5443 else if (lookup_attribute ("thiscall", attrs))
5444 ret |= IX86_CALLCVT_THISCALL;
5446 /* Regparm isn't allowed for thiscall and fastcall. */
5447 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5449 if (lookup_attribute ("regparm", attrs))
5450 ret |= IX86_CALLCVT_REGPARM;
5451 if (lookup_attribute ("sseregparm", attrs))
5452 ret |= IX86_CALLCVT_SSEREGPARM;
5455 if (IX86_BASE_CALLCVT(ret) != 0)
5459 is_stdarg = stdarg_p (type);
5460 if (TARGET_RTD && !is_stdarg)
5461 return IX86_CALLCVT_STDCALL | ret;
5465 || TREE_CODE (type) != METHOD_TYPE
5466 || ix86_function_type_abi (type) != MS_ABI)
5467 return IX86_CALLCVT_CDECL | ret;
5469 return IX86_CALLCVT_THISCALL;
5472 /* Return 0 if the attributes for two types are incompatible, 1 if they
5473 are compatible, and 2 if they are nearly compatible (which causes a
5474 warning to be generated). */
5477 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5479 unsigned int ccvt1, ccvt2;
5481 if (TREE_CODE (type1) != FUNCTION_TYPE
5482 && TREE_CODE (type1) != METHOD_TYPE)
5485 ccvt1 = ix86_get_callcvt (type1);
5486 ccvt2 = ix86_get_callcvt (type2);
5489 if (ix86_function_regparm (type1, NULL)
5490 != ix86_function_regparm (type2, NULL))
5496 /* Return the regparm value for a function with the indicated TYPE and DECL.
5497 DECL may be NULL when calling a function indirectly
5498 or considering a libcall. */
5501 ix86_function_regparm (const_tree type, const_tree decl)
5508 return (ix86_function_type_abi (type) == SYSV_ABI
5509 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5510 ccvt = ix86_get_callcvt (type);
5511 regparm = ix86_regparm;
5513 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5515 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5518 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5522 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5524 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5527 /* Use register calling convention for local functions when possible. */
5529 && TREE_CODE (decl) == FUNCTION_DECL
5531 && !(profile_flag && !flag_fentry))
5533 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5534 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5535 if (i && i->local && i->can_change_signature)
5537 int local_regparm, globals = 0, regno;
5539 /* Make sure no regparm register is taken by a
5540 fixed register variable. */
5541 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5542 if (fixed_regs[local_regparm])
5545 /* We don't want to use regparm(3) for nested functions as
5546 these use a static chain pointer in the third argument. */
5547 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5550 /* In 32-bit mode save a register for the split stack. */
5551 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5554 /* Each fixed register usage increases register pressure,
5555 so fewer registers should be used for argument passing.
5556 This functionality can be overridden by an explicit regparm value. */
5558 for (regno = 0; regno <= DI_REG; regno++)
5559 if (fixed_regs[regno])
5563 = globals < local_regparm ? local_regparm - globals : 0;
5565 if (local_regparm > regparm)
5566 regparm = local_regparm;
5573 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5574 DFmode (2) arguments in SSE registers for a function with the
5575 indicated TYPE and DECL. DECL may be NULL when calling a function
5576 indirectly or considering a libcall. Otherwise return 0. */
5579 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5581 gcc_assert (!TARGET_64BIT);
5583 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5584 by the sseregparm attribute. */
5585 if (TARGET_SSEREGPARM
5586 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5593 error ("calling %qD with attribute sseregparm without "
5594 "SSE/SSE2 enabled", decl);
5596 error ("calling %qT with attribute sseregparm without "
5597 "SSE/SSE2 enabled", type);
5605 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5606 (and DFmode for SSE2) arguments in SSE registers. */
5607 if (decl && TARGET_SSE_MATH && optimize
5608 && !(profile_flag && !flag_fentry))
5610 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5611 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5612 if (i && i->local && i->can_change_signature)
5613 return TARGET_SSE2 ? 2 : 1;
5619 /* Return true if EAX is live at the start of the function. Used by
5620 ix86_expand_prologue to determine if we need special help before
5621 calling allocate_stack_worker. */
5624 ix86_eax_live_at_start_p (void)
5626 /* Cheat. Don't bother working forward from ix86_function_regparm
5627 to the function type to whether an actual argument is located in
5628 eax. Instead just look at cfg info, which is still close enough
5629 to correct at this point. This gives false positives for broken
5630 functions that might use uninitialized data that happens to be
5631 allocated in eax, but who cares? */
5632 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5636 ix86_keep_aggregate_return_pointer (tree fntype)
5642 attr = lookup_attribute ("callee_pop_aggregate_return",
5643 TYPE_ATTRIBUTES (fntype));
5645 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5647   /* For 32-bit MS-ABI the default is to keep aggregate
5648      return pointer.  */
5649 if (ix86_function_type_abi (fntype) == MS_ABI)
5652 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5655 /* Value is the number of bytes of arguments automatically
5656 popped when returning from a subroutine call.
5657 FUNDECL is the declaration node of the function (as a tree),
5658 FUNTYPE is the data type of the function (as a tree),
5659 or for a library call it is an identifier node for the subroutine name.
5660 SIZE is the number of bytes of arguments passed on the stack.
5662 On the 80386, the RTD insn may be used to pop them if the number
5663 of args is fixed, but if the number is variable then the caller
5664 must pop them all. RTD can't be used for library calls now
5665 because the library is compiled with the Unix compiler.
5666 Use of RTD is a selectable option, since it is incompatible with
5667 standard Unix calling sequences. If the option is not selected,
5668 the caller must always pop the args.
5670 The attribute stdcall is equivalent to RTD on a per module basis. */
5673 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5677 /* None of the 64-bit ABIs pop arguments. */
5681 ccvt = ix86_get_callcvt (funtype);
5683 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5684 | IX86_CALLCVT_THISCALL)) != 0
5685 && ! stdarg_p (funtype))
5688 /* Lose any fake structure return argument if it is passed on the stack. */
5689 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5690 && !ix86_keep_aggregate_return_pointer (funtype))
5692 int nregs = ix86_function_regparm (funtype, fundecl);
5694 return GET_MODE_SIZE (Pmode);
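/* Example (illustrative): given

     int __attribute__((stdcall)) f (int a, int b);

   ix86_return_pops_args returns 8 and the callee ends in "ret $8",
   popping both arguments itself; a varargs prototype disables this and
   leaves the popping to the caller.  */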
5700 /* Argument support functions. */
5702 /* Return true when REGNO may be used to pass function parameters.  */
5704 ix86_function_arg_regno_p (int regno)
5707 const int *parm_regs;
5712 return (regno < REGPARM_MAX
5713 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5715 return (regno < REGPARM_MAX
5716 || (TARGET_MMX && MMX_REGNO_P (regno)
5717 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5718 || (TARGET_SSE && SSE_REGNO_P (regno)
5719 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5724 if (SSE_REGNO_P (regno) && TARGET_SSE)
5729 if (TARGET_SSE && SSE_REGNO_P (regno)
5730 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5734   /* TODO: The function should depend on the current function ABI but
5735      builtins.c would need updating then.  Therefore we use the
5736      default ABI here.  */
5738   /* RAX is used as a hidden argument to va_arg functions.  */
5739 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5742 if (ix86_abi == MS_ABI)
5743 parm_regs = x86_64_ms_abi_int_parameter_registers;
5745 parm_regs = x86_64_int_parameter_registers;
5746 for (i = 0; i < (ix86_abi == MS_ABI
5747 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5748 if (regno == parm_regs[i])
5753 /* Return true if we do not know how to pass TYPE solely in registers.  */
5756 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5758 if (must_pass_in_stack_var_size_or_pad (mode, type))
5761 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5762 The layout_type routine is crafty and tries to trick us into passing
5763 currently unsupported vector types on the stack by using TImode. */
5764 return (!TARGET_64BIT && mode == TImode
5765 && type && TREE_CODE (type) != VECTOR_TYPE);
5768 /* Return the size, in bytes, of the area reserved for arguments passed
5769    in registers for the function represented by FNDECL, depending on the
5770    ABI used.  */
5772 ix86_reg_parm_stack_space (const_tree fndecl)
5774 enum calling_abi call_abi = SYSV_ABI;
5775 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5776 call_abi = ix86_function_abi (fndecl);
5778 call_abi = ix86_function_type_abi (fndecl);
5779 if (TARGET_64BIT && call_abi == MS_ABI)
5784 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5785    call ABI used.  */
5787 ix86_function_type_abi (const_tree fntype)
5789 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5791 enum calling_abi abi = ix86_abi;
5792 if (abi == SYSV_ABI)
5794 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5797 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5805 ix86_function_ms_hook_prologue (const_tree fn)
5807 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5809 if (decl_function_context (fn) != NULL_TREE)
5810 error_at (DECL_SOURCE_LOCATION (fn),
5811 "ms_hook_prologue is not compatible with nested function");
5818 static enum calling_abi
5819 ix86_function_abi (const_tree fndecl)
5823 return ix86_function_type_abi (TREE_TYPE (fndecl));
5826 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5827    call ABI used.  */
5829 ix86_cfun_abi (void)
5833 return cfun->machine->call_abi;
5836 /* Write the extra assembler code needed to declare a function properly. */
5839 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5842 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5846 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5847 unsigned int filler_cc = 0xcccccccc;
5849 for (i = 0; i < filler_count; i += 4)
5850 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5853 #ifdef SUBTARGET_ASM_UNWIND_INIT
5854 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5857 ASM_OUTPUT_LABEL (asm_out_file, fname);
5859 /* Output magic byte marker, if hot-patch attribute is set. */
5864 /* leaq [%rsp + 0], %rsp */
5865 asm_fprintf (asm_out_file, ASM_BYTE
5866 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5870 	  /* movl.s %edi, %edi
5871 	     push   %ebp
5872 	     movl.s %esp, %ebp */
5873 asm_fprintf (asm_out_file, ASM_BYTE
5874 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5880 extern void init_regs (void);
5882 /* Implementation of the call ABI switching target hook.  Sets the
5883    call-used register sets specific to FNDECL.  See also
5884    ix86_conditional_register_usage for more details.  */
5886 ix86_call_abi_override (const_tree fndecl)
5888 if (fndecl == NULL_TREE)
5889 cfun->machine->call_abi = ix86_abi;
5891 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5894 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
5895    Avoid expensive re-initialization of init_regs each time we switch function
5896    context, since this is needed only during RTL expansion.  */
5898 ix86_maybe_switch_abi (void)
5901 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5905 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5906 for a call to a function whose data type is FNTYPE.
5907 For a library call, FNTYPE is 0. */
5910 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5911 tree fntype, /* tree ptr for function decl */
5912 rtx libname, /* SYMBOL_REF of library name or 0 */
5916 struct cgraph_local_info *i;
5919 memset (cum, 0, sizeof (*cum));
5921 /* Initialize for the current callee. */
5924 cfun->machine->callee_pass_avx256_p = false;
5925 cfun->machine->callee_return_avx256_p = false;
5930 i = cgraph_local_info (fndecl);
5931 cum->call_abi = ix86_function_abi (fndecl);
5932 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5937 cum->call_abi = ix86_function_type_abi (fntype);
5939 fnret_type = TREE_TYPE (fntype);
5944 if (TARGET_VZEROUPPER && fnret_type)
5946 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5948 if (function_pass_avx256_p (fnret_value))
5950 /* The return value of this function uses 256bit AVX modes. */
5952 cfun->machine->callee_return_avx256_p = true;
5954 cfun->machine->caller_return_avx256_p = true;
5958 cum->caller = caller;
5960 /* Set up the number of registers to use for passing arguments. */
5962 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5963 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5964 "or subtarget optimization implying it");
5965 cum->nregs = ix86_regparm;
5968 cum->nregs = (cum->call_abi == SYSV_ABI
5969 ? X86_64_REGPARM_MAX
5970 : X86_64_MS_REGPARM_MAX);
5974 cum->sse_nregs = SSE_REGPARM_MAX;
5977 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5978 ? X86_64_SSE_REGPARM_MAX
5979 : X86_64_MS_SSE_REGPARM_MAX);
5983 cum->mmx_nregs = MMX_REGPARM_MAX;
5984 cum->warn_avx = true;
5985 cum->warn_sse = true;
5986 cum->warn_mmx = true;
5988   /* Because the type might mismatch between caller and callee, we need to
5989      use the actual type of the function for local calls.
5990      FIXME: cgraph_analyze can be told to actually record if function uses
5991      va_start so for local functions maybe_vaarg can be made aggressive
5992      helping K&R code.
5993      FIXME: once the type system is fixed, we won't need this code anymore.  */
5994 if (i && i->local && i->can_change_signature)
5995 fntype = TREE_TYPE (fndecl);
5996 cum->maybe_vaarg = (fntype
5997 ? (!prototype_p (fntype) || stdarg_p (fntype))
6002 /* If there are variable arguments, then we won't pass anything
6003 in registers in 32-bit mode. */
6004 if (stdarg_p (fntype))
6015 /* Use ecx and edx registers if function has fastcall attribute,
6016 else look for regparm information. */
6019 unsigned int ccvt = ix86_get_callcvt (fntype);
6020 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6023 cum->fastcall = 1; /* Same first register as in fastcall. */
6025 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6031 cum->nregs = ix86_function_regparm (fntype, fndecl);
6034 /* Set up the number of SSE registers used for passing SFmode
6035 and DFmode arguments. Warn for mismatching ABI. */
6036 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6040 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6041 But in the case of vector types, it is some vector mode.
6043 When we have only some of our vector isa extensions enabled, then there
6044 are some modes for which vector_mode_supported_p is false. For these
6045 modes, the generic vector support in gcc will choose some non-vector mode
6046 in order to implement the type. By computing the natural mode, we'll
6047 select the proper ABI location for the operand and not depend on whatever
6048 the middle-end decides to do with these vector types.
6050    The middle-end can't deal with vector types > 16 bytes.  In this
6051    case, we return the original mode and warn about the ABI change if
6052    CUM isn't NULL.  */
6054 static enum machine_mode
6055 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
6057 enum machine_mode mode = TYPE_MODE (type);
6059 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6061 HOST_WIDE_INT size = int_size_in_bytes (type);
6062 if ((size == 8 || size == 16 || size == 32)
6063 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6064 && TYPE_VECTOR_SUBPARTS (type) > 1)
6066 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6068 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6069 mode = MIN_MODE_VECTOR_FLOAT;
6071 mode = MIN_MODE_VECTOR_INT;
6073 /* Get the mode which has this inner mode and number of units. */
6074 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6075 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6076 	    && GET_MODE_INNER (mode) == innermode)
6077 	  {
6078 if (size == 32 && !TARGET_AVX)
6080 static bool warnedavx;
6087 warning (0, "AVX vector argument without AVX "
6088 "enabled changes the ABI");
6090 return TYPE_MODE (type);
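/* Example (illustrative): a user type declared as

     typedef float v4sf __attribute__((vector_size (16)));

   is laid out by the middle-end in a non-vector mode when SSE is
   disabled, yet type_natural_mode still computes V4SFmode here, so the
   ABI location of such an argument does not depend on the ISA flags.
   32-byte vectors without AVX are the exception: they keep TYPE_MODE
   after the warning above.  */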
6103 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6104 this may not agree with the mode that the type system has chosen for the
6105 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6106 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6109 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6114 if (orig_mode != BLKmode)
6115 tmp = gen_rtx_REG (orig_mode, regno);
6118 tmp = gen_rtx_REG (mode, regno);
6119 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6120 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
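/* For reference (illustrative): with ORIG_MODE == BLKmode the result
   built above has roughly the shape

     (parallel:BLK [(expr_list (reg:V4SF xmm0) (const_int 0))])

   i.e. a one-entry PARALLEL placing the register at byte offset 0,
   which lets the middle-end move an aggregate in and out of a register
   whose mode differs from the aggregate's BLKmode.  */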
6126 /* x86-64 register passing implementation.  See the x86-64 psABI for details.
6127    The goal of this code is to classify each 8-byte chunk of the incoming
6128    argument by register class and assign registers accordingly.  */
6130 /* Return the union class of CLASS1 and CLASS2.
6131 See the x86-64 PS ABI for details. */
6133 static enum x86_64_reg_class
6134 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6136 /* Rule #1: If both classes are equal, this is the resulting class. */
6137 if (class1 == class2)
6140 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6142 if (class1 == X86_64_NO_CLASS)
6144 if (class2 == X86_64_NO_CLASS)
6147 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6148 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6149 return X86_64_MEMORY_CLASS;
6151 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6152 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6153 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6154 return X86_64_INTEGERSI_CLASS;
6155 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6156 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6157 return X86_64_INTEGER_CLASS;
6159 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6161 if (class1 == X86_64_X87_CLASS
6162 || class1 == X86_64_X87UP_CLASS
6163 || class1 == X86_64_COMPLEX_X87_CLASS
6164 || class2 == X86_64_X87_CLASS
6165 || class2 == X86_64_X87UP_CLASS
6166 || class2 == X86_64_COMPLEX_X87_CLASS)
6167 return X86_64_MEMORY_CLASS;
6169 /* Rule #6: Otherwise class SSE is used. */
6170 return X86_64_SSE_CLASS;
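/* Worked example (illustrative): a word containing both a float and an
   int merges by rule #4,

     merge_classes (X86_64_SSESF_CLASS, X86_64_INTEGERSI_CLASS)
       == X86_64_INTEGERSI_CLASS

   whereas a word shared by a float and a double falls through to rule
   #6 and merges to X86_64_SSE_CLASS.  */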
6173 /* Classify the argument of type TYPE and mode MODE.
6174 CLASSES will be filled by the register class used to pass each word
6175 of the operand. The number of words is returned. In case the parameter
6176 should be passed in memory, 0 is returned. As a special case for zero
6177 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6179    BIT_OFFSET is used internally for handling records and specifies the
6180    offset in bits modulo 256 to avoid overflow cases.
6182 See the x86-64 PS ABI for details.
6186 classify_argument (enum machine_mode mode, const_tree type,
6187 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6189 HOST_WIDE_INT bytes =
6190 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6191 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6193 /* Variable sized entities are always passed/returned in memory. */
6197 if (mode != VOIDmode
6198 && targetm.calls.must_pass_in_stack (mode, type))
6201 if (type && AGGREGATE_TYPE_P (type))
6205 enum x86_64_reg_class subclasses[MAX_CLASSES];
6207 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6211 for (i = 0; i < words; i++)
6212 classes[i] = X86_64_NO_CLASS;
6214   /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
6215      signal the memory class, so handle it as a special case.  */
6218 classes[0] = X86_64_NO_CLASS;
6222 /* Classify each field of record and merge classes. */
6223 switch (TREE_CODE (type))
6226 /* And now merge the fields of structure. */
6227 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6229 if (TREE_CODE (field) == FIELD_DECL)
6233 if (TREE_TYPE (field) == error_mark_node)
6236 /* Bitfields are always classified as integer. Handle them
6237 early, since later code would consider them to be
6238 misaligned integers. */
6239 if (DECL_BIT_FIELD (field))
6241 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6242 i < ((int_bit_position (field) + (bit_offset % 64))
6243 + tree_low_cst (DECL_SIZE (field), 0)
6246 merge_classes (X86_64_INTEGER_CLASS,
6253 type = TREE_TYPE (field);
6255 /* Flexible array member is ignored. */
6256 if (TYPE_MODE (type) == BLKmode
6257 && TREE_CODE (type) == ARRAY_TYPE
6258 && TYPE_SIZE (type) == NULL_TREE
6259 && TYPE_DOMAIN (type) != NULL_TREE
6260 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6265 if (!warned && warn_psabi)
6268 inform (input_location,
6269 "the ABI of passing struct with"
6270 " a flexible array member has"
6271 " changed in GCC 4.4");
6275 num = classify_argument (TYPE_MODE (type), type,
6277 (int_bit_position (field)
6278 + bit_offset) % 256);
6281 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6282 for (i = 0; i < num && (i + pos) < words; i++)
6284 merge_classes (subclasses[i], classes[i + pos]);
6291 /* Arrays are handled as small records. */
6294 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6295 TREE_TYPE (type), subclasses, bit_offset);
6299 /* The partial classes are now full classes. */
6300 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6301 subclasses[0] = X86_64_SSE_CLASS;
6302 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6303 && !((bit_offset % 64) == 0 && bytes == 4))
6304 subclasses[0] = X86_64_INTEGER_CLASS;
6306 for (i = 0; i < words; i++)
6307 classes[i] = subclasses[i % num];
6312 case QUAL_UNION_TYPE:
6313 	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
6315 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6317 if (TREE_CODE (field) == FIELD_DECL)
6321 if (TREE_TYPE (field) == error_mark_node)
6324 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6325 TREE_TYPE (field), subclasses,
6329 for (i = 0; i < num; i++)
6330 classes[i] = merge_classes (subclasses[i], classes[i]);
6341 	  /* When size > 16 bytes, if the first class isn't
6342 	     X86_64_SSE_CLASS or any of the others isn't
6343 	     X86_64_SSEUP_CLASS, everything should be passed in
6344 	     memory.  */
6345 if (classes[0] != X86_64_SSE_CLASS)
6348 for (i = 1; i < words; i++)
6349 if (classes[i] != X86_64_SSEUP_CLASS)
6353 /* Final merger cleanup. */
6354 for (i = 0; i < words; i++)
6356 /* If one class is MEMORY, everything should be passed in
6358 if (classes[i] == X86_64_MEMORY_CLASS)
6361 	  /* The X86_64_SSEUP_CLASS should always be preceded by
6362 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6363 if (classes[i] == X86_64_SSEUP_CLASS
6364 && classes[i - 1] != X86_64_SSE_CLASS
6365 && classes[i - 1] != X86_64_SSEUP_CLASS)
6367 /* The first one should never be X86_64_SSEUP_CLASS. */
6368 gcc_assert (i != 0);
6369 classes[i] = X86_64_SSE_CLASS;
6372 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6373 everything should be passed in memory. */
6374 if (classes[i] == X86_64_X87UP_CLASS
6375 && (classes[i - 1] != X86_64_X87_CLASS))
6379 /* The first one should never be X86_64_X87UP_CLASS. */
6380 gcc_assert (i != 0);
6381 if (!warned && warn_psabi)
6384 inform (input_location,
6385 "the ABI of passing union with long double"
6386 " has changed in GCC 4.4");
6394   /* Compute the alignment needed.  We align all types to natural boundaries
6395      with the exception of XFmode, which is aligned to 64 bits.  */
6396 if (mode != VOIDmode && mode != BLKmode)
6398 int mode_alignment = GET_MODE_BITSIZE (mode);
6401 mode_alignment = 128;
6402 else if (mode == XCmode)
6403 mode_alignment = 256;
6404 if (COMPLEX_MODE_P (mode))
6405 mode_alignment /= 2;
6406 /* Misaligned fields are always returned in memory. */
6407 if (bit_offset % mode_alignment)
6411   /* For V1xx modes, just use the base mode.  */
6412 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6413 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6414 mode = GET_MODE_INNER (mode);
6416 /* Classification of atomic types. */
6421 classes[0] = X86_64_SSE_CLASS;
6424 classes[0] = X86_64_SSE_CLASS;
6425 classes[1] = X86_64_SSEUP_CLASS;
6435 	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6439 classes[0] = X86_64_INTEGERSI_CLASS;
6442 else if (size <= 64)
6444 classes[0] = X86_64_INTEGER_CLASS;
6447 	else if (size <= 64 + 32)
6449 classes[0] = X86_64_INTEGER_CLASS;
6450 classes[1] = X86_64_INTEGERSI_CLASS;
6453 	else if (size <= 64 + 64)
6455 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6463 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6467 /* OImode shouldn't be used directly. */
6472 if (!(bit_offset % 64))
6473 classes[0] = X86_64_SSESF_CLASS;
6475 classes[0] = X86_64_SSE_CLASS;
6478 classes[0] = X86_64_SSEDF_CLASS;
6481 classes[0] = X86_64_X87_CLASS;
6482 classes[1] = X86_64_X87UP_CLASS;
6485 classes[0] = X86_64_SSE_CLASS;
6486 classes[1] = X86_64_SSEUP_CLASS;
6489 classes[0] = X86_64_SSE_CLASS;
6490 if (!(bit_offset % 64))
6496 if (!warned && warn_psabi)
6499 inform (input_location,
6500 "the ABI of passing structure with complex float"
6501 " member has changed in GCC 4.4");
6503 classes[1] = X86_64_SSESF_CLASS;
6507 classes[0] = X86_64_SSEDF_CLASS;
6508 classes[1] = X86_64_SSEDF_CLASS;
6511 classes[0] = X86_64_COMPLEX_X87_CLASS;
6514       /* This mode is larger than 16 bytes.  */
6522 classes[0] = X86_64_SSE_CLASS;
6523 classes[1] = X86_64_SSEUP_CLASS;
6524 classes[2] = X86_64_SSEUP_CLASS;
6525 classes[3] = X86_64_SSEUP_CLASS;
6533 classes[0] = X86_64_SSE_CLASS;
6534 classes[1] = X86_64_SSEUP_CLASS;
6542 classes[0] = X86_64_SSE_CLASS;
6548 gcc_assert (VECTOR_MODE_P (mode));
6553 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6555 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6556 classes[0] = X86_64_INTEGERSI_CLASS;
6558 classes[0] = X86_64_INTEGER_CLASS;
6559 classes[1] = X86_64_INTEGER_CLASS;
6560 return 1 + (bytes > 8);
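/* Worked example (illustrative): classifying

     struct { double d; long l; }

   on x86-64 produces two words: classes[0] = X86_64_SSEDF_CLASS for
   the double and classes[1] = X86_64_INTEGER_CLASS for the long, so
   the struct travels in one SSE register and one integer register.  */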
6564 /* Examine the argument and set the number of registers required in each
6565    class.  Return 0 iff the parameter should be passed in memory.  */
6567 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6568 int *int_nregs, int *sse_nregs)
6570 enum x86_64_reg_class regclass[MAX_CLASSES];
6571 int n = classify_argument (mode, type, regclass, 0);
6577 for (n--; n >= 0; n--)
6578 switch (regclass[n])
6580 case X86_64_INTEGER_CLASS:
6581 case X86_64_INTEGERSI_CLASS:
6584 case X86_64_SSE_CLASS:
6585 case X86_64_SSESF_CLASS:
6586 case X86_64_SSEDF_CLASS:
6589 case X86_64_NO_CLASS:
6590 case X86_64_SSEUP_CLASS:
6592 case X86_64_X87_CLASS:
6593 case X86_64_X87UP_CLASS:
6597 case X86_64_COMPLEX_X87_CLASS:
6598 return in_return ? 2 : 0;
6599 case X86_64_MEMORY_CLASS:
6605 /* Construct container for the argument used by GCC interface. See
6606 FUNCTION_ARG for the detailed description. */
6609 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6610 const_tree type, int in_return, int nintregs, int nsseregs,
6611 const int *intreg, int sse_regno)
6613 /* The following variables hold the static issued_error state. */
6614 static bool issued_sse_arg_error;
6615 static bool issued_sse_ret_error;
6616 static bool issued_x87_ret_error;
6618 enum machine_mode tmpmode;
6619   int bytes =
6620     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6621 enum x86_64_reg_class regclass[MAX_CLASSES];
6625 int needed_sseregs, needed_intregs;
6626 rtx exp[MAX_CLASSES];
6629 n = classify_argument (mode, type, regclass, 0);
6632 if (!examine_argument (mode, type, in_return, &needed_intregs,
6635 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6638 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6639 some less clueful developer tries to use floating-point anyway. */
6640 if (needed_sseregs && !TARGET_SSE)
6644 if (!issued_sse_ret_error)
6646 error ("SSE register return with SSE disabled");
6647 issued_sse_ret_error = true;
6650 else if (!issued_sse_arg_error)
6652 error ("SSE register argument with SSE disabled");
6653 issued_sse_arg_error = true;
6658 /* Likewise, error if the ABI requires us to return values in the
6659 x87 registers and the user specified -mno-80387. */
6660 if (!TARGET_80387 && in_return)
6661 for (i = 0; i < n; i++)
6662 if (regclass[i] == X86_64_X87_CLASS
6663 || regclass[i] == X86_64_X87UP_CLASS
6664 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6666 if (!issued_x87_ret_error)
6668 error ("x87 register return with x87 disabled");
6669 issued_x87_ret_error = true;
6674   /* First construct simple cases.  Avoid SCmode, since we want to use
6675      a single register to pass this type.  */
6676 if (n == 1 && mode != SCmode)
6677 switch (regclass[0])
6679 case X86_64_INTEGER_CLASS:
6680 case X86_64_INTEGERSI_CLASS:
6681 return gen_rtx_REG (mode, intreg[0]);
6682 case X86_64_SSE_CLASS:
6683 case X86_64_SSESF_CLASS:
6684 case X86_64_SSEDF_CLASS:
6685 if (mode != BLKmode)
6686 return gen_reg_or_parallel (mode, orig_mode,
6687 SSE_REGNO (sse_regno));
6689 case X86_64_X87_CLASS:
6690 case X86_64_COMPLEX_X87_CLASS:
6691 return gen_rtx_REG (mode, FIRST_STACK_REG);
6692 case X86_64_NO_CLASS:
6693 /* Zero sized array, struct or class. */
6698 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6699 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6700 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6702 && regclass[0] == X86_64_SSE_CLASS
6703 && regclass[1] == X86_64_SSEUP_CLASS
6704 && regclass[2] == X86_64_SSEUP_CLASS
6705 && regclass[3] == X86_64_SSEUP_CLASS
6707 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6710 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6711 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6712 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6713 && regclass[1] == X86_64_INTEGER_CLASS
6714 && (mode == CDImode || mode == TImode || mode == TFmode)
6715 && intreg[0] + 1 == intreg[1])
6716 return gen_rtx_REG (mode, intreg[0]);
6718 /* Otherwise figure out the entries of the PARALLEL. */
6719 for (i = 0; i < n; i++)
6723 switch (regclass[i])
6725 case X86_64_NO_CLASS:
6727 case X86_64_INTEGER_CLASS:
6728 case X86_64_INTEGERSI_CLASS:
6729 /* Merge TImodes on aligned occasions here too. */
6730 if (i * 8 + 8 > bytes)
6731 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6732 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6736 /* We've requested 24 bytes we don't have mode for. Use DImode. */
6737 if (tmpmode == BLKmode)
6739 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6740 gen_rtx_REG (tmpmode, *intreg),
6744 case X86_64_SSESF_CLASS:
6745 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6746 gen_rtx_REG (SFmode,
6747 SSE_REGNO (sse_regno)),
6751 case X86_64_SSEDF_CLASS:
6752 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6753 gen_rtx_REG (DFmode,
6754 SSE_REGNO (sse_regno)),
6758 case X86_64_SSE_CLASS:
6766 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6776 && regclass[1] == X86_64_SSEUP_CLASS
6777 && regclass[2] == X86_64_SSEUP_CLASS
6778 && regclass[3] == X86_64_SSEUP_CLASS);
6785 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6786 gen_rtx_REG (tmpmode,
6787 SSE_REGNO (sse_regno)),
6796 /* Empty aligned struct, union or class. */
6800 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6801 for (i = 0; i < nexps; i++)
6802 XVECEXP (ret, 0, i) = exp [i];
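/* Continuing the worked example (illustrative): for
   struct { double d; long l; } passed as the first argument,
   construct_container builds roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   placing the double at byte offset 0 in an SSE register and the long
   at byte offset 8 in the first free integer register.  */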
6806 /* Update the data in CUM to advance over an argument of mode MODE
6807 and data type TYPE. (TYPE is null for libcalls where that information
6808 may not be available.) */
6811 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6812 const_tree type, HOST_WIDE_INT bytes,
6813 HOST_WIDE_INT words)
6829 cum->words += words;
6830 cum->nregs -= words;
6831 cum->regno += words;
6833 if (cum->nregs <= 0)
6841 /* OImode shouldn't be used directly. */
6845 if (cum->float_in_sse < 2)
6848 if (cum->float_in_sse < 1)
6865 if (!type || !AGGREGATE_TYPE_P (type))
6867 cum->sse_words += words;
6868 cum->sse_nregs -= 1;
6869 cum->sse_regno += 1;
6870 if (cum->sse_nregs <= 0)
6884 if (!type || !AGGREGATE_TYPE_P (type))
6886 cum->mmx_words += words;
6887 cum->mmx_nregs -= 1;
6888 cum->mmx_regno += 1;
6889 if (cum->mmx_nregs <= 0)
6900 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6901 const_tree type, HOST_WIDE_INT words, bool named)
6903 int int_nregs, sse_nregs;
6905   /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6906 if (!named && VALID_AVX256_REG_MODE (mode))
6909 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6910 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6912 cum->nregs -= int_nregs;
6913 cum->sse_nregs -= sse_nregs;
6914 cum->regno += int_nregs;
6915 cum->sse_regno += sse_nregs;
6919 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6920 cum->words = (cum->words + align - 1) & ~(align - 1);
6921 cum->words += words;
6926 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6927 HOST_WIDE_INT words)
6929   /* Otherwise, this should be passed indirectly.  */
6930 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6932 cum->words += words;
6940 /* Update the data in CUM to advance over an argument of mode MODE and
6941 data type TYPE. (TYPE is null for libcalls where that information
6942 may not be available.) */
6945 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6946 const_tree type, bool named)
6948 HOST_WIDE_INT bytes, words;
6950 if (mode == BLKmode)
6951 bytes = int_size_in_bytes (type);
6953 bytes = GET_MODE_SIZE (mode);
6954 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6957 mode = type_natural_mode (type, NULL);
6959 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6960 function_arg_advance_ms_64 (cum, bytes, words);
6961 else if (TARGET_64BIT)
6962 function_arg_advance_64 (cum, mode, type, words, named);
6964 function_arg_advance_32 (cum, mode, type, bytes, words);
6967 /* Define where to put the arguments to a function.
6968 Value is zero to push the argument on the stack,
6969 or a hard register in which to store the argument.
6971 MODE is the argument's machine mode.
6972 TYPE is the data type of the argument (as a tree).
6973 This is null for libcalls where that information may
6975 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6976 the preceding args and about the function being called.
6977 NAMED is nonzero if this argument is a named parameter
6978 (otherwise it is an extra parameter matching an ellipsis). */
6981 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6982 enum machine_mode orig_mode, const_tree type,
6983 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6985 static bool warnedsse, warnedmmx;
6987 /* Avoid the AL settings for the Unix64 ABI. */
6988 if (mode == VOIDmode)
7004 if (words <= cum->nregs)
7006 int regno = cum->regno;
7008 	  /* Fastcall allocates the first two DWORD (SImode) or
7009 	     smaller arguments to ECX and EDX if it isn't an
7010 	     aggregate type.  */
7015 || (type && AGGREGATE_TYPE_P (type)))
7018 /* ECX not EAX is the first allocated register. */
7019 if (regno == AX_REG)
7022 return gen_rtx_REG (mode, regno);
7027 if (cum->float_in_sse < 2)
7030 if (cum->float_in_sse < 1)
7034       /* In 32-bit mode, we pass TImode in xmm registers.  */
7041 if (!type || !AGGREGATE_TYPE_P (type))
7043 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
7046 warning (0, "SSE vector argument without SSE enabled "
7050 return gen_reg_or_parallel (mode, orig_mode,
7051 cum->sse_regno + FIRST_SSE_REG);
7056 /* OImode shouldn't be used directly. */
7065 if (!type || !AGGREGATE_TYPE_P (type))
7068 return gen_reg_or_parallel (mode, orig_mode,
7069 cum->sse_regno + FIRST_SSE_REG);
7079 if (!type || !AGGREGATE_TYPE_P (type))
7081 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7084 warning (0, "MMX vector argument without MMX enabled "
7088 return gen_reg_or_parallel (mode, orig_mode,
7089 cum->mmx_regno + FIRST_MMX_REG);
7098 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7099 enum machine_mode orig_mode, const_tree type, bool named)
7101 /* Handle a hidden AL argument containing number of registers
7102 for varargs x86-64 functions. */
7103 if (mode == VOIDmode)
7104 return GEN_INT (cum->maybe_vaarg
7105 ? (cum->sse_nregs < 0
7106 ? X86_64_SSE_REGPARM_MAX
7121   /* Unnamed 256bit vector mode parameters are passed on the stack.  */
7127 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7129 &x86_64_int_parameter_registers [cum->regno],
7134 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7135 enum machine_mode orig_mode, bool named,
7136 HOST_WIDE_INT bytes)
7140   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7141      We use the value of -2 to specify that the current function call is MS ABI.  */
7142 if (mode == VOIDmode)
7143 return GEN_INT (-2);
7145 /* If we've run out of registers, it goes on the stack. */
7146 if (cum->nregs == 0)
7149 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7151 /* Only floating point modes are passed in anything but integer regs. */
7152 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7155 regno = cum->regno + FIRST_SSE_REG;
7160 /* Unnamed floating parameters are passed in both the
7161 SSE and integer registers. */
7162 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7163 t2 = gen_rtx_REG (mode, regno);
7164 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7165 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7166 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7169   /* Handle aggregate types passed in registers.  */
7170 if (orig_mode == BLKmode)
7172 if (bytes > 0 && bytes <= 8)
7173 mode = (bytes > 4 ? DImode : SImode);
7174 if (mode == BLKmode)
7178 return gen_reg_or_parallel (mode, orig_mode, regno);
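/* Example (illustrative): under the 64-bit MS ABI

     struct S { char c[8]; };

   arrives here with ORIG_MODE == BLKmode and BYTES == 8, so it is
   passed in a single DImode integer register; a 24-byte struct never
   reaches this point because ix86_pass_by_reference forces it to be
   passed by reference.  */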
7181 /* Return where to put the arguments to a function.
7182    Return zero to push the argument on the stack, or a hard register in
7183    which to store the argument.
7184 MODE is the argument's machine mode. TYPE is the data type of the
7185 argument. It is null for libcalls where that information may not be
7186 available. CUM gives information about the preceding args and about
7187 the function being called. NAMED is nonzero if this argument is a
7188 named parameter (otherwise it is an extra parameter matching an
7192 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7193 const_tree type, bool named)
7195 enum machine_mode mode = omode;
7196 HOST_WIDE_INT bytes, words;
7199 if (mode == BLKmode)
7200 bytes = int_size_in_bytes (type);
7202 bytes = GET_MODE_SIZE (mode);
7203 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7205 /* To simplify the code below, represent vector types with a vector mode
7206 even if MMX/SSE are not active. */
7207 if (type && TREE_CODE (type) == VECTOR_TYPE)
7208 mode = type_natural_mode (type, cum);
7210 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7211 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7212 else if (TARGET_64BIT)
7213 arg = function_arg_64 (cum, mode, omode, type, named);
7215 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7217 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7219 /* This argument uses 256bit AVX modes. */
7221 cfun->machine->callee_pass_avx256_p = true;
7223 cfun->machine->caller_pass_avx256_p = true;
7229 /* A C expression that indicates when an argument must be passed by
7230 reference. If nonzero for an argument, a copy of that argument is
7231 made in memory and a pointer to the argument is passed instead of
7232 the argument itself. The pointer is passed in whatever way is
7233 appropriate for passing a pointer to that type. */
7236 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7237 enum machine_mode mode ATTRIBUTE_UNUSED,
7238 const_tree type, bool named ATTRIBUTE_UNUSED)
7240 /* See Windows x64 Software Convention. */
7241 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7243 int msize = (int) GET_MODE_SIZE (mode);
7246 /* Arrays are passed by reference. */
7247 if (TREE_CODE (type) == ARRAY_TYPE)
7250 if (AGGREGATE_TYPE_P (type))
7252 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7253 are passed by reference. */
7254 msize = int_size_in_bytes (type);
7258 /* __m128 is passed by reference. */
7260 case 1: case 2: case 4: case 8:
7266 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
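/* Summary of the rules above (illustrative): under the 64-bit MS ABI,
   arrays, __m128 and aggregates whose size is not 1, 2, 4 or 8 bytes
   are passed by reference, everything else directly; under the 64-bit
   SysV ABI only variable-sized types (int_size_in_bytes == -1) take
   the by-reference path here.  */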
7272 /* Return true when TYPE should be 128bit aligned for 32bit argument
7273 passing ABI. XXX: This function is obsolete and is only used for
7274 checking psABI compatibility with previous versions of GCC. */
7277 ix86_compat_aligned_value_p (const_tree type)
7279 enum machine_mode mode = TYPE_MODE (type);
7280 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7284 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7286 if (TYPE_ALIGN (type) < 128)
7289 if (AGGREGATE_TYPE_P (type))
7291 /* Walk the aggregates recursively. */
7292 switch (TREE_CODE (type))
7296 case QUAL_UNION_TYPE:
7300 /* Walk all the structure fields. */
7301 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7303 if (TREE_CODE (field) == FIELD_DECL
7304 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7311 	/* Just for use if some languages pass arrays by value.  */
7312 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7323 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7324 XXX: This function is obsolete and is only used for checking psABI
7325 compatibility with previous versions of GCC. */
7328 ix86_compat_function_arg_boundary (enum machine_mode mode,
7329 const_tree type, unsigned int align)
7331 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7332 natural boundaries. */
7333 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7335       /* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
7336 	 make an exception for SSE modes since these require 128bit
7337 	 alignment.
7339 The handling here differs from field_alignment. ICC aligns MMX
7340 arguments to 4 byte boundaries, while structure fields are aligned
7341 to 8 byte boundaries. */
7344 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7345 align = PARM_BOUNDARY;
7349 if (!ix86_compat_aligned_value_p (type))
7350 align = PARM_BOUNDARY;
7353 if (align > BIGGEST_ALIGNMENT)
7354 align = BIGGEST_ALIGNMENT;
7358 /* Return true when TYPE should be 128bit aligned for 32bit argument
7359    passing.  */
7362 ix86_contains_aligned_value_p (const_tree type)
7364 enum machine_mode mode = TYPE_MODE (type);
7366 if (mode == XFmode || mode == XCmode)
7369 if (TYPE_ALIGN (type) < 128)
7372 if (AGGREGATE_TYPE_P (type))
7374 /* Walk the aggregates recursively. */
7375 switch (TREE_CODE (type))
7379 case QUAL_UNION_TYPE:
7383 /* Walk all the structure fields. */
7384 for (field = TYPE_FIELDS (type);
7386 field = DECL_CHAIN (field))
7388 if (TREE_CODE (field) == FIELD_DECL
7389 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7396 	/* Just for use if some languages pass arrays by value.  */
7397 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7406 return TYPE_ALIGN (type) >= 128;
7411 /* Gives the alignment boundary, in bits, of an argument with the
7412 specified mode and type. */
7415 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7420       /* Since the main variant type is used for the call, convert TYPE
7421 	 to its main variant type.  */
7422 type = TYPE_MAIN_VARIANT (type);
7423 align = TYPE_ALIGN (type);
7426 align = GET_MODE_ALIGNMENT (mode);
7427 if (align < PARM_BOUNDARY)
7428 align = PARM_BOUNDARY;
7432 unsigned int saved_align = align;
7436 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7439 if (mode == XFmode || mode == XCmode)
7440 align = PARM_BOUNDARY;
7442 else if (!ix86_contains_aligned_value_p (type))
7443 align = PARM_BOUNDARY;
7446 align = PARM_BOUNDARY;
7451 && align != ix86_compat_function_arg_boundary (mode, type,
7455 inform (input_location,
7456 "The ABI for passing parameters with %d-byte"
7457 " alignment has changed in GCC 4.6",
7458 align / BITS_PER_UNIT);
7465 /* Return true if N is a possible register number of function value. */
7468 ix86_function_value_regno_p (const unsigned int regno)
7475 case FIRST_FLOAT_REG:
7476 /* TODO: The function should depend on current function ABI but
7477 builtins.c would need updating then. Therefore we use the
7479 if (TARGET_64BIT && ix86_abi == MS_ABI)
7481 return TARGET_FLOAT_RETURNS_IN_80387;
7487 if (TARGET_MACHO || TARGET_64BIT)
7495 /* Define how to find the value returned by a function.
7496 VALTYPE is the data type of the value (as a tree).
7497 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7498 otherwise, FUNC is 0. */
7501 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7502 const_tree fntype, const_tree fn)
7506 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7507 we normally prevent this case when mmx is not available. However
7508 some ABIs may require the result to be returned like DImode. */
7509 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7510 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7512 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7513 we prevent this case when sse is not available. However some ABIs
7514 may require the result to be returned like integer TImode. */
7515 else if (mode == TImode
7516 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7517 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7519 /* 32-byte vector modes in %ymm0. */
7520 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7521 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7523 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7524 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7525 regno = FIRST_FLOAT_REG;
7527 /* Most things go in %eax. */
7530 /* Override FP return register with %xmm0 for local functions when
7531 SSE math is enabled or for functions with sseregparm attribute. */
7532 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7534 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7535 if ((sse_level >= 1 && mode == SFmode)
7536 || (sse_level == 2 && mode == DFmode))
7537 regno = FIRST_SSE_REG;
7540 /* OImode shouldn't be used directly. */
7541 gcc_assert (mode != OImode);
7543 return gen_rtx_REG (orig_mode, regno);
7547 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7552 /* Handle libcalls, which don't provide a type node. */
7553 if (valtype == NULL)
7565 return gen_rtx_REG (mode, FIRST_SSE_REG);
7568 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7572 return gen_rtx_REG (mode, AX_REG);
7576 ret = construct_container (mode, orig_mode, valtype, 1,
7577 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7578 x86_64_int_return_registers, 0);
7580   /* For zero sized structures, construct_container returns NULL, but we
7581      need to keep the rest of the compiler happy by returning a meaningful value.  */
7583 ret = gen_rtx_REG (orig_mode, AX_REG);
7589 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7591 unsigned int regno = AX_REG;
7595 switch (GET_MODE_SIZE (mode))
7598 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7599 && !COMPLEX_MODE_P (mode))
7600 regno = FIRST_SSE_REG;
7604 if (mode == SFmode || mode == DFmode)
7605 regno = FIRST_SSE_REG;
7611 return gen_rtx_REG (orig_mode, regno);
7615 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7616 enum machine_mode orig_mode, enum machine_mode mode)
7618 const_tree fn, fntype;
7621 if (fntype_or_decl && DECL_P (fntype_or_decl))
7622 fn = fntype_or_decl;
7623 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7625 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7626 return function_value_ms_64 (orig_mode, mode);
7627 else if (TARGET_64BIT)
7628 return function_value_64 (orig_mode, mode, valtype);
7630 return function_value_32 (orig_mode, mode, fntype, fn);
7634 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7635 bool outgoing ATTRIBUTE_UNUSED)
7637 enum machine_mode mode, orig_mode;
7639 orig_mode = TYPE_MODE (valtype);
7640 mode = type_natural_mode (valtype, NULL);
7641 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7645 ix86_libcall_value (enum machine_mode mode)
7647 return ix86_function_value_1 (NULL, NULL, mode, mode);
7650 /* Return true iff type is returned in memory. */
7652 static bool ATTRIBUTE_UNUSED
7653 return_in_memory_32 (const_tree type, enum machine_mode mode)
7657 if (mode == BLKmode)
7660 size = int_size_in_bytes (type);
7662 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7665 if (VECTOR_MODE_P (mode) || mode == TImode)
7667 /* User-created vectors small enough to fit in EAX. */
7671 /* MMX/3dNow values are returned in MM0,
7672 	 except when it doesn't exist or the ABI prescribes otherwise.  */
7674 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7676 /* SSE values are returned in XMM0, except when it doesn't exist. */
7680 /* AVX values are returned in YMM0, except when it doesn't exist. */
7691 /* OImode shouldn't be used directly. */
7692 gcc_assert (mode != OImode);
7697 static bool ATTRIBUTE_UNUSED
7698 return_in_memory_64 (const_tree type, enum machine_mode mode)
7700 int needed_intregs, needed_sseregs;
7701 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7704 static bool ATTRIBUTE_UNUSED
7705 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7707 HOST_WIDE_INT size = int_size_in_bytes (type);
7709 /* __m128 is returned in xmm0. */
7710 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7711 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7714   /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
7715 return size != 1 && size != 2 && size != 4 && size != 8;
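/* Example (illustrative): under the 64-bit MS ABI a 4-byte struct is
   returned in %rax, a 3-byte struct is returned in memory (size not in
   {1, 2, 4, 8}), and __m128 is returned in %xmm0 via the 16-byte
   vector case above.  */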
7719 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7721 #ifdef SUBTARGET_RETURN_IN_MEMORY
7722 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7724 const enum machine_mode mode = type_natural_mode (type, NULL);
7728 if (ix86_function_type_abi (fntype) == MS_ABI)
7729 return return_in_memory_ms_64 (type, mode);
7731 return return_in_memory_64 (type, mode);
7734 return return_in_memory_32 (type, mode);
7738 /* When returning SSE vector types, we have a choice of either
7739 (1) being abi incompatible with a -march switch, or
7740 (2) generating an error.
7741 Given no good solution, I think the safest thing is one warning.
7742 The user won't be able to use -Werror, but....
7744 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7745 called in response to actually generating a caller or callee that
7746 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7747 via aggregate_value_p for general type probing from tree-ssa. */
7750 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7752 static bool warnedsse, warnedmmx;
7754 if (!TARGET_64BIT && type)
7756 /* Look at the return type of the function, not the function type. */
7757 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7759 if (!TARGET_SSE && !warnedsse)
7762 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7765 warning (0, "SSE vector return without SSE enabled "
7770 if (!TARGET_MMX && !warnedmmx)
7772 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7775 warning (0, "MMX vector return without MMX enabled "
7785 /* Create the va_list data type. */
7787 /* Returns the calling convention specific va_list data type.
7788 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7791 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7793 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7795   /* For i386 we use a plain pointer to the argument area.  */
7796 if (!TARGET_64BIT || abi == MS_ABI)
7797 return build_pointer_type (char_type_node);
7799 record = lang_hooks.types.make_type (RECORD_TYPE);
7800 type_decl = build_decl (BUILTINS_LOCATION,
7801 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7803 f_gpr = build_decl (BUILTINS_LOCATION,
7804 FIELD_DECL, get_identifier ("gp_offset"),
7805 unsigned_type_node);
7806 f_fpr = build_decl (BUILTINS_LOCATION,
7807 FIELD_DECL, get_identifier ("fp_offset"),
7808 unsigned_type_node);
7809 f_ovf = build_decl (BUILTINS_LOCATION,
7810 FIELD_DECL, get_identifier ("overflow_arg_area"),
7812 f_sav = build_decl (BUILTINS_LOCATION,
7813 FIELD_DECL, get_identifier ("reg_save_area"),
7816 va_list_gpr_counter_field = f_gpr;
7817 va_list_fpr_counter_field = f_fpr;
7819 DECL_FIELD_CONTEXT (f_gpr) = record;
7820 DECL_FIELD_CONTEXT (f_fpr) = record;
7821 DECL_FIELD_CONTEXT (f_ovf) = record;
7822 DECL_FIELD_CONTEXT (f_sav) = record;
7824 TYPE_STUB_DECL (record) = type_decl;
7825 TYPE_NAME (record) = type_decl;
7826 TYPE_FIELDS (record) = f_gpr;
7827 DECL_CHAIN (f_gpr) = f_fpr;
7828 DECL_CHAIN (f_fpr) = f_ovf;
7829 DECL_CHAIN (f_ovf) = f_sav;
7831 layout_type (record);
7833 /* The correct type is an array type of one element. */
7834 return build_array_type (record, build_index_type (size_zero_node));
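/* For reference (illustrative): the record built above corresponds to
   the SysV x86-64 va_list familiar from the psABI:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */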
7837 /* Set up the builtin va_list data type and, for 64-bit, the additional
7838    calling convention specific va_list data types.  */
7841 ix86_build_builtin_va_list (void)
7843 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7845 /* Initialize abi specific va_list builtin types. */
7849 if (ix86_abi == MS_ABI)
7851 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7852 if (TREE_CODE (t) != RECORD_TYPE)
7853 t = build_variant_type_copy (t);
7854 sysv_va_list_type_node = t;
7859 if (TREE_CODE (t) != RECORD_TYPE)
7860 t = build_variant_type_copy (t);
7861 sysv_va_list_type_node = t;
7863 if (ix86_abi != MS_ABI)
7865 t = ix86_build_builtin_va_list_abi (MS_ABI);
7866 if (TREE_CODE (t) != RECORD_TYPE)
7867 t = build_variant_type_copy (t);
7868 ms_va_list_type_node = t;
7873 if (TREE_CODE (t) != RECORD_TYPE)
7874 t = build_variant_type_copy (t);
7875 ms_va_list_type_node = t;
7882 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7885 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7891 /* GPR size of varargs save area. */
7892 if (cfun->va_list_gpr_size)
7893 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7895 ix86_varargs_gpr_size = 0;
7897 /* FPR size of varargs save area. We don't need it if we don't pass
7898 anything in SSE registers. */
7899 if (TARGET_SSE && cfun->va_list_fpr_size)
7900 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7902 ix86_varargs_fpr_size = 0;
7904 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7907 save_area = frame_pointer_rtx;
7908 set = get_varargs_alias_set ();
7910 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7911 if (max > X86_64_REGPARM_MAX)
7912 max = X86_64_REGPARM_MAX;
7914 for (i = cum->regno; i < max; i++)
7916 mem = gen_rtx_MEM (Pmode,
7917 plus_constant (save_area, i * UNITS_PER_WORD));
7918 MEM_NOTRAP_P (mem) = 1;
7919 set_mem_alias_set (mem, set);
7920 emit_move_insn (mem, gen_rtx_REG (Pmode,
7921 x86_64_int_parameter_registers[i]));
7924 if (ix86_varargs_fpr_size)
7926 enum machine_mode smode;
7929       /* Now emit code to save SSE registers.  The AX parameter contains the
7930 	 number of SSE parameter registers used to call this function, though
7931 	 all we actually check here is the zero/non-zero status.  */
7933 label = gen_label_rtx ();
7934 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7935 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7938 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7939 we used movdqa (i.e. TImode) instead? Perhaps even better would
7940 be if we could determine the real mode of the data, via a hook
7941 into pass_stdarg. Ignore all that for now. */
7943 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7944 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7946 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7947 if (max > X86_64_SSE_REGPARM_MAX)
7948 max = X86_64_SSE_REGPARM_MAX;
7950 for (i = cum->sse_regno; i < max; ++i)
7952 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7953 mem = gen_rtx_MEM (smode, mem);
7954 MEM_NOTRAP_P (mem) = 1;
7955 set_mem_alias_set (mem, set);
7956 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7958 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
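/* Layout sketch of the register save area filled in above
   (illustrative): the integer registers occupy the first
   ix86_varargs_gpr_size bytes, 8 bytes each, immediately followed by
   the SSE registers at 16 bytes each; correspondingly, ix86_va_start
   initializes gp_offset to n_gpr * 8 and fp_offset to
   n_fpr * 16 + 8 * X86_64_REGPARM_MAX.  */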
7966 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7968 alias_set_type set = get_varargs_alias_set ();
7971 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7975 mem = gen_rtx_MEM (Pmode,
7976 plus_constant (virtual_incoming_args_rtx,
7977 i * UNITS_PER_WORD));
7978 MEM_NOTRAP_P (mem) = 1;
7979 set_mem_alias_set (mem, set);
7981 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7982 emit_move_insn (mem, reg);
7987 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7988 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7991 CUMULATIVE_ARGS next_cum;
7994 /* This argument doesn't appear to be used anymore. Which is good,
7995 because the old code here didn't suppress rtl generation. */
7996 gcc_assert (!no_rtl);
8001 fntype = TREE_TYPE (current_function_decl);
8003 /* For varargs, we do not want to skip the dummy va_dcl argument.
8004 For stdargs, we do want to skip the last named argument. */
8006 if (stdarg_p (fntype))
8007 ix86_function_arg_advance (&next_cum, mode, type, true);
8009 if (cum->call_abi == MS_ABI)
8010 setup_incoming_varargs_ms_64 (&next_cum);
8012 setup_incoming_varargs_64 (&next_cum);
8015 /* Check whether TYPE is a va_list of the plain char * kind.  */
8018 is_va_list_char_pointer (tree type)
8022 /* For 32-bit it is always true. */
8025 canonic = ix86_canonical_va_list_type (type);
8026 return (canonic == ms_va_list_type_node
8027 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8030 /* Implement va_start. */
8033 ix86_va_start (tree valist, rtx nextarg)
8035 HOST_WIDE_INT words, n_gpr, n_fpr;
8036 tree f_gpr, f_fpr, f_ovf, f_sav;
8037 tree gpr, fpr, ovf, sav, t;
8041 if (flag_split_stack
8042 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8044 unsigned int scratch_regno;
8046 /* When we are splitting the stack, we can't refer to the stack
8047 arguments using internal_arg_pointer, because they may be on
8048 the old stack. The split stack prologue will arrange to
8049 leave a pointer to the old stack arguments in a scratch
8050 register, which we here copy to a pseudo-register. The split
8051 stack prologue can't set the pseudo-register directly because
8052 it (the prologue) runs before any registers have been saved. */
8054 scratch_regno = split_stack_prologue_scratch_regno ();
8055 if (scratch_regno != INVALID_REGNUM)
8059 reg = gen_reg_rtx (Pmode);
8060 cfun->machine->split_stack_varargs_pointer = reg;
8063 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8067 push_topmost_sequence ();
8068 emit_insn_after (seq, entry_of_function ());
8069 pop_topmost_sequence ();
8073   /* Only the 64-bit target needs something special.  */
8074 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8076 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8077 std_expand_builtin_va_start (valist, nextarg);
8082 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8083 next = expand_binop (ptr_mode, add_optab,
8084 cfun->machine->split_stack_varargs_pointer,
8085 crtl->args.arg_offset_rtx,
8086 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8087 convert_move (va_r, next, 0);
8092 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8093 f_fpr = DECL_CHAIN (f_gpr);
8094 f_ovf = DECL_CHAIN (f_fpr);
8095 f_sav = DECL_CHAIN (f_ovf);
8097 valist = build_simple_mem_ref (valist);
8098 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8099 /* The following should be folded into the MEM_REF offset. */
8100 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8102 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8104 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8106 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8109 /* Count number of gp and fp argument registers used. */
8110 words = crtl->args.info.words;
8111 n_gpr = crtl->args.info.regno;
8112 n_fpr = crtl->args.info.sse_regno;
8114 if (cfun->va_list_gpr_size)
8116 type = TREE_TYPE (gpr);
8117 t = build2 (MODIFY_EXPR, type,
8118 gpr, build_int_cst (type, n_gpr * 8));
8119 TREE_SIDE_EFFECTS (t) = 1;
8120 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8123 if (TARGET_SSE && cfun->va_list_fpr_size)
8125 type = TREE_TYPE (fpr);
8126 t = build2 (MODIFY_EXPR, type, fpr,
8127 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8128 TREE_SIDE_EFFECTS (t) = 1;
8129 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
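/* For illustration: the SysV register save area lays out the
   X86_64_REGPARM_MAX (6) GP registers first, 8 bytes each at offsets
   0..47, followed by the SSE registers, 16 bytes each starting at
   8 * X86_64_REGPARM_MAX == 48. E.g. if the named arguments consumed
   two GP registers and one SSE register, gpr becomes 16 and fpr
   48 + 16 == 64. */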
8132 /* Find the overflow area. */
8133 type = TREE_TYPE (ovf);
8134 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8135 ovf_rtx = crtl->args.internal_arg_pointer;
8137 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8138 t = make_tree (type, ovf_rtx);
8140 t = build2 (POINTER_PLUS_EXPR, type, t,
8141 size_int (words * UNITS_PER_WORD));
8142 t = build2 (MODIFY_EXPR, type, ovf, t);
8143 TREE_SIDE_EFFECTS (t) = 1;
8144 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8146 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8148 /* Find the register save area.
8149 The prologue of the function saves it right above the stack frame. */
8150 type = TREE_TYPE (sav);
8151 t = make_tree (type, frame_pointer_rtx);
8152 if (!ix86_varargs_gpr_size)
8153 t = build2 (POINTER_PLUS_EXPR, type, t,
8154 size_int (-8 * X86_64_REGPARM_MAX));
8155 t = build2 (MODIFY_EXPR, type, sav, t);
8156 TREE_SIDE_EFFECTS (t) = 1;
8157 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8161 /* Implement va_arg. */
8164 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8167 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8168 tree f_gpr, f_fpr, f_ovf, f_sav;
8169 tree gpr, fpr, ovf, sav, t;
8171 tree lab_false, lab_over = NULL_TREE;
8176 enum machine_mode nat_mode;
8177 unsigned int arg_boundary;
8179 /* Only 64bit target needs something special. */
8180 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8181 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8183 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8184 f_fpr = DECL_CHAIN (f_gpr);
8185 f_ovf = DECL_CHAIN (f_fpr);
8186 f_sav = DECL_CHAIN (f_ovf);
8188 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8189 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8190 valist = build_va_arg_indirect_ref (valist);
8191 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8192 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8193 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8195 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8197 type = build_pointer_type (type);
8198 size = int_size_in_bytes (type);
8199 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8201 nat_mode = type_natural_mode (type, NULL);
8210 /* Unnamed 256bit vector mode parameters are passed on stack. */
8211 if (!TARGET_64BIT_MS_ABI)
8218 container = construct_container (nat_mode, TYPE_MODE (type),
8219 type, 0, X86_64_REGPARM_MAX,
8220 X86_64_SSE_REGPARM_MAX, intreg,
8225 /* Pull the value out of the saved registers. */
8227 addr = create_tmp_var (ptr_type_node, "addr");
8231 int needed_intregs, needed_sseregs;
8233 tree int_addr, sse_addr;
8235 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8236 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8238 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8240 need_temp = (!REG_P (container)
8241 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8242 || TYPE_ALIGN (type) > 128));
8244 /* In case we are passing a structure, verify that it is a consecutive
8245 block on the register save area. If not, we need to do moves. */
8246 if (!need_temp && !REG_P (container))
8248 /* Verify that all registers are strictly consecutive. */
8249 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8253 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8255 rtx slot = XVECEXP (container, 0, i);
8256 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8257 || INTVAL (XEXP (slot, 1)) != i * 16)
8265 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8267 rtx slot = XVECEXP (container, 0, i);
8268 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8269 || INTVAL (XEXP (slot, 1)) != i * 8)
8281 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8282 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8285 /* First ensure that we fit completely in registers. */
8288 t = build_int_cst (TREE_TYPE (gpr),
8289 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8290 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8291 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8292 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8293 gimplify_and_add (t, pre_p);
8297 t = build_int_cst (TREE_TYPE (fpr),
8298 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8299 + X86_64_REGPARM_MAX * 8);
8300 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8301 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8302 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8303 gimplify_and_add (t, pre_p);
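/* For illustration: with X86_64_REGPARM_MAX == 6 and needed_intregs == 2,
   the branch to lab_false is taken once gpr >= (6 - 2 + 1) * 8 == 40,
   i.e. as soon as fewer than two of the six 8-byte GP slots remain. */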
8306 /* Compute index to start of area used for integer regs. */
8309 /* int_addr = gpr + sav; */
8310 t = fold_convert (sizetype, gpr);
8311 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8312 gimplify_assign (int_addr, t, pre_p);
8316 /* sse_addr = fpr + sav; */
8317 t = fold_convert (sizetype, fpr);
8318 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8319 gimplify_assign (sse_addr, t, pre_p);
8323 int i, prev_size = 0;
8324 tree temp = create_tmp_var (type, "va_arg_tmp");
8327 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8328 gimplify_assign (addr, t, pre_p);
8330 for (i = 0; i < XVECLEN (container, 0); i++)
8332 rtx slot = XVECEXP (container, 0, i);
8333 rtx reg = XEXP (slot, 0);
8334 enum machine_mode mode = GET_MODE (reg);
8340 tree dest_addr, dest;
8341 int cur_size = GET_MODE_SIZE (mode);
8343 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8344 prev_size = INTVAL (XEXP (slot, 1));
8345 if (prev_size + cur_size > size)
8347 cur_size = size - prev_size;
8348 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8349 if (mode == BLKmode)
8352 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8353 if (mode == GET_MODE (reg))
8354 addr_type = build_pointer_type (piece_type);
8356 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8358 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8361 if (SSE_REGNO_P (REGNO (reg)))
8363 src_addr = sse_addr;
8364 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8368 src_addr = int_addr;
8369 src_offset = REGNO (reg) * 8;
8371 src_addr = fold_convert (addr_type, src_addr);
8372 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8373 size_int (src_offset));
8375 dest_addr = fold_convert (daddr_type, addr);
8376 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8377 size_int (prev_size));
8378 if (cur_size == GET_MODE_SIZE (mode))
8380 src = build_va_arg_indirect_ref (src_addr);
8381 dest = build_va_arg_indirect_ref (dest_addr);
8383 gimplify_assign (dest, src, pre_p);
8388 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8389 3, dest_addr, src_addr,
8390 size_int (cur_size));
8391 gimplify_and_add (copy, pre_p);
8393 prev_size += cur_size;
8399 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8400 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8401 gimplify_assign (gpr, t, pre_p);
8406 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8407 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8408 gimplify_assign (fpr, t, pre_p);
8411 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8413 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8416 /* ... otherwise out of the overflow area. */
8418 /* When we align a parameter on the stack for the caller, if the
8419 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
8420 be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8421 here with the caller. */
8422 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8423 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8424 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8426 /* Care for on-stack alignment if needed. */
8427 if (arg_boundary <= 64 || size == 0)
8431 HOST_WIDE_INT align = arg_boundary / 8;
8432 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8433 size_int (align - 1));
8434 t = fold_convert (sizetype, t);
8435 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8437 t = fold_convert (TREE_TYPE (ovf), t);
8440 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8441 gimplify_assign (addr, t, pre_p);
8443 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8444 size_int (rsize * UNITS_PER_WORD));
8445 gimplify_assign (unshare_expr (ovf), t, pre_p);
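/* The expression built above is the usual round-up idiom
   (ovf + align - 1) & -align. E.g. for a 16-byte boundary an overflow
   pointer of 0x1004 is advanced to 0x1010, while 0x1010 is left as is. */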
8448 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8450 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8451 addr = fold_convert (ptrtype, addr);
8454 addr = build_va_arg_indirect_ref (addr);
8455 return build_va_arg_indirect_ref (addr);
8458 /* Return true if OPNUM's MEM should be matched
8459 in movabs* patterns. */
8462 ix86_check_movabs (rtx insn, int opnum)
8466 set = PATTERN (insn);
8467 if (GET_CODE (set) == PARALLEL)
8468 set = XVECEXP (set, 0, 0);
8469 gcc_assert (GET_CODE (set) == SET);
8470 mem = XEXP (set, opnum);
8471 while (GET_CODE (mem) == SUBREG)
8472 mem = SUBREG_REG (mem);
8473 gcc_assert (MEM_P (mem));
8474 return volatile_ok || !MEM_VOLATILE_P (mem);
8477 /* Initialize the table of extra 80387 mathematical constants. */
8480 init_ext_80387_constants (void)
8482 static const char * cst[5] =
8484 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8485 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8486 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8487 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8488 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8492 for (i = 0; i < 5; i++)
8494 real_from_string (&ext_80387_constants_table[i], cst[i]);
8495 /* Ensure each constant is rounded to XFmode precision. */
8496 real_convert (&ext_80387_constants_table[i],
8497 XFmode, &ext_80387_constants_table[i]);
8500 ext_80387_constants_init = 1;
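/* The table entries are, in order, log10(2), ln(2), log2(e), log2(10)
   and pi -- precisely the values the x87 fldlg2, fldln2, fldl2e, fldl2t
   and fldpi instructions push. */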
8503 /* Return non-zero if the constant is something that
8504 can be loaded with a special instruction. */
8507 standard_80387_constant_p (rtx x)
8509 enum machine_mode mode = GET_MODE (x);
8513 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8516 if (x == CONST0_RTX (mode))
8518 if (x == CONST1_RTX (mode))
8521 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8523 /* For XFmode constants, try to find a special 80387 instruction when
8524 optimizing for size or on those CPUs that benefit from them. */
8526 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8530 if (! ext_80387_constants_init)
8531 init_ext_80387_constants ();
8533 for (i = 0; i < 5; i++)
8534 if (real_identical (&r, &ext_80387_constants_table[i]))
8538 /* A load of the constant -0.0 or -1.0 will be split into an
8539 fldz;fchs or fld1;fchs sequence. */
8540 if (real_isnegzero (&r))
8542 if (real_identical (&r, &dconstm1))
8548 /* Return the opcode of the special instruction to be used to load
8552 standard_80387_constant_opcode (rtx x)
8554 switch (standard_80387_constant_p (x))
8578 /* Return the CONST_DOUBLE representing the 80387 constant that is
8579 loaded by the specified special instruction. The argument IDX
8580 matches the return value from standard_80387_constant_p. */
8583 standard_80387_constant_rtx (int idx)
8587 if (! ext_80387_constants_init)
8588 init_ext_80387_constants ();
8604 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8608 /* Return 1 if X is all 0s and 2 if X is all 1s
8609 in a supported SSE vector mode. */
8612 standard_sse_constant_p (rtx x)
8614 enum machine_mode mode = GET_MODE (x);
8616 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8618 if (vector_all_ones_operand (x, mode))
8634 /* Return the opcode of the special instruction to be used to load
8638 standard_sse_constant_opcode (rtx insn, rtx x)
8640 switch (standard_sse_constant_p (x))
8643 switch (get_attr_mode (insn))
8646 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8648 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8649 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8651 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8653 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8654 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8656 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8658 return "vxorps\t%x0, %x0, %x0";
8660 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8661 return "vxorps\t%x0, %x0, %x0";
8663 return "vxorpd\t%x0, %x0, %x0";
8665 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8666 return "vxorps\t%x0, %x0, %x0";
8668 return "vpxor\t%x0, %x0, %x0";
8673 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8680 /* Returns true if OP contains a symbol reference. */
8683 symbolic_reference_mentioned_p (rtx op)
8688 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8691 fmt = GET_RTX_FORMAT (GET_CODE (op));
8692 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8698 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8699 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8703 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8710 /* Return true if it is appropriate to emit `ret' instructions in the
8711 body of a function. Do this only if the epilogue is simple, needing a
8712 couple of insns. Prior to reloading, we can't tell how many registers
8713 must be saved, so return false then. Return false if there is no frame
8714 marker to de-allocate. */
8717 ix86_can_use_return_insn_p (void)
8719 struct ix86_frame frame;
8721 if (! reload_completed || frame_pointer_needed)
8724 /* Don't allow more than 32k pop, since that's all we can do
8725 with one instruction. */
8726 if (crtl->args.pops_args && crtl->args.size >= 32768)
8729 ix86_compute_frame_layout (&frame);
8730 return (frame.stack_pointer_offset == UNITS_PER_WORD
8731 && (frame.nregs + frame.nsseregs) == 0);
8734 /* Value should be nonzero if functions must have frame pointers.
8735 Zero means the frame pointer need not be set up (and parms may
8736 be accessed via the stack pointer) in functions that seem suitable. */
8739 ix86_frame_pointer_required (void)
8741 /* If we accessed previous frames, then the generated code expects
8742 to be able to access the saved ebp value in our frame. */
8743 if (cfun->machine->accesses_prev_frame)
8746 /* Several x86 OSes need a frame pointer for other reasons,
8747 usually pertaining to setjmp. */
8748 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8751 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8752 turns off the frame pointer by default. Turn it back on now if
8753 we've not got a leaf function. */
8754 if (TARGET_OMIT_LEAF_FRAME_POINTER
8755 && (!current_function_is_leaf
8756 || ix86_current_function_calls_tls_descriptor))
8759 if (crtl->profile && !flag_fentry)
8765 /* Record that the current function accesses previous call frames. */
8768 ix86_setup_frame_addresses (void)
8770 cfun->machine->accesses_prev_frame = 1;
8773 #ifndef USE_HIDDEN_LINKONCE
8774 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8775 # define USE_HIDDEN_LINKONCE 1
8777 # define USE_HIDDEN_LINKONCE 0
8781 static int pic_labels_used;
8783 /* Fills in the label name that should be used for a pc thunk for
8784 the given register. */
8787 get_pc_thunk_name (char name[32], unsigned int regno)
8789 gcc_assert (!TARGET_64BIT);
8791 if (USE_HIDDEN_LINKONCE)
8792 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8794 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
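/* E.g. the thunk for %ebx is named "__i686.get_pc_thunk.bx" when hidden
   linkonce sections are usable; otherwise an internal LPR<regno>-style
   label is generated instead. */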
8798 /* This function generates code for -fpic that loads %ebx with
8799 the return address of the caller and then returns. */
8802 ix86_code_end (void)
8807 for (regno = AX_REG; regno <= SP_REG; regno++)
8812 if (!(pic_labels_used & (1 << regno)))
8815 get_pc_thunk_name (name, regno);
8817 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8818 get_identifier (name),
8819 build_function_type_list (void_type_node, NULL_TREE));
8820 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8821 NULL_TREE, void_type_node);
8822 TREE_PUBLIC (decl) = 1;
8823 TREE_STATIC (decl) = 1;
8828 switch_to_section (darwin_sections[text_coal_section]);
8829 fputs ("\t.weak_definition\t", asm_out_file);
8830 assemble_name (asm_out_file, name);
8831 fputs ("\n\t.private_extern\t", asm_out_file);
8832 assemble_name (asm_out_file, name);
8833 putc ('\n', asm_out_file);
8834 ASM_OUTPUT_LABEL (asm_out_file, name);
8835 DECL_WEAK (decl) = 1;
8839 if (USE_HIDDEN_LINKONCE)
8841 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8843 targetm.asm_out.unique_section (decl, 0);
8844 switch_to_section (get_named_section (decl, NULL, 0));
8846 targetm.asm_out.globalize_label (asm_out_file, name);
8847 fputs ("\t.hidden\t", asm_out_file);
8848 assemble_name (asm_out_file, name);
8849 putc ('\n', asm_out_file);
8850 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8854 switch_to_section (text_section);
8855 ASM_OUTPUT_LABEL (asm_out_file, name);
8858 DECL_INITIAL (decl) = make_node (BLOCK);
8859 current_function_decl = decl;
8860 init_function_start (decl);
8861 first_function_block_is_cold = false;
8862 /* Make sure unwind info is emitted for the thunk if needed. */
8863 final_start_function (emit_barrier (), asm_out_file, 1);
8865 /* Pad stack IP move with 4 instructions (two NOPs count
8866 as one instruction). */
8867 if (TARGET_PAD_SHORT_FUNCTION)
8872 fputs ("\tnop\n", asm_out_file);
8875 xops[0] = gen_rtx_REG (Pmode, regno);
8876 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8877 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8878 fputs ("\tret\n", asm_out_file);
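/* The emitted thunk body is just a two-instruction sequence, e.g. for
   %ebx in AT&T syntax:
       movl (%esp), %ebx
       ret
   which copies the return address pushed by the caller's call insn
   into the PIC register. */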
8879 final_end_function ();
8880 init_insn_lengths ();
8881 free_after_compilation (cfun);
8883 current_function_decl = NULL;
8886 if (flag_split_stack)
8887 file_end_indicate_split_stack ();
8890 /* Emit code for the SET_GOT patterns. */
8893 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8899 if (TARGET_VXWORKS_RTP && flag_pic)
8901 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8902 xops[2] = gen_rtx_MEM (Pmode,
8903 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8904 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8906 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8907 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8908 an unadorned address. */
8909 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8910 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8911 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8915 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8917 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8919 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8922 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8925 output_asm_insn ("call\t%a2", xops);
8926 #ifdef DWARF2_UNWIND_INFO
8927 /* The call to next label acts as a push. */
8928 if (dwarf2out_do_frame ())
8932 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8933 gen_rtx_PLUS (Pmode,
8936 RTX_FRAME_RELATED_P (insn) = 1;
8937 dwarf2out_frame_debug (insn, true);
8944 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8945 is what will be referenced by the Mach-O PIC subsystem. */
8947 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8950 targetm.asm_out.internal_label (asm_out_file, "L",
8951 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8955 output_asm_insn ("pop%z0\t%0", xops);
8956 #ifdef DWARF2_UNWIND_INFO
8957 /* The pop clobbers DEST, but does not restore it as far as the
8958 unwind info is concerned. */
8959 if (dwarf2out_do_frame ())
8963 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8964 dwarf2out_frame_debug (insn, true);
8965 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8966 gen_rtx_PLUS (Pmode,
8969 RTX_FRAME_RELATED_P (insn) = 1;
8970 dwarf2out_frame_debug (insn, true);
8979 get_pc_thunk_name (name, REGNO (dest));
8980 pic_labels_used |= 1 << REGNO (dest);
8982 #ifdef DWARF2_UNWIND_INFO
8983 /* Ensure all queued register saves are flushed before the
8985 if (dwarf2out_do_frame ())
8986 dwarf2out_flush_queued_reg_saves ();
8988 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8989 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8990 output_asm_insn ("call\t%X2", xops);
8991 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8992 is what will be referenced by the Mach-O PIC subsystem. */
8995 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8997 targetm.asm_out.internal_label (asm_out_file, "L",
8998 CODE_LABEL_NUMBER (label));
9005 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
9006 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9008 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
9013 /* Generate a "push" pattern for input ARG. */
9018 struct machine_function *m = cfun->machine;
9020 if (m->fs.cfa_reg == stack_pointer_rtx)
9021 m->fs.cfa_offset += UNITS_PER_WORD;
9022 m->fs.sp_offset += UNITS_PER_WORD;
9024 return gen_rtx_SET (VOIDmode,
9026 gen_rtx_PRE_DEC (Pmode,
9027 stack_pointer_rtx)),
9031 /* Generate a "pop" pattern for input ARG. */
9036 return gen_rtx_SET (VOIDmode,
9039 gen_rtx_POST_INC (Pmode,
9040 stack_pointer_rtx)));
9043 /* Return >= 0 if there is an unused call-clobbered register available
9044 for the entire function. */
9047 ix86_select_alt_pic_regnum (void)
9049 if (current_function_is_leaf
9051 && !ix86_current_function_calls_tls_descriptor)
9054 /* Can't use the same register for both PIC and DRAP. */
9056 drap = REGNO (crtl->drap_reg);
9059 for (i = 2; i >= 0; --i)
9060 if (i != drap && !df_regs_ever_live_p (i))
9064 return INVALID_REGNUM;
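/* Note that hard registers 0..2 are %eax, %edx and %ecx on ia32, so the
   downward loop above prefers %ecx, then %edx, then %eax as the
   alternate PIC register. */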
9067 /* Return 1 if we need to save REGNO. */
9069 ix86_save_reg (unsigned int regno, int maybe_eh_return)
9071 if (pic_offset_table_rtx
9072 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9073 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9075 || crtl->calls_eh_return
9076 || crtl->uses_const_pool))
9078 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
9083 if (crtl->calls_eh_return && maybe_eh_return)
9088 unsigned test = EH_RETURN_DATA_REGNO (i);
9089 if (test == INVALID_REGNUM)
9096 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9099 return (df_regs_ever_live_p (regno)
9100 && !call_used_regs[regno]
9101 && !fixed_regs[regno]
9102 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9105 /* Return the number of saved general purpose registers. */
9108 ix86_nsaved_regs (void)
9113 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9114 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9119 /* Return the number of saved SSE registers. */
9122 ix86_nsaved_sseregs (void)
9127 if (!TARGET_64BIT_MS_ABI)
9129 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9130 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9135 /* Given FROM and TO register numbers, say whether this elimination is
9136 allowed. If stack alignment is needed, we can only replace argument
9137 pointer with hard frame pointer, or replace frame pointer with stack
9138 pointer. Otherwise, frame pointer elimination is automatically
9139 handled and all other eliminations are valid. */
9142 ix86_can_eliminate (const int from, const int to)
9144 if (stack_realign_fp)
9145 return ((from == ARG_POINTER_REGNUM
9146 && to == HARD_FRAME_POINTER_REGNUM)
9147 || (from == FRAME_POINTER_REGNUM
9148 && to == STACK_POINTER_REGNUM));
9150 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9153 /* Return the offset between two registers, one to be eliminated, and the other
9154 its replacement, at the start of a routine. */
9157 ix86_initial_elimination_offset (int from, int to)
9159 struct ix86_frame frame;
9160 ix86_compute_frame_layout (&frame);
9162 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9163 return frame.hard_frame_pointer_offset;
9164 else if (from == FRAME_POINTER_REGNUM
9165 && to == HARD_FRAME_POINTER_REGNUM)
9166 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9169 gcc_assert (to == STACK_POINTER_REGNUM);
9171 if (from == ARG_POINTER_REGNUM)
9172 return frame.stack_pointer_offset;
9174 gcc_assert (from == FRAME_POINTER_REGNUM);
9175 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9179 /* In a dynamically-aligned function, we can't know the offset from
9180 stack pointer to frame pointer, so we must ensure that setjmp
9181 eliminates fp against the hard fp (%ebp) rather than trying to
9182 index from %esp up to the top of the frame across a gap that is
9183 of unknown (at compile-time) size. */
9185 ix86_builtin_setjmp_frame_value (void)
9187 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9190 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9191 field in the TCB, so they cannot be used together. */
9194 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9195 struct gcc_options *opts ATTRIBUTE_UNUSED)
9199 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9201 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9204 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9207 error ("%<-fsplit-stack%> requires "
9208 "assembler support for CFI directives");
9216 /* When using -fsplit-stack, the allocation routines set a field in
9217 the TCB to the bottom of the stack plus this much space, measured
9220 #define SPLIT_STACK_AVAILABLE 256
9222 /* Fill the structure ix86_frame describing the frame of the currently computed function. */
9225 ix86_compute_frame_layout (struct ix86_frame *frame)
9227 unsigned int stack_alignment_needed;
9228 HOST_WIDE_INT offset;
9229 unsigned int preferred_alignment;
9230 HOST_WIDE_INT size = get_frame_size ();
9231 HOST_WIDE_INT to_allocate;
9233 frame->nregs = ix86_nsaved_regs ();
9234 frame->nsseregs = ix86_nsaved_sseregs ();
9236 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9237 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9239 /* The 64-bit MS ABI seems to require stack alignment to always be 16 except
9240 for function prologues and leaf functions. */
9241 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9242 && (!current_function_is_leaf || cfun->calls_alloca != 0
9243 || ix86_current_function_calls_tls_descriptor))
9245 preferred_alignment = 16;
9246 stack_alignment_needed = 16;
9247 crtl->preferred_stack_boundary = 128;
9248 crtl->stack_alignment_needed = 128;
9251 gcc_assert (!size || stack_alignment_needed);
9252 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9253 gcc_assert (preferred_alignment <= stack_alignment_needed);
9255 /* For SEH we have to limit the amount of code movement into the prologue.
9256 At present we do this via a BLOCKAGE, at which point there's very little
9257 scheduling that can be done, which means that there's very little point
9258 in doing anything except PUSHs. */
9260 cfun->machine->use_fast_prologue_epilogue = false;
9262 /* During reload iteration the number of registers saved can change.
9263 Recompute the value as needed. Do not recompute when the number of
9264 registers didn't change, as reload makes multiple calls to this function
9265 and does not expect the decision to change within a single iteration. */
9266 else if (!optimize_function_for_size_p (cfun)
9267 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9269 int count = frame->nregs;
9270 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9272 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9274 /* The fast prologue uses moves instead of pushes to save registers. This
9275 is significantly longer, but also executes faster, as modern hardware
9276 can execute the moves in parallel but cannot do so for push/pop.
9278 Be careful about choosing which prologue to emit: when the function takes
9279 many instructions to execute we may use the slow version, likewise when
9280 the function is known to be outside a hot spot (this is known only
9281 with feedback). Weight the size of the function by the number of registers
9282 to save, as it is cheap to use one or two push instructions but very
9283 slow to use many of them. */
9285 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9286 if (node->frequency < NODE_FREQUENCY_NORMAL
9287 || (flag_branch_probabilities
9288 && node->frequency < NODE_FREQUENCY_HOT))
9289 cfun->machine->use_fast_prologue_epilogue = false;
9291 cfun->machine->use_fast_prologue_epilogue
9292 = !expensive_function_p (count);
9294 if (TARGET_PROLOGUE_USING_MOVE
9295 && cfun->machine->use_fast_prologue_epilogue)
9296 frame->save_regs_using_mov = true;
9298 frame->save_regs_using_mov = false;
9300 /* If static stack checking is enabled and done with probes, the registers
9301 need to be saved before allocating the frame. */
9302 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9303 frame->save_regs_using_mov = false;
9305 /* Skip return address. */
9306 offset = UNITS_PER_WORD;
9308 /* Skip pushed static chain. */
9309 if (ix86_static_chain_on_stack)
9310 offset += UNITS_PER_WORD;
9312 /* Skip saved base pointer. */
9313 if (frame_pointer_needed)
9314 offset += UNITS_PER_WORD;
9315 frame->hfp_save_offset = offset;
9317 /* The traditional frame pointer location is at the top of the frame. */
9318 frame->hard_frame_pointer_offset = offset;
9320 /* Register save area */
9321 offset += frame->nregs * UNITS_PER_WORD;
9322 frame->reg_save_offset = offset;
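/* For illustration (ia32, frame pointer needed, two call-saved GP
   registers, no pushed static chain): offset is 4 past the return
   address, 8 once the saved %ebp is skipped (hard_frame_pointer_offset),
   and 16 after the two 4-byte register saves (reg_save_offset). */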
9324 /* Align and set SSE register save area. */
9325 if (frame->nsseregs)
9327 /* The only ABI that has saved SSE registers (Win64) also has a
9328 16-byte aligned default stack, and thus we don't need to be
9329 within the re-aligned local stack frame to save them. */
9330 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9331 offset = (offset + 16 - 1) & -16;
9332 offset += frame->nsseregs * 16;
9334 frame->sse_reg_save_offset = offset;
9336 /* The re-aligned stack starts here. Values before this point are not
9337 directly comparable with values below this point. In order to make
9338 sure that no value happens to be the same before and after, force
9339 the alignment computation below to add a non-zero value. */
9340 if (stack_realign_fp)
9341 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9344 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9345 offset += frame->va_arg_size;
9347 /* Align start of frame for local function. */
9348 if (stack_realign_fp
9349 || offset != frame->sse_reg_save_offset
9351 || !current_function_is_leaf
9352 || cfun->calls_alloca
9353 || ix86_current_function_calls_tls_descriptor)
9354 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9356 /* Frame pointer points here. */
9357 frame->frame_pointer_offset = offset;
9361 /* Add the outgoing arguments area. It can be skipped if we eliminated
9362 all the function calls as dead code.
9363 Skipping is, however, impossible when the function calls alloca: the
9364 alloca expander assumes that the last crtl->outgoing_args_size bytes
9365 of the stack frame are unused. */
9366 if (ACCUMULATE_OUTGOING_ARGS
9367 && (!current_function_is_leaf || cfun->calls_alloca
9368 || ix86_current_function_calls_tls_descriptor))
9370 offset += crtl->outgoing_args_size;
9371 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9374 frame->outgoing_arguments_size = 0;
9376 /* Align stack boundary. Only needed if we're calling another function
9378 if (!current_function_is_leaf || cfun->calls_alloca
9379 || ix86_current_function_calls_tls_descriptor)
9380 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9382 /* We've reached end of stack frame. */
9383 frame->stack_pointer_offset = offset;
9385 /* Size prologue needs to allocate. */
9386 to_allocate = offset - frame->sse_reg_save_offset;
9388 if ((!to_allocate && frame->nregs <= 1)
9389 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9390 frame->save_regs_using_mov = false;
9392 if (ix86_using_red_zone ()
9393 && current_function_sp_is_unchanging
9394 && current_function_is_leaf
9395 && !ix86_current_function_calls_tls_descriptor)
9397 frame->red_zone_size = to_allocate;
9398 if (frame->save_regs_using_mov)
9399 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9400 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9401 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9404 frame->red_zone_size = 0;
9405 frame->stack_pointer_offset -= frame->red_zone_size;
9407 /* The SEH frame pointer location is near the bottom of the frame.
9408 This is enforced by the fact that the difference between the
9409 stack pointer and the frame pointer is limited to 240 bytes in
9410 the unwind data structure. */
9415 /* If we can leave the frame pointer where it is, do so. */
9416 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9417 if (diff > 240 || (diff & 15) != 0)
9419 /* Ideally we'd determine what portion of the local stack frame
9420 (within the constraint of the lowest 240) is most heavily used.
9421 But without that complication, simply bias the frame pointer
9422 by 128 bytes so as to maximize the amount of the local stack
9423 frame that is addressable with 8-bit offsets. */
9424 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
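/* A signed 8-bit displacement reaches [-128, +127] bytes around the base
   register, so parking the frame pointer 128 bytes above the stack
   pointer makes roughly the 256 bytes nearest the stack pointer
   addressable with the short encoding. */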
9429 /* This is semi-inlined memory_address_length, but simplified
9430 since we know that we're always dealing with reg+offset, and
9431 to avoid having to create and discard all that rtl. */
9434 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9440 /* EBP and R13 cannot be encoded without an offset. */
9441 len = (regno == BP_REG || regno == R13_REG);
9443 else if (IN_RANGE (offset, -128, 127))
9446 /* ESP and R12 must be encoded with a SIB byte. */
9447 if (regno == SP_REG || regno == R12_REG)
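/* E.g. 8(%esp) costs two extra bytes (a disp8 plus the mandatory SIB
   byte) while 8(%eax) costs only one; and since (%ebp) cannot be encoded
   without a displacement, even a zero offset from %ebp spends a byte. */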
9453 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9454 The valid base registers are taken from CFUN->MACHINE->FS. */
9457 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9459 const struct machine_function *m = cfun->machine;
9460 rtx base_reg = NULL;
9461 HOST_WIDE_INT base_offset = 0;
9463 if (m->use_fast_prologue_epilogue)
9465 /* Choose the base register most likely to allow the most scheduling
9466 opportunities. Generally FP is valid throughout the function,
9467 while DRAP must be reloaded within the epilogue. But choose either
9468 over the SP due to increased encoding size. */
9472 base_reg = hard_frame_pointer_rtx;
9473 base_offset = m->fs.fp_offset - cfa_offset;
9475 else if (m->fs.drap_valid)
9477 base_reg = crtl->drap_reg;
9478 base_offset = 0 - cfa_offset;
9480 else if (m->fs.sp_valid)
9482 base_reg = stack_pointer_rtx;
9483 base_offset = m->fs.sp_offset - cfa_offset;
9488 HOST_WIDE_INT toffset;
9491 /* Choose the base register with the smallest address encoding.
9492 With a tie, choose FP > DRAP > SP. */
9495 base_reg = stack_pointer_rtx;
9496 base_offset = m->fs.sp_offset - cfa_offset;
9497 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9499 if (m->fs.drap_valid)
9501 toffset = 0 - cfa_offset;
9502 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9505 base_reg = crtl->drap_reg;
9506 base_offset = toffset;
9512 toffset = m->fs.fp_offset - cfa_offset;
9513 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9516 base_reg = hard_frame_pointer_rtx;
9517 base_offset = toffset;
9522 gcc_assert (base_reg != NULL);
9524 return plus_constant (base_reg, base_offset);
9527 /* Emit code to save registers in the prologue. */
9530 ix86_emit_save_regs (void)
9535 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9536 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9538 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9539 RTX_FRAME_RELATED_P (insn) = 1;
9543 /* Emit a single register save at CFA - CFA_OFFSET. */
9546 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9547 HOST_WIDE_INT cfa_offset)
9549 struct machine_function *m = cfun->machine;
9550 rtx reg = gen_rtx_REG (mode, regno);
9551 rtx mem, addr, base, insn;
9553 addr = choose_baseaddr (cfa_offset);
9554 mem = gen_frame_mem (mode, addr);
9556 /* For SSE saves, we need to indicate the 128-bit alignment. */
9557 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9559 insn = emit_move_insn (mem, reg);
9560 RTX_FRAME_RELATED_P (insn) = 1;
9563 if (GET_CODE (base) == PLUS)
9564 base = XEXP (base, 0);
9565 gcc_checking_assert (REG_P (base));
9567 /* When saving registers into a re-aligned local stack frame, avoid
9568 any tricky guessing by dwarf2out. */
9569 if (m->fs.realigned)
9571 gcc_checking_assert (stack_realign_drap);
9573 if (regno == REGNO (crtl->drap_reg))
9575 /* A bit of a hack. We force the DRAP register to be saved in
9576 the re-aligned stack frame, which provides us with a copy
9577 of the CFA that will last past the prologue. Install it. */
9578 gcc_checking_assert (cfun->machine->fs.fp_valid);
9579 addr = plus_constant (hard_frame_pointer_rtx,
9580 cfun->machine->fs.fp_offset - cfa_offset);
9581 mem = gen_rtx_MEM (mode, addr);
9582 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9586 /* The frame pointer is a stable reference within the
9587 aligned frame. Use it. */
9588 gcc_checking_assert (cfun->machine->fs.fp_valid);
9589 addr = plus_constant (hard_frame_pointer_rtx,
9590 cfun->machine->fs.fp_offset - cfa_offset);
9591 mem = gen_rtx_MEM (mode, addr);
9592 add_reg_note (insn, REG_CFA_EXPRESSION,
9593 gen_rtx_SET (VOIDmode, mem, reg));
9597 /* The memory may not be relative to the current CFA register,
9598 which means that we may need to generate a new pattern for
9599 use by the unwind info. */
9600 else if (base != m->fs.cfa_reg)
9602 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9603 mem = gen_rtx_MEM (mode, addr);
9604 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9608 /* Emit code to save registers using MOV insns.
9609 First register is stored at CFA - CFA_OFFSET. */
9611 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9615 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9616 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9618 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9619 cfa_offset -= UNITS_PER_WORD;
9623 /* Emit code to save SSE registers using MOV insns.
9624 First register is stored at CFA - CFA_OFFSET. */
9626 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9630 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9631 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9633 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9638 static GTY(()) rtx queued_cfa_restores;
9640 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9641 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9642 Don't add the note if the previously saved value will be left untouched
9643 within the stack red zone until return, as unwinders can find the same value
9644 in the register and on the stack. */
9647 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9649 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9654 add_reg_note (insn, REG_CFA_RESTORE, reg);
9655 RTX_FRAME_RELATED_P (insn) = 1;
9659 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9662 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9665 ix86_add_queued_cfa_restore_notes (rtx insn)
9668 if (!queued_cfa_restores)
9670 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9672 XEXP (last, 1) = REG_NOTES (insn);
9673 REG_NOTES (insn) = queued_cfa_restores;
9674 queued_cfa_restores = NULL_RTX;
9675 RTX_FRAME_RELATED_P (insn) = 1;
9678 /* Expand prologue or epilogue stack adjustment.
9679 The pattern exists to put a dependency on all ebp-based memory accesses.
9680 STYLE should be negative if instructions should be marked as frame related,
9681 zero if the %r11 register is live and cannot be freely used, and positive
9685 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9686 int style, bool set_cfa)
9688 struct machine_function *m = cfun->machine;
9690 bool add_frame_related_expr = false;
9693 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9694 else if (x86_64_immediate_operand (offset, DImode))
9695 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9699 /* r11 is used by indirect sibcall return as well, set before the
9700 epilogue and used after the epilogue. */
9702 tmp = gen_rtx_REG (DImode, R11_REG);
9705 gcc_assert (src != hard_frame_pointer_rtx
9706 && dest != hard_frame_pointer_rtx);
9707 tmp = hard_frame_pointer_rtx;
9709 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9711 add_frame_related_expr = true;
9713 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9716 insn = emit_insn (insn);
9718 ix86_add_queued_cfa_restore_notes (insn);
9724 gcc_assert (m->fs.cfa_reg == src);
9725 m->fs.cfa_offset += INTVAL (offset);
9726 m->fs.cfa_reg = dest;
9728 r = gen_rtx_PLUS (Pmode, src, offset);
9729 r = gen_rtx_SET (VOIDmode, dest, r);
9730 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9731 RTX_FRAME_RELATED_P (insn) = 1;
9735 RTX_FRAME_RELATED_P (insn) = 1;
9736 if (add_frame_related_expr)
9738 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9739 r = gen_rtx_SET (VOIDmode, dest, r);
9740 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9744 if (dest == stack_pointer_rtx)
9746 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9747 bool valid = m->fs.sp_valid;
9749 if (src == hard_frame_pointer_rtx)
9751 valid = m->fs.fp_valid;
9752 ooffset = m->fs.fp_offset;
9754 else if (src == crtl->drap_reg)
9756 valid = m->fs.drap_valid;
9761 /* Else there are two possibilities: SP itself, which we set
9762 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9763 taken care of by hand along the eh_return path. */
9764 gcc_checking_assert (src == stack_pointer_rtx
9765 || offset == const0_rtx);
9768 m->fs.sp_offset = ooffset - INTVAL (offset);
9769 m->fs.sp_valid = valid;
9773 /* Find an available register to be used as the dynamic realign argument
9774 pointer register. Such a register will be written in the prologue and
9775 used at the beginning of the body, so it must not be
9776 1. a parameter-passing register.
9778 We reuse the static-chain register if it is available. Otherwise, we
9779 use DI for i386 and R13 for x86-64. We chose R13 since it has
9782 Return: the regno of the chosen register. */
9785 find_drap_reg (void)
9787 tree decl = cfun->decl;
9791 /* Use R13 for a nested function or a function that needs a static chain.
9792 Since a function with a tail call may use any caller-saved
9793 registers in the epilogue, DRAP must not use a caller-saved
9794 register in that case. */
9795 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9802 /* Use DI for a nested function or a function that needs a static chain.
9803 Since a function with a tail call may use any caller-saved
9804 registers in the epilogue, DRAP must not use a caller-saved
9805 register in that case. */
9806 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9809 /* Reuse static chain register if it isn't used for parameter
9811 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9813 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9814 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9821 /* Return minimum incoming stack alignment. */
9824 ix86_minimum_incoming_stack_boundary (bool sibcall)
9826 unsigned int incoming_stack_boundary;
9828 /* Prefer the one specified at command line. */
9829 if (ix86_user_incoming_stack_boundary)
9830 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9831 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9832 if -mstackrealign is used: it isn't used for the sibcall check, and the
9833 estimated stack alignment is 128bit. */
9836 && ix86_force_align_arg_pointer
9837 && crtl->stack_alignment_estimated == 128)
9838 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9840 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9842 /* Incoming stack alignment can be changed on individual functions
9843 via force_align_arg_pointer attribute. We use the smallest
9844 incoming stack boundary. */
9845 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9846 && lookup_attribute (ix86_force_align_arg_pointer_string,
9847 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9848 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9850 /* The incoming stack frame has to be aligned at least at
9851 parm_stack_boundary. */
9852 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9853 incoming_stack_boundary = crtl->parm_stack_boundary;
9855 /* The stack at the entrance of main is aligned by the runtime. We
9856 use the smallest incoming stack boundary. */
9857 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9858 && DECL_NAME (current_function_decl)
9859 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9860 && DECL_FILE_SCOPE_P (current_function_decl))
9861 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9863 return incoming_stack_boundary;
9866 /* Update incoming stack boundary and estimated stack alignment. */
9869 ix86_update_stack_boundary (void)
9871 ix86_incoming_stack_boundary
9872 = ix86_minimum_incoming_stack_boundary (false);
9874 /* x86_64 vararg needs 16byte stack alignment for register save
9878 && crtl->stack_alignment_estimated < 128)
9879 crtl->stack_alignment_estimated = 128;
9882 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9883 needed or an rtx for DRAP otherwise. */
9886 ix86_get_drap_rtx (void)
9888 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9889 crtl->need_drap = true;
9891 if (stack_realign_drap)
9893 /* Assign DRAP to vDRAP and return vDRAP. */
9894 unsigned int regno = find_drap_reg ();
9899 arg_ptr = gen_rtx_REG (Pmode, regno);
9900 crtl->drap_reg = arg_ptr;
9903 drap_vreg = copy_to_reg (arg_ptr);
9907 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9910 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9911 RTX_FRAME_RELATED_P (insn) = 1;
9919 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9922 ix86_internal_arg_pointer (void)
9924 return virtual_incoming_args_rtx;
9927 struct scratch_reg {
9932 /* Return a short-lived scratch register for use on function entry.
9933 In 32-bit mode, it is valid only after the registers are saved
9934 in the prologue. This register must be released by means of
9935 release_scratch_register_on_entry once it is dead. */
9938 get_scratch_register_on_entry (struct scratch_reg *sr)
9946 /* We always use R11 in 64-bit mode. */
9951 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9953 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9954 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9955 int regparm = ix86_function_regparm (fntype, decl);
9957 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9959 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9960 for the static chain register. */
9961 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9962 && drap_regno != AX_REG)
9964 else if (regparm < 2 && drap_regno != DX_REG)
9966 /* ecx is the static chain register. */
9967 else if (regparm < 3 && !fastcall_p && !static_chain_p
9968 && drap_regno != CX_REG)
9970 else if (ix86_save_reg (BX_REG, true))
9972 /* esi is the static chain register. */
9973 else if (!(regparm == 3 && static_chain_p)
9974 && ix86_save_reg (SI_REG, true))
9976 else if (ix86_save_reg (DI_REG, true))
9980 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9985 sr->reg = gen_rtx_REG (Pmode, regno);
9988 rtx insn = emit_insn (gen_push (sr->reg));
9989 RTX_FRAME_RELATED_P (insn) = 1;
9993 /* Release a scratch register obtained from the preceding function. */
9996 release_scratch_register_on_entry (struct scratch_reg *sr)
10000 rtx x, insn = emit_insn (gen_pop (sr->reg));
10002 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10003 RTX_FRAME_RELATED_P (insn) = 1;
10004 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10005 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10006 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10010 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10012 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10015 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10017 /* We skip the probe for the first interval + a small dope of 4 words and
10018 probe that many bytes past the specified size to maintain a protection
10019 area at the bottom of the stack. */
10020 const int dope = 4 * UNITS_PER_WORD;
10021 rtx size_rtx = GEN_INT (size), last;
10023 /* See if we have a constant small number of probes to generate. If so,
10024 that's the easy case. The run-time loop is made up of 11 insns in the
10025 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10026 for n # of intervals. */
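/* E.g. at size == 5 * PROBE_INTERVAL the unrolled sequence costs
   3 + 2 * (5 - 1) == 11 insns, the break-even point with the run-time
   loop, hence the 5 * PROBE_INTERVAL cutoff used below. */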
10027 if (size <= 5 * PROBE_INTERVAL)
10029 HOST_WIDE_INT i, adjust;
10030 bool first_probe = true;
10032 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10033 values of N from 1 until it exceeds SIZE. If only one probe is
10034 needed, this will not generate any code. Then adjust and probe
10035 to PROBE_INTERVAL + SIZE. */
10036 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10040 adjust = 2 * PROBE_INTERVAL + dope;
10041 first_probe = false;
10044 adjust = PROBE_INTERVAL;
10046 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10047 plus_constant (stack_pointer_rtx, -adjust)));
10048 emit_stack_probe (stack_pointer_rtx);
10052 adjust = size + PROBE_INTERVAL + dope;
10054 adjust = size + PROBE_INTERVAL - i;
10056 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10057 plus_constant (stack_pointer_rtx, -adjust)));
10058 emit_stack_probe (stack_pointer_rtx);
10060 /* Adjust back to account for the additional first interval. */
10061 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10062 plus_constant (stack_pointer_rtx,
10063 PROBE_INTERVAL + dope)));
10066 /* Otherwise, do the same as above, but in a loop. Note that we must be
10067 extra careful with variables wrapping around because we might be at
10068 the very top (or the very bottom) of the address space and we have
10069 to be able to handle this case properly; in particular, we use an
10070 equality test for the loop condition. */
10073 HOST_WIDE_INT rounded_size;
10074 struct scratch_reg sr;
10076 get_scratch_register_on_entry (&sr);
10079 /* Step 1: round SIZE to the previous multiple of the interval. */
10081 rounded_size = size & -PROBE_INTERVAL;
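/* E.g. with a 4096-byte PROBE_INTERVAL, a size of 10000 rounds down to
   8192; the residual 1808 bytes are covered by the extra adjust-and-probe
   in step 4 below. */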
10084 /* Step 2: compute initial and final value of the loop counter. */
10086 /* SP = SP_0 + PROBE_INTERVAL. */
10087 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10088 plus_constant (stack_pointer_rtx,
10089 - (PROBE_INTERVAL + dope))));
10091 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10092 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10093 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10094 gen_rtx_PLUS (Pmode, sr.reg,
10095 stack_pointer_rtx)));
10098 /* Step 3: the loop
10100 while (SP != LAST_ADDR)
10102 SP = SP + PROBE_INTERVAL
10106 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10107 values of N from 1 until it is equal to ROUNDED_SIZE. */
10109 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10112 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10113 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10115 if (size != rounded_size)
10117 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10118 plus_constant (stack_pointer_rtx,
10119 rounded_size - size)));
10120 emit_stack_probe (stack_pointer_rtx);
10123 /* Adjust back to account for the additional first interval. */
10124 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10125 plus_constant (stack_pointer_rtx,
10126 PROBE_INTERVAL + dope)));
10128 release_scratch_register_on_entry (&sr);
10131 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10133 /* Even if the stack pointer isn't the CFA register, we need to correctly
10134 describe the adjustments made to it, in particular differentiate the
10135 frame-related ones from the frame-unrelated ones. */
10138 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10139 XVECEXP (expr, 0, 0)
10140 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10141 plus_constant (stack_pointer_rtx, -size));
10142 XVECEXP (expr, 0, 1)
10143 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10144 plus_constant (stack_pointer_rtx,
10145 PROBE_INTERVAL + dope + size));
10146 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10147 RTX_FRAME_RELATED_P (last) = 1;
10149 cfun->machine->fs.sp_offset += size;
10152 /* Make sure nothing is scheduled before we are done. */
10153 emit_insn (gen_blockage ());
10156 /* Adjust the stack pointer up to REG while probing it. */
10159 output_adjust_stack_and_probe (rtx reg)
10161 static int labelno = 0;
10162 char loop_lab[32], end_lab[32];
10165 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10166 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10168 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10170 /* Jump to END_LAB if SP == LAST_ADDR. */
10171 xops[0] = stack_pointer_rtx;
10173 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10174 fputs ("\tje\t", asm_out_file);
10175 assemble_name_raw (asm_out_file, end_lab);
10176 fputc ('\n', asm_out_file);
10178 /* SP = SP + PROBE_INTERVAL. */
10179 xops[1] = GEN_INT (PROBE_INTERVAL);
10180 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10183 xops[1] = const0_rtx;
10184 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
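/* The "or" of zero rewrites the word at the stack pointer with itself:
   the value is unchanged, but the store is sufficient to fault in an
   unmapped guard page, which is all the probe needs to do. */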
10186 fprintf (asm_out_file, "\tjmp\t");
10187 assemble_name_raw (asm_out_file, loop_lab);
10188 fputc ('\n', asm_out_file);
10190 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10195 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10196 inclusive. These are offsets from the current stack pointer. */
10199 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10201 /* See if we have a constant small number of probes to generate. If so,
10202 that's the easy case. The run-time loop is made up of 7 insns in the
10203 generic case while the compile-time loop is made up of n insns for n #
10205 if (size <= 7 * PROBE_INTERVAL)
10209 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10210 it exceeds SIZE. If only one probe is needed, this will not
10211 generate any code. Then probe at FIRST + SIZE. */
10212 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10213 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10215 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
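/* E.g. with a 4096-byte interval, FIRST == 0 and SIZE == 12000, probes
   are emitted at sp - 4096, sp - 8192 and sp - 12000. */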
10218 /* Otherwise, do the same as above, but in a loop. Note that we must be
10219 extra careful with variables wrapping around because we might be at
10220 the very top (or the very bottom) of the address space and we have
10221 to be able to handle this case properly; in particular, we use an
10222 equality test for the loop condition. */
10225 HOST_WIDE_INT rounded_size, last;
10226 struct scratch_reg sr;
10228 get_scratch_register_on_entry (&sr);
10231 /* Step 1: round SIZE to the previous multiple of the interval. */
10233 rounded_size = size & -PROBE_INTERVAL;
10236 /* Step 2: compute initial and final value of the loop counter. */
10238 /* TEST_OFFSET = FIRST. */
10239 emit_move_insn (sr.reg, GEN_INT (-first));
10241 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10242 last = first + rounded_size;
10245 /* Step 3: the loop
10247 while (TEST_ADDR != LAST_ADDR)
10249 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10253 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10254 until it is equal to ROUNDED_SIZE. */
10256 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10259 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10260 that SIZE is equal to ROUNDED_SIZE. */
10262 if (size != rounded_size)
10263 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10266 rounded_size - size));
10268 release_scratch_register_on_entry (&sr);
10271 /* Make sure nothing is scheduled before we are done. */
10272 emit_insn (gen_blockage ());
10275 /* Probe a range of stack addresses from REG to END, inclusive. These are
10276 offsets from the current stack pointer. */
10279 output_probe_stack_range (rtx reg, rtx end)
10281 static int labelno = 0;
10282 char loop_lab[32], end_lab[32];
10285 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10286 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10288 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10294 fputs ("\tje\t", asm_out_file);
10295 assemble_name_raw (asm_out_file, end_lab);
10296 fputc ('\n', asm_out_file);
10298 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10299 xops[1] = GEN_INT (PROBE_INTERVAL);
10300 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10302 /* Probe at TEST_ADDR. */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
10306 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10308 fprintf (asm_out_file, "\tjmp\t");
10309 assemble_name_raw (asm_out_file, loop_lab);
10310 fputc ('\n', asm_out_file);
10312 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
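  /* This is the output routine for the insn generated via
     ix86_gen_probe_stack_range in Step 3 of ix86_emit_probe_stack_range
     above; the loop has the same shape as the one before it, but the
     probe is done at SP + TEST_OFFSET rather than at SP itself.  */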
10317 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10318 to be generated in correct form. */
10320 ix86_finalize_stack_realign_flags (void)
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
10324 unsigned int incoming_stack_boundary
10325 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10326 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10327 unsigned int stack_realign = (incoming_stack_boundary
10328 < (current_function_is_leaf
10329 ? crtl->max_used_stack_slot_alignment
10330 : crtl->stack_alignment_needed));
10332 if (crtl->stack_realign_finalized)
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
10336 gcc_assert (crtl->stack_realign_needed == stack_realign);
10340 crtl->stack_realign_needed = stack_realign;
10341 crtl->stack_realign_finalized = true;
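  /* For example (illustrative): a 32-bit function whose incoming stack
     boundary is 32 bits but which has a local requiring 128-bit (SSE)
     alignment ends up with stack_realign set here, causing the prologue
     to emit an "andl $-16, %esp" style realignment.  */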
10345 /* Expand the prologue into a bunch of separate insns. */
10348 ix86_expand_prologue (void)
10350 struct machine_function *m = cfun->machine;
10353 struct ix86_frame frame;
10354 HOST_WIDE_INT allocate;
10355 bool int_registers_saved;
10357 ix86_finalize_stack_realign_flags ();
  /* DRAP should not coexist with stack_realign_fp.  */
10360 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10362 memset (&m->fs, 0, sizeof (m->fs));
10364 /* Initialize CFA state for before the prologue. */
10365 m->fs.cfa_reg = stack_pointer_rtx;
10366 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
10371 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10372 m->fs.sp_valid = true;
10374 ix86_compute_frame_layout (&frame);
10376 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10378 /* We should have already generated an error for any use of
10379 ms_hook on a nested function. */
10380 gcc_checking_assert (!ix86_static_chain_on_stack);
      /* Check if profiling is active and we shall use the
	 profiling-before-prologue variant.  If so, sorry.  */
10384 if (crtl->profile && flag_fentry != 0)
10385 sorry ("ms_hook_prologue attribute isn%'t compatible "
10386 "with -mfentry for 32-bit");
10388 /* In ix86_asm_output_function_label we emitted:
	  8b ff	    movl.s %edi,%edi
	  55        push   %ebp
	  8b ec     movl.s %esp,%ebp
10393 This matches the hookable function prologue in Win32 API
10394 functions in Microsoft Windows XP Service Pack 2 and newer.
10395 Wine uses this to enable Windows apps to hook the Win32 API
10396 functions provided by Wine.
10398 What that means is that we've already set up the frame pointer. */
10400 if (frame_pointer_needed
10401 && !(crtl->drap_reg && crtl->stack_realign_needed))
10405 /* We've decided to use the frame pointer already set up.
10406 Describe this to the unwinder by pretending that both
10407 push and mov insns happen right here.
10409 Putting the unwind info here at the end of the ms_hook
10410 is done so that we can make absolutely certain we get
10411 the required byte sequence at the start of the function,
10412 rather than relying on an assembler that can produce
10413 the exact encoding required.
10415 However it does mean (in the unpatched case) that we have
10416 a 1 insn window where the asynchronous unwind info is
10417 incorrect. However, if we placed the unwind info at
10418 its correct location we would have incorrect unwind info
	     in the patched case.  This is probably all moot since
	     I don't expect Wine to generate dwarf2 unwind info for the
	     system libraries that use this feature.  */
10423 insn = emit_insn (gen_blockage ());
10425 push = gen_push (hard_frame_pointer_rtx);
10426 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10427 stack_pointer_rtx);
10428 RTX_FRAME_RELATED_P (push) = 1;
10429 RTX_FRAME_RELATED_P (mov) = 1;
10431 RTX_FRAME_RELATED_P (insn) = 1;
10432 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10433 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10435 /* Note that gen_push incremented m->fs.cfa_offset, even
10436 though we didn't emit the push insn here. */
10437 m->fs.cfa_reg = hard_frame_pointer_rtx;
10438 m->fs.fp_offset = m->fs.cfa_offset;
10439 m->fs.fp_valid = true;
10443 /* The frame pointer is not needed so pop %ebp again.
10444 This leaves us with a pristine state. */
10445 emit_insn (gen_pop (hard_frame_pointer_rtx));
10449 /* The first insn of a function that accepts its static chain on the
10450 stack is to push the register that would be filled in by a direct
10451 call. This insn will be skipped by the trampoline. */
10452 else if (ix86_static_chain_on_stack)
10454 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10455 emit_insn (gen_blockage ());
10457 /* We don't want to interpret this push insn as a register save,
10458 only as a stack adjustment. The real copy of the register as
10459 a save will be done later, if needed. */
10460 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10461 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10462 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10463 RTX_FRAME_RELATED_P (insn) = 1;
  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
10468 if (stack_realign_drap)
10470 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      /* Only need to push the parameter pointer reg if it is callee saved
	 (i.e. not among call_used_regs).  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	  /* Push the arg pointer reg.  */
10476 insn = emit_insn (gen_push (crtl->drap_reg));
10477 RTX_FRAME_RELATED_P (insn) = 1;
10480 /* Grab the argument pointer. */
10481 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10482 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10483 RTX_FRAME_RELATED_P (insn) = 1;
10484 m->fs.cfa_reg = crtl->drap_reg;
10485 m->fs.cfa_offset = 0;
10487 /* Align the stack. */
10488 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10490 GEN_INT (-align_bytes)));
10491 RTX_FRAME_RELATED_P (insn) = 1;
      /* Replicate the return address on the stack so that the return
	 address can be reached via the (argp - 1) slot.  This is needed
10495 to implement macro RETURN_ADDR_RTX and intrinsic function
10496 expand_builtin_return_addr etc. */
10497 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10498 t = gen_frame_mem (Pmode, t);
10499 insn = emit_insn (gen_push (t));
10500 RTX_FRAME_RELATED_P (insn) = 1;
10502 /* For the purposes of frame and register save area addressing,
10503 we've started over with a new frame. */
10504 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10505 m->fs.realigned = true;
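      /* For illustration only: on 32-bit, with a 16-byte alignment
	 requirement and %ecx as the DRAP register, the sequence emitted
	 above is roughly

		leal	4(%esp), %ecx		# DRAP = incoming arg pointer
		andl	$-16, %esp		# align the stack
		pushl	-4(%ecx)		# replicate the return address

	 The actual register, offsets and mask depend on the frame.  */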
10508 if (frame_pointer_needed && !m->fs.fp_valid)
10510 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10511 slower on all targets. Also sdb doesn't like it. */
10512 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10513 RTX_FRAME_RELATED_P (insn) = 1;
10515 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10517 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10518 RTX_FRAME_RELATED_P (insn) = 1;
10520 if (m->fs.cfa_reg == stack_pointer_rtx)
10521 m->fs.cfa_reg = hard_frame_pointer_rtx;
10522 m->fs.fp_offset = m->fs.sp_offset;
10523 m->fs.fp_valid = true;
10527 int_registers_saved = (frame.nregs == 0);
10529 if (!int_registers_saved)
10531 /* If saving registers via PUSH, do so now. */
10532 if (!frame.save_regs_using_mov)
10534 ix86_emit_save_regs ();
10535 int_registers_saved = true;
10536 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10539 /* When using red zone we may start register saving before allocating
10540 the stack frame saving one cycle of the prologue. However, avoid
10541 doing this if we have to probe the stack; at least on x86_64 the
10542 stack probe can turn into a call that clobbers a red zone location. */
10543 else if (ix86_using_red_zone ()
10544 && (! TARGET_STACK_PROBE
10545 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10547 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10548 int_registers_saved = true;
10552 if (stack_realign_fp)
10554 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10555 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10557 /* The computation of the size of the re-aligned stack frame means
10558 that we must allocate the size of the register save area before
10559 performing the actual alignment. Otherwise we cannot guarantee
10560 that there's enough storage above the realignment point. */
10561 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10562 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10563 GEN_INT (m->fs.sp_offset
10564 - frame.sse_reg_save_offset),
10567 /* Align the stack. */
10568 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10570 GEN_INT (-align_bytes)));
10572 /* For the purposes of register save area addressing, the stack
10573 pointer is no longer valid. As for the value of sp_offset,
10574 see ix86_compute_frame_layout, which we need to match in order
10575 to pass verification of stack_pointer_offset at the end. */
10576 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10577 m->fs.sp_valid = false;
10580 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10582 if (flag_stack_usage)
      /* We start counting from ARG_POINTER.  */
10585 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10587 /* If it was realigned, take into account the fake frame. */
10588 if (stack_realign_drap)
10590 if (ix86_static_chain_on_stack)
10591 stack_size += UNITS_PER_WORD;
10593 if (!call_used_regs[REGNO (crtl->drap_reg)])
10594 stack_size += UNITS_PER_WORD;
10596 /* This over-estimates by 1 minimal-stack-alignment-unit but
10597 mitigates that by counting in the new return address slot. */
10598 current_function_dynamic_stack_size
10599 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10602 current_function_static_stack_size = stack_size;
10605 /* The stack has already been decremented by the instruction calling us
10606 so probe if the size is non-negative to preserve the protection area. */
10607 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10609 /* We expect the registers to be saved when probes are used. */
10610 gcc_assert (int_registers_saved);
10612 if (STACK_CHECK_MOVING_SP)
10614 ix86_adjust_stack_and_probe (allocate);
10619 HOST_WIDE_INT size = allocate;
10621 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10622 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10624 if (TARGET_STACK_PROBE)
10625 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10627 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10633 else if (!ix86_target_stack_probe ()
10634 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10636 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10637 GEN_INT (-allocate), -1,
10638 m->fs.cfa_reg == stack_pointer_rtx);
10642 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10644 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10646 bool eax_live = false;
10647 bool r10_live = false;
10650 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10651 if (!TARGET_64BIT_MS_ABI)
10652 eax_live = ix86_eax_live_at_start_p ();
10656 emit_insn (gen_push (eax));
10657 allocate -= UNITS_PER_WORD;
10661 r10 = gen_rtx_REG (Pmode, R10_REG);
10662 emit_insn (gen_push (r10));
10663 allocate -= UNITS_PER_WORD;
10666 emit_move_insn (eax, GEN_INT (allocate));
10667 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
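	  /* The worker (e.g. ___chkstk_ms on MinGW-w64 targets; the exact
	     helper is target-specific) probes each page of the new area
	     but changes neither %eax nor %esp; the explicit subtraction
	     below then performs the real stack adjustment.  */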
10669 /* Use the fact that AX still contains ALLOCATE. */
10670 adjust_stack_insn = (TARGET_64BIT
10671 ? gen_pro_epilogue_adjust_stack_di_sub
10672 : gen_pro_epilogue_adjust_stack_si_sub);
10674 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10675 stack_pointer_rtx, eax));
10677 /* Note that SEH directives need to continue tracking the stack
10678 pointer even after the frame pointer has been set up. */
10679 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10681 if (m->fs.cfa_reg == stack_pointer_rtx)
10682 m->fs.cfa_offset += allocate;
10684 RTX_FRAME_RELATED_P (insn) = 1;
10685 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10686 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10687 plus_constant (stack_pointer_rtx,
10690 m->fs.sp_offset += allocate;
10692 if (r10_live && eax_live)
10694 t = choose_baseaddr (m->fs.sp_offset - allocate);
10695 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10696 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10697 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10699 else if (eax_live || r10_live)
10701 t = choose_baseaddr (m->fs.sp_offset - allocate);
10702 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10705 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
10708 if (frame_pointer_needed && !m->fs.fp_valid)
10710 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10711 GEN_INT (frame.stack_pointer_offset
10712 - frame.hard_frame_pointer_offset));
10713 insn = emit_insn (insn);
10714 RTX_FRAME_RELATED_P (insn) = 1;
10715 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10717 if (m->fs.cfa_reg == stack_pointer_rtx)
10718 m->fs.cfa_reg = hard_frame_pointer_rtx;
10719 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10720 m->fs.fp_valid = true;
10723 if (!int_registers_saved)
10724 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10725 if (frame.nsseregs)
10726 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10728 pic_reg_used = false;
10729 if (pic_offset_table_rtx
10730 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10733 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10735 if (alt_pic_reg_used != INVALID_REGNUM)
10736 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10738 pic_reg_used = true;
10745 if (ix86_cmodel == CM_LARGE_PIC)
10747 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10748 rtx label = gen_label_rtx ();
10749 emit_label (label);
10750 LABEL_PRESERVE_P (label) = 1;
10751 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10752 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10753 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10754 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10755 pic_offset_table_rtx, tmp_reg));
10758 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10761 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
  /* In the pic_reg_used case, make sure that the GOT load isn't deleted
     when mcount needs it.  A blockage to avoid call movement across the
     mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
10768 if (crtl->profile && !flag_fentry && pic_reg_used)
10769 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10771 if (crtl->drap_reg && !crtl->stack_realign_needed)
      /* vDRAP is set up, but after reload it turns out stack realignment
	 isn't necessary; here we emit the prologue to set up DRAP
	 without the stack realignment adjustment.  */
10776 t = choose_baseaddr (0);
10777 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10780 /* Prevent instructions from being scheduled into register save push
10781 sequence when access to the redzone area is done through frame pointer.
10782 The offset between the frame pointer and the stack pointer is calculated
10783 relative to the value of the stack pointer at the end of the function
10784 prologue, and moving instructions that access redzone area via frame
10785 pointer inside push sequence violates this assumption. */
10786 if (frame_pointer_needed && frame.red_zone_size)
10787 emit_insn (gen_memory_blockage ());
10789 /* Emit cld instruction if stringops are used in the function. */
10790 if (TARGET_CLD && ix86_current_function_needs_cld)
10791 emit_insn (gen_cld ());
10793 /* SEH requires that the prologue end within 256 bytes of the start of
10794 the function. Prevent instruction schedules that would extend that. */
10796 emit_insn (gen_blockage ());
10799 /* Emit code to restore REG using a POP insn. */
10802 ix86_emit_restore_reg_using_pop (rtx reg)
10804 struct machine_function *m = cfun->machine;
10805 rtx insn = emit_insn (gen_pop (reg));
10807 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10808 m->fs.sp_offset -= UNITS_PER_WORD;
10810 if (m->fs.cfa_reg == crtl->drap_reg
10811 && REGNO (reg) == REGNO (crtl->drap_reg))
10813 /* Previously we'd represented the CFA as an expression
10814 like *(%ebp - 8). We've just popped that value from
10815 the stack, which means we need to reset the CFA to
10816 the drap register. This will remain until we restore
10817 the stack pointer. */
10818 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10819 RTX_FRAME_RELATED_P (insn) = 1;
10821 /* This means that the DRAP register is valid for addressing too. */
10822 m->fs.drap_valid = true;
10826 if (m->fs.cfa_reg == stack_pointer_rtx)
10828 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10829 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10830 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10831 RTX_FRAME_RELATED_P (insn) = 1;
10833 m->fs.cfa_offset -= UNITS_PER_WORD;
10836 /* When the frame pointer is the CFA, and we pop it, we are
10837 swapping back to the stack pointer as the CFA. This happens
10838 for stack frames that don't allocate other data, so we assume
10839 the stack pointer is now pointing at the return address, i.e.
10840 the function entry state, which makes the offset be 1 word. */
10841 if (reg == hard_frame_pointer_rtx)
10843 m->fs.fp_valid = false;
10844 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10846 m->fs.cfa_reg = stack_pointer_rtx;
10847 m->fs.cfa_offset -= UNITS_PER_WORD;
10849 add_reg_note (insn, REG_CFA_DEF_CFA,
10850 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10851 GEN_INT (m->fs.cfa_offset)));
10852 RTX_FRAME_RELATED_P (insn) = 1;
10857 /* Emit code to restore saved registers using POP insns. */
10860 ix86_emit_restore_regs_using_pop (void)
10862 unsigned int regno;
10864 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10865 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10866 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10869 /* Emit code and notes for the LEAVE instruction. */
10872 ix86_emit_leave (void)
10874 struct machine_function *m = cfun->machine;
10875 rtx insn = emit_insn (ix86_gen_leave ());
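  /* "leave" is architecturally equivalent to "mov %ebp, %esp; pop %ebp",
     which is why SP becomes valid again below at FP minus one word.  */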
10877 ix86_add_queued_cfa_restore_notes (insn);
10879 gcc_assert (m->fs.fp_valid);
10880 m->fs.sp_valid = true;
10881 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10882 m->fs.fp_valid = false;
10884 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10886 m->fs.cfa_reg = stack_pointer_rtx;
10887 m->fs.cfa_offset = m->fs.sp_offset;
10889 add_reg_note (insn, REG_CFA_DEF_CFA,
10890 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10891 RTX_FRAME_RELATED_P (insn) = 1;
10892 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10897 /* Emit code to restore saved registers using MOV insns.
10898 First register is restored from CFA - CFA_OFFSET. */
10900 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10901 int maybe_eh_return)
10903 struct machine_function *m = cfun->machine;
10904 unsigned int regno;
10906 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10907 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10909 rtx reg = gen_rtx_REG (Pmode, regno);
10912 mem = choose_baseaddr (cfa_offset);
10913 mem = gen_frame_mem (Pmode, mem);
10914 insn = emit_move_insn (reg, mem);
10916 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10918 /* Previously we'd represented the CFA as an expression
10919 like *(%ebp - 8). We've just popped that value from
10920 the stack, which means we need to reset the CFA to
10921 the drap register. This will remain until we restore
10922 the stack pointer. */
10923 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10924 RTX_FRAME_RELATED_P (insn) = 1;
10926 /* This means that the DRAP register is valid for addressing. */
10927 m->fs.drap_valid = true;
10930 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10932 cfa_offset -= UNITS_PER_WORD;
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
10939 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10940 int maybe_eh_return)
10942 unsigned int regno;
10944 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10945 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10947 rtx reg = gen_rtx_REG (V4SFmode, regno);
10950 mem = choose_baseaddr (cfa_offset);
10951 mem = gen_rtx_MEM (V4SFmode, mem);
10952 set_mem_align (mem, 128);
10953 emit_move_insn (reg, mem);
10955 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10961 /* Restore function stack, frame, and registers. */
10964 ix86_expand_epilogue (int style)
10966 struct machine_function *m = cfun->machine;
10967 struct machine_frame_state frame_state_save = m->fs;
10968 struct ix86_frame frame;
10969 bool restore_regs_via_mov;
10972 ix86_finalize_stack_realign_flags ();
10973 ix86_compute_frame_layout (&frame);
10975 m->fs.sp_valid = (!frame_pointer_needed
10976 || (current_function_sp_is_unchanging
10977 && !stack_realign_fp));
10978 gcc_assert (!m->fs.sp_valid
10979 || m->fs.sp_offset == frame.stack_pointer_offset);
10981 /* The FP must be valid if the frame pointer is present. */
10982 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10983 gcc_assert (!m->fs.fp_valid
10984 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10986 /* We must have *some* valid pointer to the stack frame. */
10987 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10989 /* The DRAP is never valid at this point. */
10990 gcc_assert (!m->fs.drap_valid);
10992 /* See the comment about red zone and frame
10993 pointer usage in ix86_expand_prologue. */
10994 if (frame_pointer_needed && frame.red_zone_size)
10995 emit_insn (gen_memory_blockage ());
10997 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10998 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11000 /* Determine the CFA offset of the end of the red-zone. */
11001 m->fs.red_zone_offset = 0;
11002 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11004 /* The red-zone begins below the return address. */
11005 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11007 /* When the register save area is in the aligned portion of
11008 the stack, determine the maximum runtime displacement that
11009 matches up with the aligned frame. */
11010 if (stack_realign_drap)
11011 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11015 /* Special care must be taken for the normal return case of a function
11016 using eh_return: the eax and edx registers are marked as saved, but
11017 not restored along this path. Adjust the save location to match. */
11018 if (crtl->calls_eh_return && style != 2)
11019 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11021 /* EH_RETURN requires the use of moves to function properly. */
11022 if (crtl->calls_eh_return)
11023 restore_regs_via_mov = true;
11024 /* SEH requires the use of pops to identify the epilogue. */
11025 else if (TARGET_SEH)
11026 restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
11030 else if (!m->fs.sp_valid && frame.nregs <= 1)
11031 restore_regs_via_mov = true;
11032 else if (TARGET_EPILOGUE_USING_MOVE
11033 && cfun->machine->use_fast_prologue_epilogue
11034 && (frame.nregs > 1
11035 || m->fs.sp_offset != frame.reg_save_offset))
11036 restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
11040 restore_regs_via_mov = true;
11041 else if (frame_pointer_needed
11042 && TARGET_USE_LEAVE
11043 && cfun->machine->use_fast_prologue_epilogue
11044 && frame.nregs == 1)
11045 restore_regs_via_mov = true;
11047 restore_regs_via_mov = false;
11049 if (restore_regs_via_mov || frame.nsseregs)
11051 /* Ensure that the entire register save area is addressable via
11052 the stack pointer, if we will restore via sp. */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
11055 && !(m->fs.fp_valid || m->fs.drap_valid)
11056 && (frame.nsseregs + frame.nregs) != 0)
11058 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11059 GEN_INT (m->fs.sp_offset
11060 - frame.sse_reg_save_offset),
11062 m->fs.cfa_reg == stack_pointer_rtx);
11066 /* If there are any SSE registers to restore, then we have to do it
11067 via moves, since there's obviously no pop for SSE regs. */
11068 if (frame.nsseregs)
11069 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11072 if (restore_regs_via_mov)
11077 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11079 /* eh_return epilogues need %ecx added to the stack pointer. */
11082 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11084 /* Stack align doesn't work with eh_return. */
11085 gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
11087 gcc_assert (!ix86_static_chain_on_stack);
11089 if (frame_pointer_needed)
11091 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11092 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11093 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11095 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11096 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11098 /* Note that we use SA as a temporary CFA, as the return
11099 address is at the proper place relative to it. We
11100 pretend this happens at the FP restore insn because
11101 prior to this insn the FP would be stored at the wrong
11102 offset relative to SA, and after this insn we have no
11103 other reasonable register to use for the CFA. We don't
11104 bother resetting the CFA to the SP for the duration of
11105 the return insn. */
11106 add_reg_note (insn, REG_CFA_DEF_CFA,
11107 plus_constant (sa, UNITS_PER_WORD));
11108 ix86_add_queued_cfa_restore_notes (insn);
11109 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11110 RTX_FRAME_RELATED_P (insn) = 1;
11112 m->fs.cfa_reg = sa;
11113 m->fs.cfa_offset = UNITS_PER_WORD;
11114 m->fs.fp_valid = false;
11116 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11117 const0_rtx, style, false);
11121 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11122 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11123 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11124 ix86_add_queued_cfa_restore_notes (insn);
11126 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11127 if (m->fs.cfa_offset != UNITS_PER_WORD)
11129 m->fs.cfa_offset = UNITS_PER_WORD;
11130 add_reg_note (insn, REG_CFA_DEF_CFA,
11131 plus_constant (stack_pointer_rtx,
11133 RTX_FRAME_RELATED_P (insn) = 1;
11136 m->fs.sp_offset = UNITS_PER_WORD;
11137 m->fs.sp_valid = true;
11142 /* SEH requires that the function end with (1) a stack adjustment
11143 if necessary, (2) a sequence of pops, and (3) a return or
11144 jump instruction. Prevent insns from the function body from
11145 being scheduled into this sequence. */
      /* Prevent a catch region from being adjacent to the standard
	 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
	 nor several other flags that would be interesting to test are
	 set up by reload at this point.  */
11152 if (flag_non_call_exceptions)
11153 emit_insn (gen_nops (const1_rtx));
11155 emit_insn (gen_blockage ());
11158 /* First step is to deallocate the stack frame so that we can
11159 pop the registers. */
11160 if (!m->fs.sp_valid)
11162 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11163 GEN_INT (m->fs.fp_offset
11164 - frame.reg_save_offset),
11167 else if (m->fs.sp_offset != frame.reg_save_offset)
11169 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11170 GEN_INT (m->fs.sp_offset
11171 - frame.reg_save_offset),
11173 m->fs.cfa_reg == stack_pointer_rtx);
11176 ix86_emit_restore_regs_using_pop ();
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
11181 if (m->fs.fp_valid)
11183 /* If the stack pointer is valid and pointing at the frame
11184 pointer store address, then we only need a pop. */
11185 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11186 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11187 /* Leave results in shorter dependency chains on CPUs that are
11188 able to grok it fast. */
11189 else if (TARGET_USE_LEAVE
11190 || optimize_function_for_size_p (cfun)
11191 || !cfun->machine->use_fast_prologue_epilogue)
11192 ix86_emit_leave ();
11195 pro_epilogue_adjust_stack (stack_pointer_rtx,
11196 hard_frame_pointer_rtx,
11197 const0_rtx, style, !using_drap);
11198 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11204 int param_ptr_offset = UNITS_PER_WORD;
11207 gcc_assert (stack_realign_drap);
11209 if (ix86_static_chain_on_stack)
11210 param_ptr_offset += UNITS_PER_WORD;
11211 if (!call_used_regs[REGNO (crtl->drap_reg)])
11212 param_ptr_offset += UNITS_PER_WORD;
11214 insn = emit_insn (gen_rtx_SET
11215 (VOIDmode, stack_pointer_rtx,
11216 gen_rtx_PLUS (Pmode,
11218 GEN_INT (-param_ptr_offset))));
11219 m->fs.cfa_reg = stack_pointer_rtx;
11220 m->fs.cfa_offset = param_ptr_offset;
11221 m->fs.sp_offset = param_ptr_offset;
11222 m->fs.realigned = false;
11224 add_reg_note (insn, REG_CFA_DEF_CFA,
11225 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11226 GEN_INT (param_ptr_offset)));
11227 RTX_FRAME_RELATED_P (insn) = 1;
11229 if (!call_used_regs[REGNO (crtl->drap_reg)])
11230 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11233 /* At this point the stack pointer must be valid, and we must have
11234 restored all of the registers. We may not have deallocated the
11235 entire stack frame. We've delayed this until now because it may
11236 be possible to merge the local stack deallocation with the
11237 deallocation forced by ix86_static_chain_on_stack. */
11238 gcc_assert (m->fs.sp_valid);
11239 gcc_assert (!m->fs.fp_valid);
11240 gcc_assert (!m->fs.realigned);
11241 if (m->fs.sp_offset != UNITS_PER_WORD)
11243 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11244 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11248 /* Sibcall epilogues don't want a return instruction. */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }
11255 /* Emit vzeroupper if needed. */
11256 if (TARGET_VZEROUPPER
11257 && !TREE_THIS_VOLATILE (cfun->decl)
11258 && !cfun->machine->caller_return_avx256_p)
11259 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11261 if (crtl->args.pops_args && crtl->args.size)
11263 rtx popc = GEN_INT (crtl->args.pops_args);
11265 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11266 address, do explicit add, and jump indirectly to the caller. */
11268 if (crtl->args.pops_args >= 65536)
11270 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11273 /* There is no "pascal" calling convention in any 64bit ABI. */
11274 gcc_assert (!TARGET_64BIT);
11276 insn = emit_insn (gen_pop (ecx));
11277 m->fs.cfa_offset -= UNITS_PER_WORD;
11278 m->fs.sp_offset -= UNITS_PER_WORD;
11280 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11281 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11282 add_reg_note (insn, REG_CFA_REGISTER,
11283 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11284 RTX_FRAME_RELATED_P (insn) = 1;
11286 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11288 emit_jump_insn (gen_return_indirect_internal (ecx));
11291 emit_jump_insn (gen_return_pop_internal (popc));
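      /* "ret N" encodes N as an unsigned 16-bit immediate, which is why
	 pops_args >= 65536 must take the indirect-jump path above.  */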
11294 emit_jump_insn (gen_return_internal ());
11296 /* Restore the state back to the state from the prologue,
11297 so that it's correct for the next epilogue. */
11298 m->fs = frame_state_save;
11301 /* Reset from the function's potential modifications. */
11304 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11305 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11307 if (pic_offset_table_rtx)
11308 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11310 /* Mach-O doesn't support labels at the end of objects, so if
11311 it looks like we might want one, insert a NOP. */
      rtx insn = get_last_insn ();
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	insn = PREV_INSN (insn);
      if (insn
	  && (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
	fputs ("\tnop\n", file);
11328 /* Return a scratch register to use in the split stack prologue. The
11329 split stack prologue is used for -fsplit-stack. It is the first
11330 instructions in the function, even before the regular prologue.
11331 The scratch register can be any caller-saved register which is not
11332 used for parameters or for the static chain. */
11334 static unsigned int
11335 split_stack_prologue_scratch_regno (void)
  is_fastcall = (lookup_attribute ("fastcall",
				   TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		 != NULL);
11347 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11351 if (DECL_STATIC_CHAIN (cfun->decl))
11353 sorry ("-fsplit-stack does not support fastcall with "
11354 "nested function");
11355 return INVALID_REGNUM;
11359 else if (regparm < 3)
11361 if (!DECL_STATIC_CHAIN (cfun->decl))
	      sorry ("-fsplit-stack does not support 2 register "
		     "parameters for a nested function");
11369 return INVALID_REGNUM;
11376 /* FIXME: We could make this work by pushing a register
11377 around the addition and comparison. */
11378 sorry ("-fsplit-stack does not support 3 register parameters");
11379 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */
11387 static GTY(()) rtx split_stack_fn;
/* A SYMBOL_REF for the more-stack function when using the large
   model.  */
11392 static GTY(()) rtx split_stack_fn_large;
11394 /* Handle -fsplit-stack. These are the first instructions in the
11395 function, even before the regular prologue. */
11398 ix86_expand_split_stack_prologue (void)
11400 struct ix86_frame frame;
11401 HOST_WIDE_INT allocate;
11402 unsigned HOST_WIDE_INT args_size;
11403 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11404 rtx scratch_reg = NULL_RTX;
11405 rtx varargs_label = NULL_RTX;
11408 gcc_assert (flag_split_stack && reload_completed);
11410 ix86_finalize_stack_realign_flags ();
11411 ix86_compute_frame_layout (&frame);
11412 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11414 /* This is the label we will branch to if we have enough stack
11415 space. We expect the basic block reordering pass to reverse this
11416 branch if optimizing, so that we branch in the unlikely case. */
11417 label = gen_label_rtx ();
11419 /* We need to compare the stack pointer minus the frame size with
11420 the stack boundary in the TCB. The stack boundary always gives
11421 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11422 can compare directly. Otherwise we need to do an addition. */
11424 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11425 UNSPEC_STACK_CHECK);
11426 limit = gen_rtx_CONST (Pmode, limit);
11427 limit = gen_rtx_MEM (Pmode, limit);
11428 if (allocate < SPLIT_STACK_AVAILABLE)
11429 current = stack_pointer_rtx;
11432 unsigned int scratch_regno;
11435 /* We need a scratch register to hold the stack pointer minus
11436 the required frame size. Since this is the very start of the
11437 function, the scratch register can be any caller-saved
11438 register which is not used for parameters. */
11439 offset = GEN_INT (- allocate);
11440 scratch_regno = split_stack_prologue_scratch_regno ();
11441 if (scratch_regno == INVALID_REGNUM)
11443 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11444 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11446 /* We don't use ix86_gen_add3 in this case because it will
11447 want to split to lea, but when not optimizing the insn
11448 will not be split after this point. */
11449 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11450 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11455 emit_move_insn (scratch_reg, offset);
11456 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11457 stack_pointer_rtx));
11459 current = scratch_reg;
11462 ix86_expand_branch (GEU, current, limit, label);
11463 jump_insn = get_last_insn ();
11464 JUMP_LABEL (jump_insn) = label;
11466 /* Mark the jump as very likely to be taken. */
11467 add_reg_note (jump_insn, REG_BR_PROB,
11468 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
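  /* I.e. a probability of REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100,
     or 99%.  */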
11470 if (split_stack_fn == NULL_RTX)
11471 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11472 fn = split_stack_fn;
11474 /* Get more stack space. We pass in the desired stack space and the
11475 size of the arguments to copy to the new stack. In 32-bit mode
11476 we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and r11.  */
11479 allocate_rtx = GEN_INT (allocate);
11480 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11481 call_fusage = NULL_RTX;
11486 reg10 = gen_rtx_REG (Pmode, R10_REG);
11487 reg11 = gen_rtx_REG (Pmode, R11_REG);
11489 /* If this function uses a static chain, it will be in %r10.
11490 Preserve it across the call to __morestack. */
11491 if (DECL_STATIC_CHAIN (cfun->decl))
11495 rax = gen_rtx_REG (Pmode, AX_REG);
11496 emit_move_insn (rax, reg10);
11497 use_reg (&call_fusage, rax);
11500 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11502 HOST_WIDE_INT argval;
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large_model.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
11510 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11511 gcc_assert ((args_size & 0xffffffff) == args_size);
11513 if (split_stack_fn_large == NULL_RTX)
11514 split_stack_fn_large =
11515 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11517 if (ix86_cmodel == CM_LARGE_PIC)
11521 label = gen_label_rtx ();
11522 emit_label (label);
11523 LABEL_PRESERVE_P (label) = 1;
11524 emit_insn (gen_set_rip_rex64 (reg10, label));
11525 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11526 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11527 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11529 x = gen_rtx_CONST (Pmode, x);
11530 emit_move_insn (reg11, x);
11531 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11532 x = gen_const_mem (Pmode, x);
11533 emit_move_insn (reg11, x);
11536 emit_move_insn (reg11, split_stack_fn_large);
11540 argval = ((args_size << 16) << 16) + allocate;
11541 emit_move_insn (reg10, GEN_INT (argval));
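	  /* For example, args_size == 32 and allocate == 0x1000 yield
	     argval == 0x0000002000001000: argument size in the upper
	     half, frame size in the lower half.  */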
11545 emit_move_insn (reg10, allocate_rtx);
11546 emit_move_insn (reg11, GEN_INT (args_size));
11547 use_reg (&call_fusage, reg11);
11550 use_reg (&call_fusage, reg10);
11554 emit_insn (gen_push (GEN_INT (args_size)));
11555 emit_insn (gen_push (allocate_rtx));
11557 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11558 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11560 add_function_usage_to (call_insn, call_fusage);
11562 /* In order to make call/return prediction work right, we now need
11563 to execute a return instruction. See
11564 libgcc/config/i386/morestack.S for the details on how this works.
11566 For flow purposes gcc must not see this as a return
11567 instruction--we need control flow to continue at the subsequent
11568 label. Therefore, we use an unspec. */
11569 gcc_assert (crtl->args.pops_args < 65536);
11570 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
11574 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11575 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11576 gen_rtx_REG (Pmode, AX_REG));
11578 /* If this function calls va_start, we need to store a pointer to
11579 the arguments on the old stack, because they may not have been
11580 all copied to the new stack. At this point the old stack can be
11581 found at the frame pointer value used by __morestack, because
11582 __morestack has set that up before calling back to us. Here we
11583 store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
11586 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11588 unsigned int scratch_regno;
11592 scratch_regno = split_stack_prologue_scratch_regno ();
11593 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11594 frame_reg = gen_rtx_REG (Pmode, BP_REG);
      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
11612 words = TARGET_64BIT ? 3 : 5;
11613 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11614 gen_rtx_PLUS (Pmode, frame_reg,
11615 GEN_INT (words * UNITS_PER_WORD))));
11617 varargs_label = gen_label_rtx ();
11618 emit_jump_insn (gen_jump (varargs_label));
11619 JUMP_LABEL (get_last_insn ()) = varargs_label;
11624 emit_label (label);
11625 LABEL_NUSES (label) = 1;
11627 /* If this function calls va_start, we now have to set the scratch
11628 register for the case where we do not call __morestack. In this
11629 case we need to set it based on the stack pointer. */
11630 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11632 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11633 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11634 GEN_INT (UNITS_PER_WORD))));
11636 emit_label (varargs_label);
11637 LABEL_NUSES (varargs_label) = 1;
11641 /* We may have to tell the dataflow pass that the split stack prologue
11642 is initializing a scratch register. */
11645 ix86_live_on_entry (bitmap regs)
11647 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11649 gcc_assert (flag_split_stack);
11650 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of an lea
   instruction.  */
11660 ix86_decompose_address (rtx addr, struct ix86_address *out)
11662 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11663 rtx base_reg, index_reg;
11664 HOST_WIDE_INT scale = 1;
11665 rtx scale_rtx = NULL_RTX;
11668 enum ix86_address_seg seg = SEG_DEFAULT;
  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    base = addr;
11672 else if (GET_CODE (addr) == PLUS)
11674 rtx addends[4], op;
11682 addends[n++] = XEXP (op, 1);
11685 while (GET_CODE (op) == PLUS);
11690 for (i = n; i >= 0; --i)
11693 switch (GET_CODE (op))
11698 index = XEXP (op, 0);
11699 scale_rtx = XEXP (op, 1);
11705 index = XEXP (op, 0);
11706 tmp = XEXP (op, 1);
11707 if (!CONST_INT_P (tmp))
11709 scale = INTVAL (tmp);
11710 if ((unsigned HOST_WIDE_INT) scale > 3)
11712 scale = 1 << scale;
11716 if (XINT (op, 1) == UNSPEC_TP
11717 && TARGET_TLS_DIRECT_SEG_REFS
11718 && seg == SEG_DEFAULT)
11719 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11748 else if (GET_CODE (addr) == MULT)
11750 index = XEXP (addr, 0); /* index*scale */
11751 scale_rtx = XEXP (addr, 1);
11753 else if (GET_CODE (addr) == ASHIFT)
11755 /* We're called for lea too, which implements ashift on occasion. */
11756 index = XEXP (addr, 0);
11757 tmp = XEXP (addr, 1);
11758 if (!CONST_INT_P (tmp))
11760 scale = INTVAL (tmp);
11761 if ((unsigned HOST_WIDE_INT) scale > 3)
11763 scale = 1 << scale;
11767 disp = addr; /* displacement */
11769 /* Extract the integral value of scale. */
11772 if (!CONST_INT_P (scale_rtx))
11774 scale = INTVAL (scale_rtx);
11777 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11778 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11780 /* Avoid useless 0 displacement. */
11781 if (disp == const0_rtx && (base || index))
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11785 if (base_reg && index_reg && scale == 1
11786 && (index_reg == arg_pointer_rtx
11787 || index_reg == frame_pointer_rtx
11788 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11791 tmp = base, base = index, index = tmp;
11792 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
11799 && (base_reg == hard_frame_pointer_rtx
11800 || base_reg == frame_pointer_rtx
11801 || base_reg == arg_pointer_rtx
11802 || (REG_P (base_reg)
11803 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;
11807 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11808 Avoid this by transforming to [%esi+0].
11809 Reload calls address legitimization without cfun defined, so we need
11810 to test cfun for being non-NULL. */
11811 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11812 && base_reg && !index_reg && !disp
11813 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11816 /* Special case: encode reg+reg instead of reg*2. */
11817 if (!base && index && scale == 2)
11818 base = index, base_reg = index_reg, scale = 1;
11820 /* Special case: scaling cannot be encoded without base or displacement. */
11821 if (!base && !disp && index && scale != 1)
11825 out->index = index;
11827 out->scale = scale;
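  /* For example (illustrative): the address (plus (plus (mult (reg A)
     (const_int 4)) (reg B)) (const_int 20)), i.e. 20(%B,%A,4), decomposes
     into base B, index A, scale 4 and disp 20.  */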
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
11839 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11841 struct ix86_address parts;
11843 int ok = ix86_decompose_address (x, &parts);
11847 if (parts.base && GET_CODE (parts.base) == SUBREG)
11848 parts.base = SUBREG_REG (parts.base);
11849 if (parts.index && GET_CODE (parts.index) == SUBREG)
11850 parts.index = SUBREG_REG (parts.index);
11852 /* Attempt to minimize number of registers in the address. */
11854 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11856 && (!REG_P (parts.index)
11857 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11861 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11863 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11864 && parts.base != parts.index)
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine handy
     to check this theory.  */
11883 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11884 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11885 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
11896 darwin_local_data_pic (rtx disp)
11898 return (GET_CODE (disp) == UNSPEC
11899 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11902 /* Determine if a given RTX is a valid constant. We already know this
11903 satisfies CONSTANT_P. */
11906 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
11908 switch (GET_CODE (x))
11913 if (GET_CODE (x) == PLUS)
11915 if (!CONST_INT_P (XEXP (x, 1)))
11920 if (TARGET_MACHO && darwin_local_data_pic (x))
11923 /* Only some unspecs are valid as "constants". */
11924 if (GET_CODE (x) == UNSPEC)
11925 switch (XINT (x, 1))
11928 case UNSPEC_GOTOFF:
11929 case UNSPEC_PLTOFF:
11930 return TARGET_64BIT;
11932 case UNSPEC_NTPOFF:
11933 x = XVECEXP (x, 0, 0);
11934 return (GET_CODE (x) == SYMBOL_REF
11935 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11936 case UNSPEC_DTPOFF:
11937 x = XVECEXP (x, 0, 0);
11938 return (GET_CODE (x) == SYMBOL_REF
11939 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11944 /* We must have drilled down to a symbol. */
11945 if (GET_CODE (x) == LABEL_REF)
11947 if (GET_CODE (x) != SYMBOL_REF)
11952 /* TLS symbols are never valid. */
11953 if (SYMBOL_REF_TLS_MODEL (x))
11956 /* DLLIMPORT symbols are never valid. */
11957 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11958 && SYMBOL_REF_DLLIMPORT_P (x))
11962 /* mdynamic-no-pic */
11963 if (MACHO_DYNAMIC_NO_PIC_P)
11964 return machopic_symbol_defined_p (x);
11969 if (GET_MODE (x) == TImode
11970 && x != CONST0_RTX (TImode)
11976 if (!standard_sse_constant_p (x))
11983 /* Otherwise we handle everything else in the move patterns. */
11987 /* Determine if it's legal to put X into the constant pool. This
11988 is not possible for the address of thread-local symbols, which
11989 is checked above. */
11992 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
11994 /* We can always put integral constants and vectors in memory. */
11995 switch (GET_CODE (x))
12005 return !ix86_legitimate_constant_p (mode, x);
12009 /* Nonzero if the constant value X is a legitimate general operand
12010 when generating PIC code. It is given that flag_pic is on and
12011 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12014 legitimate_pic_operand_p (rtx x)
12018 switch (GET_CODE (x))
12021 inner = XEXP (x, 0);
12022 if (GET_CODE (inner) == PLUS
12023 && CONST_INT_P (XEXP (inner, 1)))
12024 inner = XEXP (inner, 0);
12026 /* Only some unspecs are valid as "constants". */
12027 if (GET_CODE (inner) == UNSPEC)
12028 switch (XINT (inner, 1))
12031 case UNSPEC_GOTOFF:
12032 case UNSPEC_PLTOFF:
12033 return TARGET_64BIT;
12035 x = XVECEXP (inner, 0, 0);
12036 return (GET_CODE (x) == SYMBOL_REF
12037 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12038 case UNSPEC_MACHOPIC_OFFSET:
12039 return legitimate_pic_address_disp_p (x);
12047 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
12058 legitimate_pic_address_disp_p (rtx disp)
12062 /* In 64bit mode we can allow direct addresses of symbols and labels
12063 when they are not dynamic symbols. */
12066 rtx op0 = disp, op1;
12068 switch (GET_CODE (disp))
12074 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12076 op0 = XEXP (XEXP (disp, 0), 0);
12077 op1 = XEXP (XEXP (disp, 0), 1);
12078 if (!CONST_INT_P (op1)
12079 || INTVAL (op1) >= 16*1024*1024
12080 || INTVAL (op1) < -16*1024*1024)
12082 if (GET_CODE (op0) == LABEL_REF)
12084 if (GET_CODE (op0) != SYMBOL_REF)
12089 /* TLS references should always be enclosed in UNSPEC. */
12090 if (SYMBOL_REF_TLS_MODEL (op0))
12092 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12093 && ix86_cmodel != CM_LARGE_PIC)
12101 if (GET_CODE (disp) != CONST)
12103 disp = XEXP (disp, 0);
  /* It is unsafe to allow PLUS expressions here, given the limited
     allowed displacement range of GOT entries.  We should not need
     these anyway.  */
12109 if (GET_CODE (disp) != UNSPEC
12110 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12111 && XINT (disp, 1) != UNSPEC_GOTOFF
12112 && XINT (disp, 1) != UNSPEC_PCREL
12113 && XINT (disp, 1) != UNSPEC_PLTOFF))
12116 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12117 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12123 if (GET_CODE (disp) == PLUS)
12125 if (!CONST_INT_P (XEXP (disp, 1)))
12127 disp = XEXP (disp, 0);
12131 if (TARGET_MACHO && darwin_local_data_pic (disp))
12134 if (GET_CODE (disp) != UNSPEC)
12137 switch (XINT (disp, 1))
12142 /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
12145 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12146 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12147 case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
12151 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12152 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12154 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12156 case UNSPEC_GOTTPOFF:
12157 case UNSPEC_GOTNTPOFF:
12158 case UNSPEC_INDNTPOFF:
12161 disp = XVECEXP (disp, 0, 0);
12162 return (GET_CODE (disp) == SYMBOL_REF
12163 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12164 case UNSPEC_NTPOFF:
12165 disp = XVECEXP (disp, 0, 0);
12166 return (GET_CODE (disp) == SYMBOL_REF
12167 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12168 case UNSPEC_DTPOFF:
12169 disp = XVECEXP (disp, 0, 0);
12170 return (GET_CODE (disp) == SYMBOL_REF
12171 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12177 /* Recognizes RTL expressions that are valid memory addresses for an
12178 instruction. The MODE argument is the machine mode for the MEM
12179 expression that wants to use this address.
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */
12186 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12187 rtx addr, bool strict)
12189 struct ix86_address parts;
12190 rtx base, index, disp;
12191 HOST_WIDE_INT scale;
12193 if (ix86_decompose_address (addr, &parts) <= 0)
12194 /* Decomposition failed. */
12198 index = parts.index;
12200 scale = parts.scale;
12202 /* Validate base register.
12204 Don't allow SUBREG's that span more than a word here. It can lead to spill
12205 failures when the base is one word out of a two word structure, which is
12206 represented internally as a DImode int. */
12214 else if (GET_CODE (base) == SUBREG
12215 && REG_P (SUBREG_REG (base))
12216 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12218 reg = SUBREG_REG (base);
12220 /* Base is not a register. */
12223 if (GET_MODE (base) != Pmode)
12224 /* Base is not in Pmode. */
12227 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12228 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12229 /* Base is not valid. */
12233 /* Validate index register.
12235 Don't allow SUBREG's that span more than a word here -- same as above. */
12243 else if (GET_CODE (index) == SUBREG
12244 && REG_P (SUBREG_REG (index))
12245 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12247 reg = SUBREG_REG (index);
12249 /* Index is not a register. */
12252 if (GET_MODE (index) != Pmode)
12253 /* Index is not in Pmode. */
12256 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12257 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12258 /* Index is not valid. */
12262 /* Validate scale factor. */
12266 /* Scale without index. */
12269 if (scale != 2 && scale != 4 && scale != 8)
12270 /* Scale is not a valid multiplier. */
12274 /* Validate displacement. */
12277 if (GET_CODE (disp) == CONST
12278 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12279 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12280 switch (XINT (XEXP (disp, 0), 1))
	    /* Refuse GOTOFF and GOT in 64bit mode since they are always
	       64bit when used.  While the ABI also specifies 32bit
	       relocations, we don't produce them at all and use
	       IP-relative addressing instead.  */
12286 case UNSPEC_GOTOFF:
12287 gcc_assert (flag_pic);
12289 goto is_legitimate_pic;
12291 /* 64bit address unspec. */
12294 case UNSPEC_GOTPCREL:
12296 gcc_assert (flag_pic);
12297 goto is_legitimate_pic;
12299 case UNSPEC_GOTTPOFF:
12300 case UNSPEC_GOTNTPOFF:
12301 case UNSPEC_INDNTPOFF:
12302 case UNSPEC_NTPOFF:
12303 case UNSPEC_DTPOFF:
12306 case UNSPEC_STACK_CHECK:
12307 gcc_assert (flag_split_stack);
12311 /* Invalid address unspec. */
12315 else if (SYMBOLIC_CONST (disp)
12319 && MACHOPIC_INDIRECT
12320 && !machopic_operand_p (disp)
12326 if (TARGET_64BIT && (index || base))
12328 /* foo@dtpoff(%rX) is ok. */
12329 if (GET_CODE (disp) != CONST
12330 || GET_CODE (XEXP (disp, 0)) != PLUS
12331 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12332 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12333 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12334 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12335 /* Non-constant pic memory reference. */
12338 else if ((!TARGET_MACHO || flag_pic)
12339 && ! legitimate_pic_address_disp_p (disp))
12340 /* Displacement is an invalid pic construct. */
12343 else if (MACHO_DYNAMIC_NO_PIC_P
12344 && !ix86_legitimate_constant_p (Pmode, disp))
12345 /* Displacement must be referenced via non_lazy_pointer. */
12349 /* This code used to verify that a symbolic pic displacement
12350 includes the pic_offset_table_rtx register.
12352 While this is a good idea, unfortunately these constructs may
12353 be created by the "adds using lea" optimization for incorrect
12362 This code is nonsensical, but results in addressing the
12363 GOT table with a pic_offset_table_rtx base. We can't
12364 just refuse it easily, since it gets matched by the
12365 "addsi3" pattern, which later gets split to lea in the
12366 case the output register differs from the input. While this
12367 could be handled by a separate addsi pattern for this case
12368 that never results in lea, disabling this test seems to be
12369 the easier and correct fix for the crash. */
12371 else if (GET_CODE (disp) != LABEL_REF
12372 && !CONST_INT_P (disp)
12373 && (GET_CODE (disp) != CONST
12374 || !ix86_legitimate_constant_p (Pmode, disp))
12375 && (GET_CODE (disp) != SYMBOL_REF
12376 || !ix86_legitimate_constant_p (Pmode, disp)))
12377 /* Displacement is not constant. */
12379 else if (TARGET_64BIT
12380 && !x86_64_immediate_operand (disp, VOIDmode))
12381 /* Displacement is out of range. */
12385 /* Everything looks valid. */
12389 /* Determine if a given RTX is a valid constant address. */
12392 constant_address_p (rtx x)
12394 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12397 /* Return a unique alias set for the GOT. */
12399 static alias_set_type
12400 ix86_GOT_alias_set (void)
12402 static alias_set_type set = -1;
12404 set = new_alias_set ();
12408 /* Return a legitimate reference for ORIG (an address) using the
12409 register REG. If REG is 0, a new pseudo is generated.
12411 There are two types of references that must be handled:
12413 1. Global data references must load the address from the GOT, via
12414 the PIC reg. An insn is emitted to do this load, and the reg is
12417 2. Static data references, constant pool addresses, and code labels
12418 compute the address as an offset from the GOT, whose base is in
12419 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12420 differentiate them from global data objects. The returned
12421 address is the PIC reg + an unspec constant.
12423 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12424 reg also appears in the address. */
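/* A rough 32bit ELF illustration, assuming %ebx holds the PIC base and
   foo/bar are placeholder symbols: a global object is loaded from the
   GOT,
       movl foo@GOT(%ebx), %eax        # &foo, loaded from the GOT
   while a local/static object is a displacement from the GOT base,
       leal bar@GOTOFF(%ebx), %eax     # &bar = PIC base + offset  */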
12427 legitimize_pic_address (rtx orig, rtx reg)
12430 rtx new_rtx = orig;
12434 if (TARGET_MACHO && !TARGET_64BIT)
12437 reg = gen_reg_rtx (Pmode);
12438 /* Use the generic Mach-O PIC machinery. */
12439 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12443 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12445 else if (TARGET_64BIT
12446 && ix86_cmodel != CM_SMALL_PIC
12447 && gotoff_operand (addr, Pmode))
12450 /* This symbol may be referenced via a displacement from the PIC
12451 base address (@GOTOFF). */
12453 if (reload_in_progress)
12454 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12455 if (GET_CODE (addr) == CONST)
12456 addr = XEXP (addr, 0);
12457 if (GET_CODE (addr) == PLUS)
12459 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12461 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12464 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12465 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12467 tmpreg = gen_reg_rtx (Pmode);
12470 emit_move_insn (tmpreg, new_rtx);
12474 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12475 tmpreg, 1, OPTAB_DIRECT);
12478 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12480 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12482 /* This symbol may be referenced via a displacement from the PIC
12483 base address (@GOTOFF). */
12485 if (reload_in_progress)
12486 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12487 if (GET_CODE (addr) == CONST)
12488 addr = XEXP (addr, 0);
12489 if (GET_CODE (addr) == PLUS)
12491 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12493 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12496 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12497 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12498 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12502 emit_move_insn (reg, new_rtx);
12506 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12507 /* We can't use @GOTOFF for text labels on VxWorks;
12508 see gotoff_operand. */
12509 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12511 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12513 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12514 return legitimize_dllimport_symbol (addr, true);
12515 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12516 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12517 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12519 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12520 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12524 /* For x64 PE-COFF there is no GOT table, so we use the address directly. */
12526 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12528 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12529 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12532 reg = gen_reg_rtx (Pmode);
12533 emit_move_insn (reg, new_rtx);
12536 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12538 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12539 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12540 new_rtx = gen_const_mem (Pmode, new_rtx);
12541 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12544 reg = gen_reg_rtx (Pmode);
12545 /* Use gen_movsi directly; otherwise the address is loaded
12546 into a register for CSE. We don't want to CSE these addresses;
12547 instead we CSE addresses from the GOT table, so skip this. */
12548 emit_insn (gen_movsi (reg, new_rtx));
12553 /* This symbol must be referenced via a load from the
12554 Global Offset Table (@GOT). */
12556 if (reload_in_progress)
12557 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12558 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12559 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12561 new_rtx = force_reg (Pmode, new_rtx);
12562 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12563 new_rtx = gen_const_mem (Pmode, new_rtx);
12564 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12567 reg = gen_reg_rtx (Pmode);
12568 emit_move_insn (reg, new_rtx);
12574 if (CONST_INT_P (addr)
12575 && !x86_64_immediate_operand (addr, VOIDmode))
12579 emit_move_insn (reg, addr);
12583 new_rtx = force_reg (Pmode, addr);
12585 else if (GET_CODE (addr) == CONST)
12587 addr = XEXP (addr, 0);
12589 /* We must match the stuff we generated before. Assume the only
12590 unspecs that can get here are ours. Not that we could do
12591 anything with them anyway.... */
12592 if (GET_CODE (addr) == UNSPEC
12593 || (GET_CODE (addr) == PLUS
12594 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12596 gcc_assert (GET_CODE (addr) == PLUS);
12598 if (GET_CODE (addr) == PLUS)
12600 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12602 /* Check first to see if this is a constant offset from a @GOTOFF
12603 symbol reference. */
12604 if (gotoff_operand (op0, Pmode)
12605 && CONST_INT_P (op1))
12609 if (reload_in_progress)
12610 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12611 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12613 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12614 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12615 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12619 emit_move_insn (reg, new_rtx);
12625 if (INTVAL (op1) < -16*1024*1024
12626 || INTVAL (op1) >= 16*1024*1024)
12628 if (!x86_64_immediate_operand (op1, Pmode))
12629 op1 = force_reg (Pmode, op1);
12630 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12636 base = legitimize_pic_address (XEXP (addr, 0), reg);
12637 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12638 base == reg ? NULL_RTX : reg);
12640 if (CONST_INT_P (new_rtx))
12641 new_rtx = plus_constant (base, INTVAL (new_rtx));
12644 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12646 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12647 new_rtx = XEXP (new_rtx, 1);
12649 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12657 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12660 get_thread_pointer (int to_reg)
12664 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12668 reg = gen_reg_rtx (Pmode);
12669 insn = gen_rtx_SET (VOIDmode, reg, tp);
12670 insn = emit_insn (insn);
12675 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12676 false if we expect this to be used for a memory address and true if
12677 we expect to load the address into a register. */
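/* Illustrative x86-64 sequences for the models handled below ("x" is a
   placeholder symbol; call padding and the exact templates live in the
   .md patterns):
     global dynamic:  leaq x@tlsgd(%rip), %rdi; call __tls_get_addr
     initial exec:    movq x@gottpoff(%rip), %rax; movq %fs:(%rax), ...
     local exec:      movq %fs:x@tpoff, ...  */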
12680 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12682 rtx dest, base, off, pic, tp;
12687 case TLS_MODEL_GLOBAL_DYNAMIC:
12688 dest = gen_reg_rtx (Pmode);
12689 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12691 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12693 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12696 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12697 insns = get_insns ();
12700 RTL_CONST_CALL_P (insns) = 1;
12701 emit_libcall_block (insns, dest, rax, x);
12703 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12704 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12706 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12708 if (TARGET_GNU2_TLS)
12710 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12712 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12716 case TLS_MODEL_LOCAL_DYNAMIC:
12717 base = gen_reg_rtx (Pmode);
12718 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12720 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12722 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12725 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12726 insns = get_insns ();
12729 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12730 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12731 RTL_CONST_CALL_P (insns) = 1;
12732 emit_libcall_block (insns, base, rax, note);
12734 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12735 emit_insn (gen_tls_local_dynamic_base_64 (base));
12737 emit_insn (gen_tls_local_dynamic_base_32 (base));
12739 if (TARGET_GNU2_TLS)
12741 rtx x = ix86_tls_module_base ();
12743 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12744 gen_rtx_MINUS (Pmode, x, tp));
12747 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12748 off = gen_rtx_CONST (Pmode, off);
12750 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12752 if (TARGET_GNU2_TLS)
12754 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12756 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12761 case TLS_MODEL_INITIAL_EXEC:
12764 if (TARGET_SUN_TLS)
12766 /* The Sun linker took the AMD64 TLS spec literally
12767 and can only handle %rax as the destination of the
12768 initial-exec code sequence. */
12770 dest = gen_reg_rtx (Pmode);
12771 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12776 type = UNSPEC_GOTNTPOFF;
12780 if (reload_in_progress)
12781 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12782 pic = pic_offset_table_rtx;
12783 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12785 else if (!TARGET_ANY_GNU_TLS)
12787 pic = gen_reg_rtx (Pmode);
12788 emit_insn (gen_set_got (pic));
12789 type = UNSPEC_GOTTPOFF;
12794 type = UNSPEC_INDNTPOFF;
12797 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12798 off = gen_rtx_CONST (Pmode, off);
12800 off = gen_rtx_PLUS (Pmode, pic, off);
12801 off = gen_const_mem (Pmode, off);
12802 set_mem_alias_set (off, ix86_GOT_alias_set ());
12804 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12806 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12807 off = force_reg (Pmode, off);
12808 return gen_rtx_PLUS (Pmode, base, off);
12812 base = get_thread_pointer (true);
12813 dest = gen_reg_rtx (Pmode);
12814 emit_insn (gen_subsi3 (dest, base, off));
12818 case TLS_MODEL_LOCAL_EXEC:
12819 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12820 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12821 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12822 off = gen_rtx_CONST (Pmode, off);
12824 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12826 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12827 return gen_rtx_PLUS (Pmode, base, off);
12831 base = get_thread_pointer (true);
12832 dest = gen_reg_rtx (Pmode);
12833 emit_insn (gen_subsi3 (dest, base, off));
12838 gcc_unreachable ();
12844 /* Create or return the unique __imp_DECL dllimport symbol corresponding to symbol DECL. */
12847 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12848 htab_t dllimport_map;
12851 get_dllimport_decl (tree decl)
12853 struct tree_map *h, in;
12856 const char *prefix;
12857 size_t namelen, prefixlen;
12862 if (!dllimport_map)
12863 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12865 in.hash = htab_hash_pointer (decl);
12866 in.base.from = decl;
12867 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12868 h = (struct tree_map *) *loc;
12872 *loc = h = ggc_alloc_tree_map ();
12874 h->base.from = decl;
12875 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12876 VAR_DECL, NULL, ptr_type_node);
12877 DECL_ARTIFICIAL (to) = 1;
12878 DECL_IGNORED_P (to) = 1;
12879 DECL_EXTERNAL (to) = 1;
12880 TREE_READONLY (to) = 1;
12882 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12883 name = targetm.strip_name_encoding (name);
12884 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12885 ? "*__imp_" : "*__imp__";
12886 namelen = strlen (name);
12887 prefixlen = strlen (prefix);
12888 imp_name = (char *) alloca (namelen + prefixlen + 1);
12889 memcpy (imp_name, prefix, prefixlen);
12890 memcpy (imp_name + prefixlen, name, namelen + 1);
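/* E.g. a symbol assembled as "foo" under a nonempty user label prefix
   yields "__imp__foo", while a fastcall symbol "@foo@4" yields
   "__imp_@foo@4"; the leading '*' in PREFIX suppresses any further
   prefixing.  ("foo" is a placeholder name.)  */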
12892 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12893 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12894 SET_SYMBOL_REF_DECL (rtl, to);
12895 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12897 rtl = gen_const_mem (Pmode, rtl);
12898 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12900 SET_DECL_RTL (to, rtl);
12901 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12906 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12907 true if we require the result be a register. */
12910 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12915 gcc_assert (SYMBOL_REF_DECL (symbol));
12916 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12918 x = DECL_RTL (imp_decl);
12920 x = force_reg (Pmode, x);
12924 /* Try machine-dependent ways of modifying an illegitimate address
12925 to be legitimate. If we find one, return the new, valid address.
12926 This macro is used in only one place: `memory_address' in explow.c.
12928 OLDX is the address as it was before break_out_memory_refs was called.
12929 In some cases it is useful to look at this to decide what needs to be done.
12931 It is always safe for this macro to do nothing. It exists to recognize
12932 opportunities to optimize the output.
12934 For the 80386, we handle X+REG by loading X into a register R and
12935 using R+REG. R will go in a general reg and indexing will be used.
12936 However, if REG is a broken-out memory address or multiplication,
12937 nothing needs to be done because REG can certainly go in a general reg.
12939 When -fpic is used, special handling is needed for symbolic references.
12940 See comments by legitimize_pic_address in i386.c for details. */
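/* One concrete case handled below (a sketch; A and B are placeholder
   registers): (plus (mult (reg A) (const_int 3)) (reg B)) is not
   directly encodable since 3 is not a valid scale, so force_operand
   expands the MULT into a fresh register R (e.g. via lea), leaving the
   legitimate base+index form (plus R (reg B)).  */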
12943 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12944 enum machine_mode mode)
12949 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12951 return legitimize_tls_address (x, (enum tls_model) log, false);
12952 if (GET_CODE (x) == CONST
12953 && GET_CODE (XEXP (x, 0)) == PLUS
12954 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12955 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12957 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12958 (enum tls_model) log, false);
12959 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12962 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12964 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12965 return legitimize_dllimport_symbol (x, true);
12966 if (GET_CODE (x) == CONST
12967 && GET_CODE (XEXP (x, 0)) == PLUS
12968 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12969 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12971 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12972 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12976 if (flag_pic && SYMBOLIC_CONST (x))
12977 return legitimize_pic_address (x, 0);
12980 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12981 return machopic_indirect_data_reference (x, 0);
12984 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12985 if (GET_CODE (x) == ASHIFT
12986 && CONST_INT_P (XEXP (x, 1))
12987 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12990 log = INTVAL (XEXP (x, 1));
12991 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12992 GEN_INT (1 << log));
12995 if (GET_CODE (x) == PLUS)
12997 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12999 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13000 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13001 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13004 log = INTVAL (XEXP (XEXP (x, 0), 1));
13005 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13006 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13007 GEN_INT (1 << log));
13010 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13011 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13012 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13015 log = INTVAL (XEXP (XEXP (x, 1), 1));
13016 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13017 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13018 GEN_INT (1 << log));
13021 /* Put multiply first if it isn't already. */
13022 if (GET_CODE (XEXP (x, 1)) == MULT)
13024 rtx tmp = XEXP (x, 0);
13025 XEXP (x, 0) = XEXP (x, 1);
13030 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13031 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13032 created by virtual register instantiation, register elimination, and
13033 similar optimizations. */
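/* A concrete instance (A and B are placeholder registers):
       (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   is rewritten to
       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   which is the encodable form 8(B,A,4).  */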
13034 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13037 x = gen_rtx_PLUS (Pmode,
13038 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13039 XEXP (XEXP (x, 1), 0)),
13040 XEXP (XEXP (x, 1), 1));
13044 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13045 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13046 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13047 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13048 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13049 && CONSTANT_P (XEXP (x, 1)))
13052 rtx other = NULL_RTX;
13054 if (CONST_INT_P (XEXP (x, 1)))
13056 constant = XEXP (x, 1);
13057 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13059 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13061 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13062 other = XEXP (x, 1);
13070 x = gen_rtx_PLUS (Pmode,
13071 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13072 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13073 plus_constant (other, INTVAL (constant)));
13077 if (changed && ix86_legitimate_address_p (mode, x, false))
13080 if (GET_CODE (XEXP (x, 0)) == MULT)
13083 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13086 if (GET_CODE (XEXP (x, 1)) == MULT)
13089 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13093 && REG_P (XEXP (x, 1))
13094 && REG_P (XEXP (x, 0)))
13097 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13100 x = legitimize_pic_address (x, 0);
13103 if (changed && ix86_legitimate_address_p (mode, x, false))
13106 if (REG_P (XEXP (x, 0)))
13108 rtx temp = gen_reg_rtx (Pmode);
13109 rtx val = force_operand (XEXP (x, 1), temp);
13111 emit_move_insn (temp, val);
13113 XEXP (x, 1) = temp;
13117 else if (REG_P (XEXP (x, 1)))
13119 rtx temp = gen_reg_rtx (Pmode);
13120 rtx val = force_operand (XEXP (x, 0), temp);
13122 emit_move_insn (temp, val);
13124 XEXP (x, 0) = temp;
13132 /* Print an integer constant expression in assembler syntax. Addition
13133 and subtraction are the only arithmetic that may appear in these
13134 expressions. FILE is the stdio stream to write to, X is the rtx, and
13135 CODE is the operand print code from the output string. */
13138 output_pic_addr_const (FILE *file, rtx x, int code)
13142 switch (GET_CODE (x))
13145 gcc_assert (flag_pic);
13150 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13151 output_addr_const (file, x);
13154 const char *name = XSTR (x, 0);
13156 /* Mark the decl as referenced so that cgraph will
13157 output the function. */
13158 if (SYMBOL_REF_DECL (x))
13159 mark_decl_referenced (SYMBOL_REF_DECL (x));
13162 if (MACHOPIC_INDIRECT
13163 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13164 name = machopic_indirection_name (x, /*stub_p=*/true);
13166 assemble_name (file, name);
13168 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13169 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13170 fputs ("@PLT", file);
13177 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13178 assemble_name (asm_out_file, buf);
13182 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13186 /* This used to output parentheses around the expression,
13187 but that does not work on the 386 (either ATT or BSD assembler). */
13188 output_pic_addr_const (file, XEXP (x, 0), code);
13192 if (GET_MODE (x) == VOIDmode)
13194 /* We can use %d if the number is <32 bits and positive. */
13195 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13196 fprintf (file, "0x%lx%08lx",
13197 (unsigned long) CONST_DOUBLE_HIGH (x),
13198 (unsigned long) CONST_DOUBLE_LOW (x));
13200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13203 /* We can't handle floating point constants;
13204 TARGET_PRINT_OPERAND must handle them. */
13205 output_operand_lossage ("floating constant misused");
13209 /* Some assemblers need integer constants to appear first. */
13210 if (CONST_INT_P (XEXP (x, 0)))
13212 output_pic_addr_const (file, XEXP (x, 0), code);
13214 output_pic_addr_const (file, XEXP (x, 1), code);
13218 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13219 output_pic_addr_const (file, XEXP (x, 1), code);
13221 output_pic_addr_const (file, XEXP (x, 0), code);
13227 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13228 output_pic_addr_const (file, XEXP (x, 0), code);
13230 output_pic_addr_const (file, XEXP (x, 1), code);
13232 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13236 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13238 bool f = i386_asm_output_addr_const_extra (file, x);
13243 gcc_assert (XVECLEN (x, 0) == 1);
13244 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13245 switch (XINT (x, 1))
13248 fputs ("@GOT", file);
13250 case UNSPEC_GOTOFF:
13251 fputs ("@GOTOFF", file);
13253 case UNSPEC_PLTOFF:
13254 fputs ("@PLTOFF", file);
13257 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13258 "(%rip)" : "[rip]", file);
13260 case UNSPEC_GOTPCREL:
13261 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13262 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13264 case UNSPEC_GOTTPOFF:
13265 /* FIXME: This might be @TPOFF in Sun ld too. */
13266 fputs ("@gottpoff", file);
13269 fputs ("@tpoff", file);
13271 case UNSPEC_NTPOFF:
13273 fputs ("@tpoff", file);
13275 fputs ("@ntpoff", file);
13277 case UNSPEC_DTPOFF:
13278 fputs ("@dtpoff", file);
13280 case UNSPEC_GOTNTPOFF:
13282 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13283 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13285 fputs ("@gotntpoff", file);
13287 case UNSPEC_INDNTPOFF:
13288 fputs ("@indntpoff", file);
13291 case UNSPEC_MACHOPIC_OFFSET:
13293 machopic_output_function_base_name (file);
13297 output_operand_lossage ("invalid UNSPEC as operand");
13303 output_operand_lossage ("invalid expression as operand");
13307 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13308 We need to emit DTP-relative relocations. */
13310 static void ATTRIBUTE_UNUSED
13311 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13313 fputs (ASM_LONG, file);
13314 output_addr_const (file, x);
13315 fputs ("@dtpoff", file);
13321 fputs (", 0", file);
13324 gcc_unreachable ();
13328 /* Return true if X is a representation of the PIC register. This copes
13329 with calls from ix86_find_base_term, where the register might have
13330 been replaced by a cselib value. */
13333 ix86_pic_register_p (rtx x)
13335 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13336 return (pic_offset_table_rtx
13337 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13339 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13342 /* Helper function for ix86_delegitimize_address.
13343 Attempt to delegitimize TLS local-exec accesses. */
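/* E.g. on x86-64 a local-exec access such as %fs:x@tpoff(,%rax,4)
   decomposes into seg FS, index %rax, scale 4 and a displacement of the
   form (const (unspec [x] UNSPEC_NTPOFF)); the code below rebuilds
   x + %rax*4 from it.  ("x" is a placeholder symbol.)  */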
13346 ix86_delegitimize_tls_address (rtx orig_x)
13348 rtx x = orig_x, unspec;
13349 struct ix86_address addr;
13351 if (!TARGET_TLS_DIRECT_SEG_REFS)
13355 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13357 if (ix86_decompose_address (x, &addr) == 0
13358 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13359 || addr.disp == NULL_RTX
13360 || GET_CODE (addr.disp) != CONST)
13362 unspec = XEXP (addr.disp, 0);
13363 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13364 unspec = XEXP (unspec, 0);
13365 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13367 x = XVECEXP (unspec, 0, 0);
13368 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13369 if (unspec != XEXP (addr.disp, 0))
13370 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13373 rtx idx = addr.index;
13374 if (addr.scale != 1)
13375 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13376 x = gen_rtx_PLUS (Pmode, idx, x);
13379 x = gen_rtx_PLUS (Pmode, addr.base, x);
13380 if (MEM_P (orig_x))
13381 x = replace_equiv_address_nv (orig_x, x);
13385 /* In the name of slightly smaller debug output, and to cater to
13386 general assembler lossage, recognize PIC+GOTOFF and turn it back
13387 into a direct symbol reference.
13389 On Darwin, this is necessary to avoid a crash, because Darwin
13390 has a different PIC label for each routine but the DWARF debugging
13391 information is not associated with any particular routine, so it's
13392 necessary to remove references to the PIC label from RTL stored by
13393 the DWARF output code. */
13396 ix86_delegitimize_address (rtx x)
13398 rtx orig_x = delegitimize_mem_from_attrs (x);
13399 /* addend is NULL or some rtx if x is something+GOTOFF where
13400 something doesn't include the PIC register. */
13401 rtx addend = NULL_RTX;
13402 /* reg_addend is NULL or a multiple of some register. */
13403 rtx reg_addend = NULL_RTX;
13404 /* const_addend is NULL or a const_int. */
13405 rtx const_addend = NULL_RTX;
13406 /* This is the result, or NULL. */
13407 rtx result = NULL_RTX;
13416 if (GET_CODE (x) != CONST
13417 || GET_CODE (XEXP (x, 0)) != UNSPEC
13418 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13419 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13420 || !MEM_P (orig_x))
13421 return ix86_delegitimize_tls_address (orig_x);
13422 x = XVECEXP (XEXP (x, 0), 0, 0);
13423 if (GET_MODE (orig_x) != Pmode)
13425 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13432 if (GET_CODE (x) != PLUS
13433 || GET_CODE (XEXP (x, 1)) != CONST)
13434 return ix86_delegitimize_tls_address (orig_x);
13436 if (ix86_pic_register_p (XEXP (x, 0)))
13437 /* %ebx + GOT/GOTOFF */
13439 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13441 /* %ebx + %reg * scale + GOT/GOTOFF */
13442 reg_addend = XEXP (x, 0);
13443 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13444 reg_addend = XEXP (reg_addend, 1);
13445 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13446 reg_addend = XEXP (reg_addend, 0);
13449 reg_addend = NULL_RTX;
13450 addend = XEXP (x, 0);
13454 addend = XEXP (x, 0);
13456 x = XEXP (XEXP (x, 1), 0);
13457 if (GET_CODE (x) == PLUS
13458 && CONST_INT_P (XEXP (x, 1)))
13460 const_addend = XEXP (x, 1);
13464 if (GET_CODE (x) == UNSPEC
13465 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13466 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13467 result = XVECEXP (x, 0, 0);
13469 if (TARGET_MACHO && darwin_local_data_pic (x)
13470 && !MEM_P (orig_x))
13471 result = XVECEXP (x, 0, 0);
13474 return ix86_delegitimize_tls_address (orig_x);
13477 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13479 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13482 /* If the rest of original X doesn't involve the PIC register, add
13483 addend and subtract pic_offset_table_rtx. This can happen e.g.
13485 leal (%ebx, %ecx, 4), %ecx
13487 movl foo@GOTOFF(%ecx), %edx
13488 in which case we return (%ecx - %ebx) + foo. */
13489 if (pic_offset_table_rtx)
13490 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13491 pic_offset_table_rtx),
13496 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13498 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13499 if (result == NULL_RTX)
13505 /* If X is a machine specific address (i.e. a symbol or label being
13506 referenced as a displacement from the GOT implemented using an
13507 UNSPEC), then return the base term. Otherwise return X. */
13510 ix86_find_base_term (rtx x)
13516 if (GET_CODE (x) != CONST)
13518 term = XEXP (x, 0);
13519 if (GET_CODE (term) == PLUS
13520 && (CONST_INT_P (XEXP (term, 1))
13521 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13522 term = XEXP (term, 0);
13523 if (GET_CODE (term) != UNSPEC
13524 || (XINT (term, 1) != UNSPEC_GOTPCREL
13525 && XINT (term, 1) != UNSPEC_PCREL))
13528 return XVECEXP (term, 0, 0);
13531 return ix86_delegitimize_address (x);
13535 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13536 int fp, FILE *file)
13538 const char *suffix;
13540 if (mode == CCFPmode || mode == CCFPUmode)
13542 code = ix86_fp_compare_code_to_integer (code);
13546 code = reverse_condition (code);
13597 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13601 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13602 Those same assemblers have the same but opposite lossage on cmov. */
13603 if (mode == CCmode)
13604 suffix = fp ? "nbe" : "a";
13605 else if (mode == CCCmode)
13608 gcc_unreachable ();
13624 gcc_unreachable ();
13628 gcc_assert (mode == CCmode || mode == CCCmode);
13645 gcc_unreachable ();
13649 /* ??? As above. */
13650 gcc_assert (mode == CCmode || mode == CCCmode);
13651 suffix = fp ? "nb" : "ae";
13654 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13658 /* ??? As above. */
13659 if (mode == CCmode)
13661 else if (mode == CCCmode)
13662 suffix = fp ? "nb" : "ae";
13664 gcc_unreachable ();
13667 suffix = fp ? "u" : "p";
13670 suffix = fp ? "nu" : "np";
13673 gcc_unreachable ();
13675 fputs (suffix, file);
13678 /* Print the name of register X to FILE based on its machine mode and number.
13679 If CODE is 'w', pretend the mode is HImode.
13680 If CODE is 'b', pretend the mode is QImode.
13681 If CODE is 'k', pretend the mode is SImode.
13682 If CODE is 'q', pretend the mode is DImode.
13683 If CODE is 'x', pretend the mode is V4SFmode.
13684 If CODE is 't', pretend the mode is V8SFmode.
13685 If CODE is 'h', pretend the reg is the 'high' byte register.
13686 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13687 If CODE is 'd', duplicate the operand for AVX instruction. */
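/* For example, given the "ax" register as operand: code 'b' prints
   "al", 'w' prints "ax", 'k' prints "eax" and 'q' prints "rax", each
   with a '%' prefix in AT&T syntax.  Illustration only; see the code
   below for the exact rules.  */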
13691 print_reg (rtx x, int code, FILE *file)
13694 bool duplicated = code == 'd' && TARGET_AVX;
13696 gcc_assert (x == pc_rtx
13697 || (REGNO (x) != ARG_POINTER_REGNUM
13698 && REGNO (x) != FRAME_POINTER_REGNUM
13699 && REGNO (x) != FLAGS_REG
13700 && REGNO (x) != FPSR_REG
13701 && REGNO (x) != FPCR_REG));
13703 if (ASSEMBLER_DIALECT == ASM_ATT)
13708 gcc_assert (TARGET_64BIT);
13709 fputs ("rip", file);
13713 if (code == 'w' || MMX_REG_P (x))
13715 else if (code == 'b')
13717 else if (code == 'k')
13719 else if (code == 'q')
13721 else if (code == 'y')
13723 else if (code == 'h')
13725 else if (code == 'x')
13727 else if (code == 't')
13730 code = GET_MODE_SIZE (GET_MODE (x));
13732 /* Irritatingly, AMD extended registers use a different naming
13733 convention from the normal registers. */
13734 if (REX_INT_REG_P (x))
13736 gcc_assert (TARGET_64BIT);
13740 error ("extended registers have no high halves");
13743 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13746 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13749 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13752 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13755 error ("unsupported operand size for extended register");
13765 if (STACK_TOP_P (x))
13774 if (! ANY_FP_REG_P (x))
13775 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13780 reg = hi_reg_name[REGNO (x)];
13783 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13785 reg = qi_reg_name[REGNO (x)];
13788 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13790 reg = qi_high_reg_name[REGNO (x)];
13795 gcc_assert (!duplicated);
13797 fputs (hi_reg_name[REGNO (x)] + 1, file);
13802 gcc_unreachable ();
13808 if (ASSEMBLER_DIALECT == ASM_ATT)
13809 fprintf (file, ", %%%s", reg);
13811 fprintf (file, ", %s", reg);
13815 /* Locate some local-dynamic symbol still in use by this function
13816 so that we can print its name in some tls_local_dynamic_base pattern. */
13820 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13824 if (GET_CODE (x) == SYMBOL_REF
13825 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13827 cfun->machine->some_ld_name = XSTR (x, 0);
13834 static const char *
13835 get_some_local_dynamic_name (void)
13839 if (cfun->machine->some_ld_name)
13840 return cfun->machine->some_ld_name;
13842 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13843 if (NONDEBUG_INSN_P (insn)
13844 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13845 return cfun->machine->some_ld_name;
13850 /* Meaning of CODE:
13851 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13852 C -- print opcode suffix for set/cmov insn.
13853 c -- like C, but print reversed condition
13854 F,f -- likewise, but for floating-point.
13855 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13857 R -- print the prefix for register names.
13858 z -- print the opcode suffix for the size of the current operand.
13859 Z -- likewise, with special suffixes for x87 instructions.
13860 * -- print a star (in certain assembler syntax)
13861 A -- print an absolute memory reference.
13862 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13863 s -- print a shift double count, followed by the assembler's argument delimiter.
13865 b -- print the QImode name of the register for the indicated operand.
13866 %b0 would print %al if operands[0] is reg 0.
13867 w -- likewise, print the HImode name of the register.
13868 k -- likewise, print the SImode name of the register.
13869 q -- likewise, print the DImode name of the register.
13870 x -- likewise, print the V4SFmode name of the register.
13871 t -- likewise, print the V8SFmode name of the register.
13872 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13873 y -- print "st(0)" instead of "st" as a register.
13874 d -- print duplicated register operand for AVX instruction.
13875 D -- print condition for SSE cmp instruction.
13876 P -- if PIC, print an @PLT suffix.
13877 X -- don't print any sort of PIC '@' suffix for a symbol.
13878 & -- print some in-use local-dynamic symbol name.
13879 H -- print a memory address offset by 8; used for sse high-parts
13880 Y -- print condition for XOP pcom* instruction.
13881 + -- print a branch hint as 'cs' or 'ds' prefix
13882 ; -- print a semicolon (after prefixes due to bug in older gas).
13883 @ -- print a segment register of thread base pointer load */
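/* A hypothetical template using some of the codes above:
       "mov{b}\t{%b1, %h0|%h0, %b1}"
   prints the QImode name of operand 1 and the "high" byte register of
   operand 0, e.g. "movb %al, %bh" in AT&T syntax and "mov bh, al" in
   Intel syntax.  */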
13887 ix86_print_operand (FILE *file, rtx x, int code)
13894 if (ASSEMBLER_DIALECT == ASM_ATT)
13900 const char *name = get_some_local_dynamic_name ();
13902 output_operand_lossage ("'%%&' used without any "
13903 "local dynamic TLS references");
13905 assemble_name (file, name);
13910 switch (ASSEMBLER_DIALECT)
13917 /* Intel syntax. For absolute addresses, registers should not
13918 be surrounded by braces. */
13922 ix86_print_operand (file, x, 0);
13929 gcc_unreachable ();
13932 ix86_print_operand (file, x, 0);
13937 if (ASSEMBLER_DIALECT == ASM_ATT)
13942 if (ASSEMBLER_DIALECT == ASM_ATT)
13947 if (ASSEMBLER_DIALECT == ASM_ATT)
13952 if (ASSEMBLER_DIALECT == ASM_ATT)
13957 if (ASSEMBLER_DIALECT == ASM_ATT)
13962 if (ASSEMBLER_DIALECT == ASM_ATT)
13967 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13969 /* Opcodes don't get size suffixes if using Intel opcodes. */
13970 if (ASSEMBLER_DIALECT == ASM_INTEL)
13973 switch (GET_MODE_SIZE (GET_MODE (x)))
13992 output_operand_lossage
13993 ("invalid operand size for operand code '%c'", code);
13998 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14000 (0, "non-integer operand used with operand code '%c'", code);
14004 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14005 if (ASSEMBLER_DIALECT == ASM_INTEL)
14008 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14010 switch (GET_MODE_SIZE (GET_MODE (x)))
14013 #ifdef HAVE_AS_IX86_FILDS
14023 #ifdef HAVE_AS_IX86_FILDQ
14026 fputs ("ll", file);
14034 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14036 /* 387 opcodes don't get size suffixes
14037 if the operands are registers. */
14038 if (STACK_REG_P (x))
14041 switch (GET_MODE_SIZE (GET_MODE (x)))
14062 output_operand_lossage
14063 ("invalid operand type used with operand code '%c'", code);
14067 output_operand_lossage
14068 ("invalid operand size for operand code '%c'", code);
14085 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14087 ix86_print_operand (file, x, 0);
14088 fputs (", ", file);
14093 /* Little bit of braindamage here. The SSE compare instructions
14094 use completely different names for the comparisons than the
14095 fp conditional moves do. */
14098 switch (GET_CODE (x))
14101 fputs ("eq", file);
14104 fputs ("eq_us", file);
14107 fputs ("lt", file);
14110 fputs ("nge", file);
14113 fputs ("le", file);
14116 fputs ("ngt", file);
14119 fputs ("unord", file);
14122 fputs ("neq", file);
14125 fputs ("neq_oq", file);
14128 fputs ("ge", file);
14131 fputs ("nlt", file);
14134 fputs ("gt", file);
14137 fputs ("nle", file);
14140 fputs ("ord", file);
14143 output_operand_lossage ("operand is not a condition code, "
14144 "invalid operand code 'D'");
14150 switch (GET_CODE (x))
14154 fputs ("eq", file);
14158 fputs ("lt", file);
14162 fputs ("le", file);
14165 fputs ("unord", file);
14169 fputs ("neq", file);
14173 fputs ("nlt", file);
14177 fputs ("nle", file);
14180 fputs ("ord", file);
14183 output_operand_lossage ("operand is not a condition code, "
14184 "invalid operand code 'D'");
14190 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14191 if (ASSEMBLER_DIALECT == ASM_ATT)
14193 switch (GET_MODE (x))
14195 case HImode: putc ('w', file); break;
14197 case SFmode: putc ('l', file); break;
14199 case DFmode: putc ('q', file); break;
14200 default: gcc_unreachable ();
14207 if (!COMPARISON_P (x))
14209 output_operand_lossage ("operand is neither a constant nor a "
14210 "condition code, invalid operand code "
14214 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14217 if (!COMPARISON_P (x))
14219 output_operand_lossage ("operand is neither a constant nor a "
14220 "condition code, invalid operand code "
14224 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14225 if (ASSEMBLER_DIALECT == ASM_ATT)
14228 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14231 /* Like above, but reverse condition */
14233 /* Check to see if argument to %c is really a constant
14234 and not a condition code which needs to be reversed. */
14235 if (!COMPARISON_P (x))
14237 output_operand_lossage ("operand is neither a constant nor a "
14238 "condition code, invalid operand "
14242 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14245 if (!COMPARISON_P (x))
14247 output_operand_lossage ("operand is neither a constant nor a "
14248 "condition code, invalid operand "
14252 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14253 if (ASSEMBLER_DIALECT == ASM_ATT)
14256 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14260 /* It doesn't actually matter what mode we use here, as we're
14261 only going to use this for printing. */
14262 x = adjust_address_nv (x, DImode, 8);
14270 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14273 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14276 int pred_val = INTVAL (XEXP (x, 0));
14278 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14279 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14281 int taken = pred_val > REG_BR_PROB_BASE / 2;
14282 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14284 /* Emit hints only when the default branch prediction
14285 heuristics would fail. */
14286 if (taken != cputaken)
14288 /* We use 3e (DS) prefix for taken branches and
14289 2e (CS) prefix for not taken branches. */
14291 fputs ("ds ; ", file);
14293 fputs ("cs ; ", file);
14301 switch (GET_CODE (x))
14304 fputs ("neq", file);
14307 fputs ("eq", file);
14311 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14315 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14319 fputs ("le", file);
14323 fputs ("lt", file);
14326 fputs ("unord", file);
14329 fputs ("ord", file);
14332 fputs ("ueq", file);
14335 fputs ("nlt", file);
14338 fputs ("nle", file);
14341 fputs ("ule", file);
14344 fputs ("ult", file);
14347 fputs ("une", file);
14350 output_operand_lossage ("operand is not a condition code, "
14351 "invalid operand code 'Y'");
14357 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14363 if (ASSEMBLER_DIALECT == ASM_ATT)
14366 /* The kernel uses a different segment register for performance
14367 reasons; a system call would not have to trash the userspace
14368 segment register, which would be expensive. */
14369 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14370 fputs ("fs", file);
14372 fputs ("gs", file);
14376 output_operand_lossage ("invalid operand code '%c'", code);
14381 print_reg (x, code, file);
14383 else if (MEM_P (x))
14385 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14386 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14387 && GET_MODE (x) != BLKmode)
14390 switch (GET_MODE_SIZE (GET_MODE (x)))
14392 case 1: size = "BYTE"; break;
14393 case 2: size = "WORD"; break;
14394 case 4: size = "DWORD"; break;
14395 case 8: size = "QWORD"; break;
14396 case 12: size = "TBYTE"; break;
14398 if (GET_MODE (x) == XFmode)
14403 case 32: size = "YMMWORD"; break;
14405 gcc_unreachable ();
14408 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14411 else if (code == 'w')
14413 else if (code == 'k')
14416 fputs (size, file);
14417 fputs (" PTR ", file);
14421 /* Avoid (%rip) for call operands. */
14422 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14423 && !CONST_INT_P (x))
14424 output_addr_const (file, x);
14425 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14426 output_operand_lossage ("invalid constraints for operand");
14428 output_address (x);
14431 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14436 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14437 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14439 if (ASSEMBLER_DIALECT == ASM_ATT)
14441 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14443 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14445 fprintf (file, "0x%08x", (unsigned int) l);
14448 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14453 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14454 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14456 if (ASSEMBLER_DIALECT == ASM_ATT)
14458 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14461 /* These float cases don't actually occur as immediate operands. */
14462 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14466 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14467 fputs (dstr, file);
14472 /* We have patterns that allow zero sets of memory, for instance.
14473 In 64-bit mode, we should probably support all 8-byte vectors,
14474 since we can in fact encode that into an immediate. */
14475 if (GET_CODE (x) == CONST_VECTOR)
14477 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14483 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14485 if (ASSEMBLER_DIALECT == ASM_ATT)
14488 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14489 || GET_CODE (x) == LABEL_REF)
14491 if (ASSEMBLER_DIALECT == ASM_ATT)
14494 fputs ("OFFSET FLAT:", file);
14497 if (CONST_INT_P (x))
14498 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14499 else if (flag_pic || MACHOPIC_INDIRECT)
14500 output_pic_addr_const (file, x, code);
14502 output_addr_const (file, x);
14507 ix86_print_operand_punct_valid_p (unsigned char code)
14509 return (code == '@' || code == '*' || code == '+'
14510 || code == '&' || code == ';');
14513 /* Print a memory operand whose address is ADDR. */
14516 ix86_print_operand_address (FILE *file, rtx addr)
14518 struct ix86_address parts;
14519 rtx base, index, disp;
14521 int ok = ix86_decompose_address (addr, &parts);
14526 index = parts.index;
14528 scale = parts.scale;
14536 if (ASSEMBLER_DIALECT == ASM_ATT)
14538 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14541 gcc_unreachable ();
14544 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14545 if (TARGET_64BIT && !base && !index)
14549 if (GET_CODE (disp) == CONST
14550 && GET_CODE (XEXP (disp, 0)) == PLUS
14551 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14552 symbol = XEXP (XEXP (disp, 0), 0);
14554 if (GET_CODE (symbol) == LABEL_REF
14555 || (GET_CODE (symbol) == SYMBOL_REF
14556 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14559 if (!base && !index)
14561 /* Displacement only requires special attention. */
14563 if (CONST_INT_P (disp))
14565 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14566 fputs ("ds:", file);
14567 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14570 output_pic_addr_const (file, disp, 0);
14572 output_addr_const (file, disp);
14576 if (ASSEMBLER_DIALECT == ASM_ATT)
14581 output_pic_addr_const (file, disp, 0);
14582 else if (GET_CODE (disp) == LABEL_REF)
14583 output_asm_label (disp);
14585 output_addr_const (file, disp);
14590 print_reg (base, 0, file);
14594 print_reg (index, 0, file);
14596 fprintf (file, ",%d", scale);
14602 rtx offset = NULL_RTX;
14606 /* Pull out the offset of a symbol; print any symbol itself. */
14607 if (GET_CODE (disp) == CONST
14608 && GET_CODE (XEXP (disp, 0)) == PLUS
14609 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14611 offset = XEXP (XEXP (disp, 0), 1);
14612 disp = gen_rtx_CONST (VOIDmode,
14613 XEXP (XEXP (disp, 0), 0));
14617 output_pic_addr_const (file, disp, 0);
14618 else if (GET_CODE (disp) == LABEL_REF)
14619 output_asm_label (disp);
14620 else if (CONST_INT_P (disp))
14623 output_addr_const (file, disp);
14629 print_reg (base, 0, file);
14632 if (INTVAL (offset) >= 0)
14634 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14638 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14645 print_reg (index, 0, file);
14647 fprintf (file, "*%d", scale);
14654 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14657 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14661 if (GET_CODE (x) != UNSPEC)
14664 op = XVECEXP (x, 0, 0);
14665 switch (XINT (x, 1))
14667 case UNSPEC_GOTTPOFF:
14668 output_addr_const (file, op);
14669 /* FIXME: This might be @TPOFF in Sun ld. */
14670 fputs ("@gottpoff", file);
14673 output_addr_const (file, op);
14674 fputs ("@tpoff", file);
14676 case UNSPEC_NTPOFF:
14677 output_addr_const (file, op);
14679 fputs ("@tpoff", file);
14681 fputs ("@ntpoff", file);
14683 case UNSPEC_DTPOFF:
14684 output_addr_const (file, op);
14685 fputs ("@dtpoff", file);
14687 case UNSPEC_GOTNTPOFF:
14688 output_addr_const (file, op);
14690 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14691 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14693 fputs ("@gotntpoff", file);
14695 case UNSPEC_INDNTPOFF:
14696 output_addr_const (file, op);
14697 fputs ("@indntpoff", file);
14700 case UNSPEC_MACHOPIC_OFFSET:
14701 output_addr_const (file, op);
14703 machopic_output_function_base_name (file);
14707 case UNSPEC_STACK_CHECK:
14711 gcc_assert (flag_split_stack);
14713 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14714 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14716 gcc_unreachable ();
14719 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14730 /* Split one or more double-mode RTL references into pairs of half-mode
14731 references. The RTL can be REG, offsettable MEM, integer constant, or
14732 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14733 split and "num" is its length. lo_half and hi_half are output arrays
14734 that parallel "operands". */
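/* Sketch: for a volatile (mem:DI (reg P)) the halves are
   lo = (mem:SI (reg P)) and hi = (mem:SI (plus (reg P) (const_int 4)))
   via adjust_address, while REGs and constants go through
   simplify_gen_subreg with byte offsets 0 and 4.  (Little-endian
   layout, as on x86; P is a placeholder register.)  */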
14737 split_double_mode (enum machine_mode mode, rtx operands[],
14738 int num, rtx lo_half[], rtx hi_half[])
14740 enum machine_mode half_mode;
14746 half_mode = DImode;
14749 half_mode = SImode;
14752 gcc_unreachable ();
14755 byte = GET_MODE_SIZE (half_mode);
14759 rtx op = operands[num];
14761 /* simplify_subreg refuses to split volatile memory addresses,
14762 but we still have to handle them. */
14765 lo_half[num] = adjust_address (op, half_mode, 0);
14766 hi_half[num] = adjust_address (op, half_mode, byte);
14770 lo_half[num] = simplify_gen_subreg (half_mode, op,
14771 GET_MODE (op) == VOIDmode
14772 ? mode : GET_MODE (op), 0);
14773 hi_half[num] = simplify_gen_subreg (half_mode, op,
14774 GET_MODE (op) == VOIDmode
14775 ? mode : GET_MODE (op), byte);
14780 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14781 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14782 is the expression of the binary operation. The output may either be
14783 emitted here, or returned to the caller, like all output_* functions.
14785 There is no guarantee that the operands are the same mode, as they
14786 might be within FLOAT or FLOAT_EXTEND expressions. */
14788 #ifndef SYSV386_COMPAT
14789 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14790 wants to fix the assemblers because that causes incompatibility
14791 with gcc. No-one wants to fix gcc because that causes
14792 incompatibility with assemblers... You can use the option of
14793 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14794 #define SYSV386_COMPAT 1
14798 output_387_binary_op (rtx insn, rtx *operands)
14800 static char buf[40];
14803 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14805 #ifdef ENABLE_CHECKING
14806 /* Even if we do not want to check the inputs, this documents the
14807 input constraints, which helps in understanding the following code. */
14808 if (STACK_REG_P (operands[0])
14809 && ((REG_P (operands[1])
14810 && REGNO (operands[0]) == REGNO (operands[1])
14811 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14812 || (REG_P (operands[2])
14813 && REGNO (operands[0]) == REGNO (operands[2])
14814 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14815 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14818 gcc_assert (is_sse);
14821 switch (GET_CODE (operands[3]))
14824 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14825 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14833 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14834 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14842 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14843 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14851 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14852 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14860 gcc_unreachable ();
14867 strcpy (buf, ssep);
14868 if (GET_MODE (operands[0]) == SFmode)
14869 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14871 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14875 strcpy (buf, ssep + 1);
14876 if (GET_MODE (operands[0]) == SFmode)
14877 strcat (buf, "ss\t{%2, %0|%0, %2}");
14879 strcat (buf, "sd\t{%2, %0|%0, %2}");
14885 switch (GET_CODE (operands[3]))
14889 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14891 rtx temp = operands[2];
14892 operands[2] = operands[1];
14893 operands[1] = temp;
14896 /* We know operands[0] == operands[1]. */
14898 if (MEM_P (operands[2]))
14904 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14906 if (STACK_TOP_P (operands[0]))
14907 /* How is it that we are storing to a dead operand[2]?
14908 Well, presumably operands[1] is dead too. We can't
14909 store the result to st(0) as st(0) gets popped on this
14910 instruction. Instead store to operands[2] (which I
14911 think has to be st(1)). st(1) will be popped later.
14912 gcc <= 2.8.1 didn't have this check and generated
14913 assembly code that the Unixware assembler rejected. */
14914 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14916 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14920 if (STACK_TOP_P (operands[0]))
14921 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14923 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14928 if (MEM_P (operands[1]))
14934 if (MEM_P (operands[2]))
14940 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14943 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14944 derived assemblers, confusingly reverse the direction of
14945 the operation for fsub{r} and fdiv{r} when the
14946 destination register is not st(0). The Intel assembler
14947 doesn't have this brain damage. Read !SYSV386_COMPAT to
14948 figure out what the hardware really does. */
14949 if (STACK_TOP_P (operands[0]))
14950 p = "{p\t%0, %2|rp\t%2, %0}";
14952 p = "{rp\t%2, %0|p\t%0, %2}";
14954 if (STACK_TOP_P (operands[0]))
14955 /* As above for fmul/fadd, we can't store to st(0). */
14956 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14958 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14963 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14966 if (STACK_TOP_P (operands[0]))
14967 p = "{rp\t%0, %1|p\t%1, %0}";
14969 p = "{p\t%1, %0|rp\t%0, %1}";
14971 if (STACK_TOP_P (operands[0]))
14972 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14974 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14979 if (STACK_TOP_P (operands[0]))
14981 if (STACK_TOP_P (operands[1]))
14982 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14984 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14987 else if (STACK_TOP_P (operands[1]))
14990 p = "{\t%1, %0|r\t%0, %1}";
14992 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14998 p = "{r\t%2, %0|\t%0, %2}";
15000 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15006 gcc_unreachable ();
15013 /* Return needed mode for entity in optimize_mode_switching pass. */
15016 ix86_mode_needed (int entity, rtx insn)
15018 enum attr_i387_cw mode;
15020 /* The mode UNINITIALIZED is used to store the control word after a
15021 function call or ASM pattern. The mode ANY specifies that the function
15022 has no requirements on the control word and makes no changes in the
15023 bits we are interested in. */
15026 || (NONJUMP_INSN_P (insn)
15027 && (asm_noperands (PATTERN (insn)) >= 0
15028 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15029 return I387_CW_UNINITIALIZED;
15031 if (recog_memoized (insn) < 0)
15032 return I387_CW_ANY;
15034 mode = get_attr_i387_cw (insn);
15039 if (mode == I387_CW_TRUNC)
15044 if (mode == I387_CW_FLOOR)
15049 if (mode == I387_CW_CEIL)
15054 if (mode == I387_CW_MASK_PM)
15059 gcc_unreachable ();
15062 return I387_CW_ANY;
15065 /* Output code to initialize control word copies used by trunc?f?i and
15066    rounding patterns.  MODE selects the variant to build: the current
15067    control word is stored, modified, and saved to the matching stack slot.  */
15070 emit_i387_cw_initialization (int mode)
15072 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15075 enum ix86_stack_slot slot;
15077 rtx reg = gen_reg_rtx (HImode);
15079 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15080 emit_move_insn (reg, copy_rtx (stored_mode));
15082 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15083 || optimize_function_for_size_p (cfun))
15087 case I387_CW_TRUNC:
15088 /* round toward zero (truncate) */
15089 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15090 slot = SLOT_CW_TRUNC;
15093 case I387_CW_FLOOR:
15094 /* round down toward -oo */
15095 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15096 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15097 slot = SLOT_CW_FLOOR;
15101 /* round up toward +oo */
15102 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15103 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15104 slot = SLOT_CW_CEIL;
15107 case I387_CW_MASK_PM:
15108 /* mask precision exception for nearbyint() */
15109 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15110 slot = SLOT_CW_MASK_PM;
15114 gcc_unreachable ();
15121 case I387_CW_TRUNC:
15122 /* round toward zero (truncate) */
15123 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15124 slot = SLOT_CW_TRUNC;
15127 case I387_CW_FLOOR:
15128 /* round down toward -oo */
15129 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15130 slot = SLOT_CW_FLOOR;
15134 /* round up toward +oo */
15135 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15136 slot = SLOT_CW_CEIL;
15139 case I387_CW_MASK_PM:
15140 /* mask precision exception for nearbyint() */
15141 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15142 slot = SLOT_CW_MASK_PM;
15146 gcc_unreachable ();
15150 gcc_assert (slot < MAX_386_STACK_LOCALS);
15152 new_mode = assign_386_stack_local (HImode, slot);
15153 emit_move_insn (new_mode, reg);
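
/* Illustrative sketch, not part of GCC: the control word arithmetic
   above in scalar form.  The x87 rounding-control field is bits 10-11
   of the control word (00 = nearest, 01 = down, 10 = up, 11 =
   truncate) and bit 5 masks the precision exception.  The helper and
   its slot encoding are hypothetical.  */
#if 0
static unsigned short
i387_cw_for_slot (unsigned short cw, int slot)
{
  switch (slot)
    {
    case 0:				/* SLOT_CW_TRUNC: RC = 11 */
      return cw | 0x0c00;		/* both bits set, so ior alone works */
    case 1:				/* SLOT_CW_FLOOR: RC = 01 */
      return (cw & ~0x0c00) | 0x0400;
    case 2:				/* SLOT_CW_CEIL: RC = 10 */
      return (cw & ~0x0c00) | 0x0800;
    case 3:				/* SLOT_CW_MASK_PM: set the PM bit */
      return cw | 0x0020;
    default:
      return cw;
    }
}
#endif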
15156 /* Output code for INSN to convert a float to a signed int. OPERANDS
15157 are the insn operands. The output may be [HSD]Imode and the input
15158 operand may be [SDX]Fmode. */
15161 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
15163 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15164 int dimode_p = GET_MODE (operands[0]) == DImode;
15165 int round_mode = get_attr_i387_cw (insn);
15167 /* Jump through a hoop or two for DImode, since the hardware has no
15168 non-popping instruction. We used to do this a different way, but
15169 that was somewhat fragile and broke with post-reload splitters. */
15170 if ((dimode_p || fisttp) && !stack_top_dies)
15171 output_asm_insn ("fld\t%y1", operands);
15173 gcc_assert (STACK_TOP_P (operands[1]));
15174 gcc_assert (MEM_P (operands[0]));
15175 gcc_assert (GET_MODE (operands[1]) != TFmode);
15178 output_asm_insn ("fisttp%Z0\t%0", operands);
15181 if (round_mode != I387_CW_ANY)
15182 output_asm_insn ("fldcw\t%3", operands);
15183 if (stack_top_dies || dimode_p)
15184 output_asm_insn ("fistp%Z0\t%0", operands);
15186 output_asm_insn ("fist%Z0\t%0", operands);
15187 if (round_mode != I387_CW_ANY)
15188 output_asm_insn ("fldcw\t%2", operands);
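
/* Illustrative sketch, not part of GCC: a user-level analogue of the
   fldcw bracketing emitted above, using the C99 <fenv.h> interface
   (the helper name is hypothetical).  SSE3's fisttp makes both mode
   switches unnecessary because it always truncates.  */
#if 0
#include <fenv.h>
#include <math.h>

static long
trunc_to_long (double x)
{
  int save = fegetround ();
  fesetround (FE_TOWARDZERO);	/* the "fldcw %3" before the store */
  long r = lrint (x);		/* rounds like fistp: current mode */
  fesetround (save);		/* the "fldcw %2" afterwards */
  return r;
}
#endif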
15194 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15195 have the values zero or one, indicates the ffreep insn's operand
15196 from the OPERANDS array. */
15198 static const char *
15199 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15201 if (TARGET_USE_FFREEP)
15202 #ifdef HAVE_AS_IX86_FFREEP
15203 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15206 static char retval[32];
15207 int regno = REGNO (operands[opno]);
15209 gcc_assert (FP_REGNO_P (regno));
15211 regno -= FIRST_STACK_REG;
15213 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15218 return opno ? "fstp\t%y1" : "fstp\t%y0";
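
/* Illustrative sketch, not part of GCC: why the fallback prints
   ASM_SHORT "0xc%ddf".  "ffreep %st(i)" encodes as the byte pair
   0xdf, 0xc0 + i; stored as a little-endian 16-bit word the opcode
   byte comes first in memory, so e.g. st(2) is the word 0xc2df.
   The helper is hypothetical.  */
#if 0
static unsigned short
ffreep_word (int i)			/* i = 0..7, the stack register */
{
  unsigned char opcode = 0xdf;		/* x87 escape byte */
  unsigned char modrm = 0xc0 + i;	/* register operand st(i) */
  return (unsigned short) ((modrm << 8) | opcode);
}
#endif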
15222 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15223 should be used. UNORDERED_P is true when fucom should be used. */
15226 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
15228 int stack_top_dies;
15229 rtx cmp_op0, cmp_op1;
15230 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15234 cmp_op0 = operands[0];
15235 cmp_op1 = operands[1];
15239 cmp_op0 = operands[1];
15240 cmp_op1 = operands[2];
15245 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15246 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15247 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15248 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15250 if (GET_MODE (operands[0]) == SFmode)
15252 return &ucomiss[TARGET_AVX ? 0 : 1];
15254 return &comiss[TARGET_AVX ? 0 : 1];
15257 return &ucomisd[TARGET_AVX ? 0 : 1];
15259 return &comisd[TARGET_AVX ? 0 : 1];
15262 gcc_assert (STACK_TOP_P (cmp_op0));
15264 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15266 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15268 if (stack_top_dies)
15270 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15271 return output_387_ffreep (operands, 1);
15274 return "ftst\n\tfnstsw\t%0";
15277 if (STACK_REG_P (cmp_op1)
15279 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15280 && REGNO (cmp_op1) != FIRST_STACK_REG)
15282       /* If the top of the 387 stack dies, and the other operand
15283	 is also a stack register that dies, then this must be a
15284	 `fcompp' float compare.  */
15288 /* There is no double popping fcomi variant. Fortunately,
15289 eflags is immune from the fstp's cc clobbering. */
15291 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15293 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15294 return output_387_ffreep (operands, 0);
15299 return "fucompp\n\tfnstsw\t%0";
15301 return "fcompp\n\tfnstsw\t%0";
15306 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15308 static const char * const alt[16] =
15310 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15311 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15312 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15313 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15315 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15316 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15320 "fcomi\t{%y1, %0|%0, %y1}",
15321 "fcomip\t{%y1, %0|%0, %y1}",
15322 "fucomi\t{%y1, %0|%0, %y1}",
15323 "fucomip\t{%y1, %0|%0, %y1}",
15334 mask = eflags_p << 3;
15335 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15336 mask |= unordered_p << 1;
15337 mask |= stack_top_dies;
15339 gcc_assert (mask < 16);
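
  /* Illustrative sketch, not part of GCC: how the four predicates pack
     into an index of the alt[] table above (hypothetical helper).  For
     example, fucomip is eflags (8) + unordered (2) + pop (1) = alt[11].  */
#if 0
static int
fp_compare_variant (int eflags_p, int int_op2, int unordered_p,
		    int stack_top_dies)
{
  return (eflags_p << 3) | (int_op2 << 2) | (unordered_p << 1)
	 | stack_top_dies;
}
#endif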
15348 ix86_output_addr_vec_elt (FILE *file, int value)
15350 const char *directive = ASM_LONG;
15354 directive = ASM_QUAD;
15356 gcc_assert (!TARGET_64BIT);
15359 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15363 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15365 const char *directive = ASM_LONG;
15368 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15369 directive = ASM_QUAD;
15371 gcc_assert (!TARGET_64BIT);
15373 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15374 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15375 fprintf (file, "%s%s%d-%s%d\n",
15376 directive, LPREFIX, value, LPREFIX, rel);
15377 else if (HAVE_AS_GOTOFF_IN_DATA)
15378 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15380 else if (TARGET_MACHO)
15382 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15383 machopic_output_function_base_name (file);
15388 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15389 GOT_SYMBOL_NAME, LPREFIX, value);
15392 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15396 ix86_expand_clear (rtx dest)
15400 /* We play register width games, which are only valid after reload. */
15401 gcc_assert (reload_completed);
15403 /* Avoid HImode and its attendant prefix byte. */
15404 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15405 dest = gen_rtx_REG (SImode, REGNO (dest));
15406 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15408 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15409 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15411 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15412 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
15418 /* X is an unchanging MEM. If it is a constant pool reference, return
15419 the constant pool rtx, else NULL. */
15422 maybe_get_pool_constant (rtx x)
15424 x = ix86_delegitimize_address (XEXP (x, 0));
15426 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15427 return get_pool_constant (x);
15433 ix86_expand_move (enum machine_mode mode, rtx operands[])
15436 enum tls_model model;
15441 if (GET_CODE (op1) == SYMBOL_REF)
15443 model = SYMBOL_REF_TLS_MODEL (op1);
15446 op1 = legitimize_tls_address (op1, model, true);
15447 op1 = force_operand (op1, op0);
15451 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15452 && SYMBOL_REF_DLLIMPORT_P (op1))
15453 op1 = legitimize_dllimport_symbol (op1, false);
15455 else if (GET_CODE (op1) == CONST
15456 && GET_CODE (XEXP (op1, 0)) == PLUS
15457 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15459 rtx addend = XEXP (XEXP (op1, 0), 1);
15460 rtx symbol = XEXP (XEXP (op1, 0), 0);
15463 model = SYMBOL_REF_TLS_MODEL (symbol);
15465 tmp = legitimize_tls_address (symbol, model, true);
15466 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15467 && SYMBOL_REF_DLLIMPORT_P (symbol))
15468 tmp = legitimize_dllimport_symbol (symbol, true);
15472 tmp = force_operand (tmp, NULL);
15473 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15474 op0, 1, OPTAB_DIRECT);
15480 if ((flag_pic || MACHOPIC_INDIRECT)
15481 && mode == Pmode && symbolic_operand (op1, Pmode))
15483 if (TARGET_MACHO && !TARGET_64BIT)
15486 /* dynamic-no-pic */
15487 if (MACHOPIC_INDIRECT)
15489 rtx temp = ((reload_in_progress
15490 || ((op0 && REG_P (op0))
15492 ? op0 : gen_reg_rtx (Pmode));
15493 op1 = machopic_indirect_data_reference (op1, temp);
15495 op1 = machopic_legitimize_pic_address (op1, mode,
15496 temp == op1 ? 0 : temp);
15498 if (op0 != op1 && GET_CODE (op0) != MEM)
15500 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15504 if (GET_CODE (op0) == MEM)
15505 op1 = force_reg (Pmode, op1);
15509 if (GET_CODE (temp) != REG)
15510 temp = gen_reg_rtx (Pmode);
15511 temp = legitimize_pic_address (op1, temp);
15516 /* dynamic-no-pic */
15522 op1 = force_reg (Pmode, op1);
15523 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15525 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15526 op1 = legitimize_pic_address (op1, reg);
15535 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15536 || !push_operand (op0, mode))
15538 op1 = force_reg (mode, op1);
15540 if (push_operand (op0, mode)
15541 && ! general_no_elim_operand (op1, mode))
15542 op1 = copy_to_mode_reg (mode, op1);
15544   /* Force large constants in 64bit compilation into a register
15545      so that they get CSEed.  */
15546 if (can_create_pseudo_p ()
15547 && (mode == DImode) && TARGET_64BIT
15548 && immediate_operand (op1, mode)
15549 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15550 && !register_operand (op0, mode)
15552 op1 = copy_to_mode_reg (mode, op1);
15554 if (can_create_pseudo_p ()
15555 && FLOAT_MODE_P (mode)
15556 && GET_CODE (op1) == CONST_DOUBLE)
15558 /* If we are loading a floating point constant to a register,
15559 force the value to memory now, since we'll get better code
15560 out the back end. */
15562 op1 = validize_mem (force_const_mem (mode, op1));
15563 if (!register_operand (op0, mode))
15565 rtx temp = gen_reg_rtx (mode);
15566 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15567 emit_move_insn (op0, temp);
15573 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15577 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15579 rtx op0 = operands[0], op1 = operands[1];
15580 unsigned int align = GET_MODE_ALIGNMENT (mode);
15582   /* Force constants other than zero into memory.  We do not know how
15583      the instructions used to build constants modify the upper 64 bits
15584      of the register; once we have that information we may be able
15585      to handle some of them more efficiently.  */
15586 if (can_create_pseudo_p ()
15587 && register_operand (op0, mode)
15588 && (CONSTANT_P (op1)
15589 || (GET_CODE (op1) == SUBREG
15590 && CONSTANT_P (SUBREG_REG (op1))))
15591 && !standard_sse_constant_p (op1))
15592 op1 = validize_mem (force_const_mem (mode, op1));
15594   /* We need to check memory alignment for SSE mode since an attribute
15595      can make operands unaligned.  */
15596 if (can_create_pseudo_p ()
15597 && SSE_REG_MODE_P (mode)
15598 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15599 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15603 /* ix86_expand_vector_move_misalign() does not like constants ... */
15604 if (CONSTANT_P (op1)
15605 || (GET_CODE (op1) == SUBREG
15606 && CONSTANT_P (SUBREG_REG (op1))))
15607 op1 = validize_mem (force_const_mem (mode, op1));
15609 /* ... nor both arguments in memory. */
15610 if (!register_operand (op0, mode)
15611 && !register_operand (op1, mode))
15612 op1 = force_reg (mode, op1);
15614 tmp[0] = op0; tmp[1] = op1;
15615 ix86_expand_vector_move_misalign (mode, tmp);
15619 /* Make operand1 a register if it isn't already. */
15620 if (can_create_pseudo_p ()
15621 && !register_operand (op0, mode)
15622 && !register_operand (op1, mode))
15624 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15628 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15631 /* Split 32-byte AVX unaligned load and store if needed. */
15634 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
15637 rtx (*extract) (rtx, rtx, rtx);
15638 rtx (*move_unaligned) (rtx, rtx);
15639 enum machine_mode mode;
15641 switch (GET_MODE (op0))
15644 gcc_unreachable ();
15646 extract = gen_avx_vextractf128v32qi;
15647 move_unaligned = gen_avx_movdqu256;
15651 extract = gen_avx_vextractf128v8sf;
15652 move_unaligned = gen_avx_movups256;
15656 extract = gen_avx_vextractf128v4df;
15657 move_unaligned = gen_avx_movupd256;
15662 if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
15664 rtx r = gen_reg_rtx (mode);
15665 m = adjust_address (op1, mode, 0);
15666 emit_move_insn (r, m);
15667 m = adjust_address (op1, mode, 16);
15668 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
15669 emit_move_insn (op0, r);
15671 else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
15673 m = adjust_address (op0, mode, 0);
15674 emit_insn (extract (m, op1, const0_rtx));
15675 m = adjust_address (op0, mode, 16);
15676 emit_insn (extract (m, op1, const1_rtx));
15679 emit_insn (move_unaligned (op0, op1));
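
/* Illustrative sketch, not part of GCC: the split load above written
   with intrinsics (hypothetical helper).  On targets where the
   TARGET_AVX256_SPLIT_UNALIGNED_* tunings are set, two 16-byte halves
   glued with vinsertf128 beat one 32-byte vmovups.  */
#if 0
#include <immintrin.h>

static __m256
avx256_split_loadu (const float *p)
{
  __m128 lo = _mm_loadu_ps (p);		/* unaligned low 16 bytes */
  __m128 hi = _mm_loadu_ps (p + 4);	/* unaligned high 16 bytes */
  return _mm256_insertf128_ps (_mm256_castps128_ps256 (lo), hi, 1);
}
#endif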
15682 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15683 straight to ix86_expand_vector_move. */
15684 /* Code generation for scalar reg-reg moves of single and double precision data:
15685      if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
15689 if (x86_sse_partial_reg_dependency == true)
15694 Code generation for scalar loads of double precision data:
15695 if (x86_sse_split_regs == true)
15696 movlpd mem, reg (gas syntax)
15700 Code generation for unaligned packed loads of single precision data
15701 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15702 if (x86_sse_unaligned_move_optimal)
15705 if (x86_sse_partial_reg_dependency == true)
15717 Code generation for unaligned packed loads of double precision data
15718 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15719 if (x86_sse_unaligned_move_optimal)
15722 if (x86_sse_split_regs == true)
15735 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15744 switch (GET_MODE_CLASS (mode))
15746 case MODE_VECTOR_INT:
15748 switch (GET_MODE_SIZE (mode))
15751 /* If we're optimizing for size, movups is the smallest. */
15752 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15754 op0 = gen_lowpart (V4SFmode, op0);
15755 op1 = gen_lowpart (V4SFmode, op1);
15756 emit_insn (gen_sse_movups (op0, op1));
15759 op0 = gen_lowpart (V16QImode, op0);
15760 op1 = gen_lowpart (V16QImode, op1);
15761 emit_insn (gen_sse2_movdqu (op0, op1));
15764 op0 = gen_lowpart (V32QImode, op0);
15765 op1 = gen_lowpart (V32QImode, op1);
15766 ix86_avx256_split_vector_move_misalign (op0, op1);
15769 gcc_unreachable ();
15772 case MODE_VECTOR_FLOAT:
15773 op0 = gen_lowpart (mode, op0);
15774 op1 = gen_lowpart (mode, op1);
15779 emit_insn (gen_sse_movups (op0, op1));
15782 ix86_avx256_split_vector_move_misalign (op0, op1);
15785 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15787 op0 = gen_lowpart (V4SFmode, op0);
15788 op1 = gen_lowpart (V4SFmode, op1);
15789 emit_insn (gen_sse_movups (op0, op1));
15792 emit_insn (gen_sse2_movupd (op0, op1));
15795 ix86_avx256_split_vector_move_misalign (op0, op1);
15798 gcc_unreachable ();
15803 gcc_unreachable ();
15811 /* If we're optimizing for size, movups is the smallest. */
15812 if (optimize_insn_for_size_p ()
15813 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15815 op0 = gen_lowpart (V4SFmode, op0);
15816 op1 = gen_lowpart (V4SFmode, op1);
15817 emit_insn (gen_sse_movups (op0, op1));
15821 /* ??? If we have typed data, then it would appear that using
15822 movdqu is the only way to get unaligned data loaded with
15824 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15826 op0 = gen_lowpart (V16QImode, op0);
15827 op1 = gen_lowpart (V16QImode, op1);
15828 emit_insn (gen_sse2_movdqu (op0, op1));
15832 if (TARGET_SSE2 && mode == V2DFmode)
15836 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15838 op0 = gen_lowpart (V2DFmode, op0);
15839 op1 = gen_lowpart (V2DFmode, op1);
15840 emit_insn (gen_sse2_movupd (op0, op1));
15844 /* When SSE registers are split into halves, we can avoid
15845 writing to the top half twice. */
15846 if (TARGET_SSE_SPLIT_REGS)
15848 emit_clobber (op0);
15853 /* ??? Not sure about the best option for the Intel chips.
15854 The following would seem to satisfy; the register is
15855 entirely cleared, breaking the dependency chain. We
15856 then store to the upper half, with a dependency depth
15857 of one. A rumor has it that Intel recommends two movsd
15858 followed by an unpacklpd, but this is unconfirmed. And
15859 given that the dependency depth of the unpacklpd would
15860 still be one, I'm not sure why this would be better. */
15861 zero = CONST0_RTX (V2DFmode);
15864 m = adjust_address (op1, DFmode, 0);
15865 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15866 m = adjust_address (op1, DFmode, 8);
15867 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15871 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15873 op0 = gen_lowpart (V4SFmode, op0);
15874 op1 = gen_lowpart (V4SFmode, op1);
15875 emit_insn (gen_sse_movups (op0, op1));
15879 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15880 emit_move_insn (op0, CONST0_RTX (mode));
15882 emit_clobber (op0);
15884 if (mode != V4SFmode)
15885 op0 = gen_lowpart (V4SFmode, op0);
15886 m = adjust_address (op1, V2SFmode, 0);
15887 emit_insn (gen_sse_loadlps (op0, op0, m));
15888 m = adjust_address (op1, V2SFmode, 8);
15889 emit_insn (gen_sse_loadhps (op0, op0, m));
15892 else if (MEM_P (op0))
15894 /* If we're optimizing for size, movups is the smallest. */
15895 if (optimize_insn_for_size_p ()
15896 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15898 op0 = gen_lowpart (V4SFmode, op0);
15899 op1 = gen_lowpart (V4SFmode, op1);
15900 emit_insn (gen_sse_movups (op0, op1));
15904 /* ??? Similar to above, only less clear because of quote
15905 typeless stores unquote. */
15906 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15907 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15909 op0 = gen_lowpart (V16QImode, op0);
15910 op1 = gen_lowpart (V16QImode, op1);
15911 emit_insn (gen_sse2_movdqu (op0, op1));
15915 if (TARGET_SSE2 && mode == V2DFmode)
15917 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15919 op0 = gen_lowpart (V2DFmode, op0);
15920 op1 = gen_lowpart (V2DFmode, op1);
15921 emit_insn (gen_sse2_movupd (op0, op1));
15925 m = adjust_address (op0, DFmode, 0);
15926 emit_insn (gen_sse2_storelpd (m, op1));
15927 m = adjust_address (op0, DFmode, 8);
15928 emit_insn (gen_sse2_storehpd (m, op1));
15933 if (mode != V4SFmode)
15934 op1 = gen_lowpart (V4SFmode, op1);
15936 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15938 op0 = gen_lowpart (V4SFmode, op0);
15939 emit_insn (gen_sse_movups (op0, op1));
15943 m = adjust_address (op0, V2SFmode, 0);
15944 emit_insn (gen_sse_storelps (m, op1));
15945 m = adjust_address (op0, V2SFmode, 8);
15946 emit_insn (gen_sse_storehps (m, op1));
15951 gcc_unreachable ();
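
/* Illustrative sketch, not part of GCC: the movlps/movhps fallback in
   the load path above, written with intrinsics (hypothetical helper).
   Clearing the register first breaks the false dependency on its
   previous contents; each 8-byte half is then loaded separately.  */
#if 0
#include <xmmintrin.h>

static __m128
sse_split_loadu (const float *p)
{
  __m128 v = _mm_setzero_ps ();			   /* break the dependency */
  v = _mm_loadl_pi (v, (const __m64 *) p);	   /* movlps: bytes 0-7 */
  v = _mm_loadh_pi (v, (const __m64 *) (p + 2));   /* movhps: bytes 8-15 */
  return v;
}
#endif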
15954 /* Expand a push in MODE. This is some mode for which we do not support
15955 proper push instructions, at least from the registers that we expect
15956 the value to live in. */
15959 ix86_expand_push (enum machine_mode mode, rtx x)
15963 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15964 GEN_INT (-GET_MODE_SIZE (mode)),
15965 stack_pointer_rtx, 1, OPTAB_DIRECT);
15966 if (tmp != stack_pointer_rtx)
15967 emit_move_insn (stack_pointer_rtx, tmp);
15969 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15971   /* When we push an operand onto the stack, it has to be aligned at least
15972      at the function argument boundary.  However, since we don't have
15973      the argument type, we can't determine the actual argument
15975 emit_move_insn (tmp, x);
15978 /* Helper function of ix86_fixup_binary_operands to canonicalize
15979 operand order. Returns true if the operands should be swapped. */
15982 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15985 rtx dst = operands[0];
15986 rtx src1 = operands[1];
15987 rtx src2 = operands[2];
15989 /* If the operation is not commutative, we can't do anything. */
15990 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15993 /* Highest priority is that src1 should match dst. */
15994 if (rtx_equal_p (dst, src1))
15996 if (rtx_equal_p (dst, src2))
15999 /* Next highest priority is that immediate constants come second. */
16000 if (immediate_operand (src2, mode))
16002 if (immediate_operand (src1, mode))
16005 /* Lowest priority is that memory references should come second. */
16015 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16016 destination to use for the operation. If different from the true
16017 destination in operands[0], a copy operation will be required. */
16020 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16023 rtx dst = operands[0];
16024 rtx src1 = operands[1];
16025 rtx src2 = operands[2];
16027 /* Canonicalize operand order. */
16028 if (ix86_swap_binary_operands_p (code, mode, operands))
16032 /* It is invalid to swap operands of different modes. */
16033 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16040 /* Both source operands cannot be in memory. */
16041 if (MEM_P (src1) && MEM_P (src2))
16043 /* Optimization: Only read from memory once. */
16044 if (rtx_equal_p (src1, src2))
16046 src2 = force_reg (mode, src2);
16050 src2 = force_reg (mode, src2);
16053 /* If the destination is memory, and we do not have matching source
16054 operands, do things in registers. */
16055 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16056 dst = gen_reg_rtx (mode);
16058 /* Source 1 cannot be a constant. */
16059 if (CONSTANT_P (src1))
16060 src1 = force_reg (mode, src1);
16062 /* Source 1 cannot be a non-matching memory. */
16063 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16064 src1 = force_reg (mode, src1);
16066 operands[1] = src1;
16067 operands[2] = src2;
16071 /* Similarly, but assume that the destination has already been
16072 set up properly. */
16075 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16076 enum machine_mode mode, rtx operands[])
16078 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16079 gcc_assert (dst == operands[0]);
16082 /* Attempt to expand a binary operator.  Make the expansion closer to the
16083    actual machine than just general_operand, which would allow 3 separate
16084    memory references (one output, two inputs) in a single insn.  */
16087 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16090 rtx src1, src2, dst, op, clob;
16092 dst = ix86_fixup_binary_operands (code, mode, operands);
16093 src1 = operands[1];
16094 src2 = operands[2];
16096 /* Emit the instruction. */
16098 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16099 if (reload_in_progress)
16101 /* Reload doesn't know about the flags register, and doesn't know that
16102 it doesn't want to clobber it. We can only do this with PLUS. */
16103 gcc_assert (code == PLUS);
16106 else if (reload_completed
16108 && !rtx_equal_p (dst, src1))
16110 /* This is going to be an LEA; avoid splitting it later. */
16115 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16116 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16119 /* Fix up the destination if needed. */
16120 if (dst != operands[0])
16121 emit_move_insn (operands[0], dst);
16124 /* Return TRUE or FALSE depending on whether the binary operator meets the
16125 appropriate constraints. */
16128 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16131 rtx dst = operands[0];
16132 rtx src1 = operands[1];
16133 rtx src2 = operands[2];
16135 /* Both source operands cannot be in memory. */
16136 if (MEM_P (src1) && MEM_P (src2))
16139 /* Canonicalize operand order for commutative operators. */
16140 if (ix86_swap_binary_operands_p (code, mode, operands))
16147 /* If the destination is memory, we must have a matching source operand. */
16148 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16151 /* Source 1 cannot be a constant. */
16152 if (CONSTANT_P (src1))
16155 /* Source 1 cannot be a non-matching memory. */
16156 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16158 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16159 return (code == AND
16162 || (TARGET_64BIT && mode == DImode))
16163 && CONST_INT_P (src2)
16164 && (INTVAL (src2) == 0xff
16165 || INTVAL (src2) == 0xffff));
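
/* Illustrative sketch, not part of GCC: the zero-extending ANDs the
   test above accepts; each is equivalent to a movz from the narrow
   mode.  */
#if 0
unsigned int zext_byte (unsigned int x) { return x & 0xff; }   /* movzbl */
unsigned int zext_word (unsigned int x) { return x & 0xffff; } /* movzwl */
#endif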
16171 /* Attempt to expand a unary operator.  Make the expansion closer to the
16172    actual machine than just general_operand, which would allow 2 separate
16173    memory references (one output, one input) in a single insn.  */
16176 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16179 int matching_memory;
16180 rtx src, dst, op, clob;
16185 /* If the destination is memory, and we do not have matching source
16186 operands, do things in registers. */
16187 matching_memory = 0;
16190 if (rtx_equal_p (dst, src))
16191 matching_memory = 1;
16193 dst = gen_reg_rtx (mode);
16196 /* When source operand is memory, destination must match. */
16197 if (MEM_P (src) && !matching_memory)
16198 src = force_reg (mode, src);
16200 /* Emit the instruction. */
16202 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16203 if (reload_in_progress || code == NOT)
16205 /* Reload doesn't know about the flags register, and doesn't know that
16206 it doesn't want to clobber it. */
16207 gcc_assert (code == NOT);
16212 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16213 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16216 /* Fix up the destination if needed. */
16217 if (dst != operands[0])
16218 emit_move_insn (operands[0], dst);
16221 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16222 divisor are within the range [0-255]. */
16225 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16228 rtx end_label, qimode_label;
16229 rtx insn, div, mod;
16230 rtx scratch, tmp0, tmp1, tmp2;
16231 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16232 rtx (*gen_zero_extend) (rtx, rtx);
16233 rtx (*gen_test_ccno_1) (rtx, rtx);
16238 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16239 gen_test_ccno_1 = gen_testsi_ccno_1;
16240 gen_zero_extend = gen_zero_extendqisi2;
16243 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16244 gen_test_ccno_1 = gen_testdi_ccno_1;
16245 gen_zero_extend = gen_zero_extendqidi2;
16248 gcc_unreachable ();
16251 end_label = gen_label_rtx ();
16252 qimode_label = gen_label_rtx ();
16254 scratch = gen_reg_rtx (mode);
16256   /* Use 8bit unsigned divmod if the dividend and divisor are within
16257      the range [0-255].  */
16258 emit_move_insn (scratch, operands[2]);
16259 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16260 scratch, 1, OPTAB_DIRECT);
16261 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16262 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16263 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16264 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16265 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16267 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16268 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16269 JUMP_LABEL (insn) = qimode_label;
16271   /* Generate the original signed/unsigned divmod.  */
16272 div = gen_divmod4_1 (operands[0], operands[1],
16273 operands[2], operands[3]);
16276 /* Branch to the end. */
16277 emit_jump_insn (gen_jump (end_label));
16280 /* Generate 8bit unsigned divide. */
16281 emit_label (qimode_label);
16282   /* Don't use operands[0] for the result of the 8bit divide since not all
16283      registers support QImode ZERO_EXTRACT.  */
16284 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16285 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16286 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16287 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16291 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16292 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16296 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16297 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16300 /* Extract remainder from AH. */
16301 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16302 if (REG_P (operands[1]))
16303 insn = emit_move_insn (operands[1], tmp1);
16306 /* Need a new scratch register since the old one has result
16308 scratch = gen_reg_rtx (mode);
16309 emit_move_insn (scratch, tmp1);
16310 insn = emit_move_insn (operands[1], scratch);
16312 set_unique_reg_note (insn, REG_EQUAL, mod);
16314 /* Zero extend quotient from AL. */
16315 tmp1 = gen_lowpart (QImode, tmp0);
16316 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16317 set_unique_reg_note (insn, REG_EQUAL, div);
16319 emit_label (end_label);
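
/* Illustrative sketch, not part of GCC: the range test above in scalar
   form (hypothetical helper).  IOR-ing the operands and masking with
   -0x100 is zero exactly when both values fit in eight bits, in which
   case one narrow DIV leaves the quotient in AL and the remainder
   in AH.  */
#if 0
static int
fits_qimode_divmod (unsigned int dividend, unsigned int divisor)
{
  /* Mirrors: or; test $-0x100, scratch; je qimode_label.  */
  return ((dividend | divisor) & ~0xffu) == 0;
}
#endif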
16322 #define LEA_SEARCH_THRESHOLD 12
16324 /* Search backward for non-agu definition of register number REGNO1
16325 or register number REGNO2 in INSN's basic block until
16326 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16327 2. Reach BB boundary, or
16328 3. Reach agu definition.
16329 Returns the distance between the non-agu definition point and INSN.
16330 If no definition point, returns -1. */
16333 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16336 basic_block bb = BLOCK_FOR_INSN (insn);
16339 enum attr_type insn_type;
16341 if (insn != BB_HEAD (bb))
16343 rtx prev = PREV_INSN (insn);
16344 while (prev && distance < LEA_SEARCH_THRESHOLD)
16346 if (NONDEBUG_INSN_P (prev))
16349 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16350 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16351 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16352 && (regno1 == DF_REF_REGNO (*def_rec)
16353 || regno2 == DF_REF_REGNO (*def_rec)))
16355 insn_type = get_attr_type (prev);
16356 if (insn_type != TYPE_LEA)
16360 if (prev == BB_HEAD (bb))
16362 prev = PREV_INSN (prev);
16366 if (distance < LEA_SEARCH_THRESHOLD)
16370 bool simple_loop = false;
16372 FOR_EACH_EDGE (e, ei, bb->preds)
16375 simple_loop = true;
16381 rtx prev = BB_END (bb);
16384 && distance < LEA_SEARCH_THRESHOLD)
16386 if (NONDEBUG_INSN_P (prev))
16389 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16390 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16391 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16392 && (regno1 == DF_REF_REGNO (*def_rec)
16393 || regno2 == DF_REF_REGNO (*def_rec)))
16395 insn_type = get_attr_type (prev);
16396 if (insn_type != TYPE_LEA)
16400 prev = PREV_INSN (prev);
16408 /* get_attr_type may modify recog data. We want to make sure
16409 that recog data is valid for instruction INSN, on which
16410 distance_non_agu_define is called. INSN is unchanged here. */
16411 extract_insn_cached (insn);
16415 /* Return the distance between INSN and the next insn that uses
16416    register number REGNO0 in a memory address.  Return -1 if no such
16417    use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
16420 distance_agu_use (unsigned int regno0, rtx insn)
16422 basic_block bb = BLOCK_FOR_INSN (insn);
16427 if (insn != BB_END (bb))
16429 rtx next = NEXT_INSN (insn);
16430 while (next && distance < LEA_SEARCH_THRESHOLD)
16432 if (NONDEBUG_INSN_P (next))
16436 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16437 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16438 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16439 && regno0 == DF_REF_REGNO (*use_rec))
16441 /* Return DISTANCE if OP0 is used in memory
16442 address in NEXT. */
16446 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16447 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16448 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16449 && regno0 == DF_REF_REGNO (*def_rec))
16451 /* Return -1 if OP0 is set in NEXT. */
16455 if (next == BB_END (bb))
16457 next = NEXT_INSN (next);
16461 if (distance < LEA_SEARCH_THRESHOLD)
16465 bool simple_loop = false;
16467 FOR_EACH_EDGE (e, ei, bb->succs)
16470 simple_loop = true;
16476 rtx next = BB_HEAD (bb);
16479 && distance < LEA_SEARCH_THRESHOLD)
16481 if (NONDEBUG_INSN_P (next))
16485 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16486 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16487 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16488 && regno0 == DF_REF_REGNO (*use_rec))
16490 /* Return DISTANCE if OP0 is used in memory
16491 address in NEXT. */
16495 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16496 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16497 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16498 && regno0 == DF_REF_REGNO (*def_rec))
16500 /* Return -1 if OP0 is set in NEXT. */
16505 next = NEXT_INSN (next);
16513 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16514    there is a choice between LEA and ADD.
16515    Negative value: ADD is preferred over LEA
16517    Positive value: LEA is preferred over ADD  */
16518 #define IX86_LEA_PRIORITY 2
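
/* Illustrative sketch, not part of GCC: the decision rule below in
   numbers (hypothetical helper).  With IX86_LEA_PRIORITY == 2, an
   input defined by an ALU op 1 insn back whose result feeds an
   address 5 insns ahead gives 1 + 2 < 5: the define-side stall
   dominates, so ADD wins.  If the address use were only 2 insns
   ahead, 1 + 2 >= 2 and LEA wins.  */
#if 0
static int
lea_wins (int dist_define, int dist_use)
{
  return !(dist_define + IX86_LEA_PRIORITY < dist_use);
}
#endif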
16520 /* Return true if it is ok to optimize an ADD operation to a LEA
16521    operation to avoid flag register consumption.  For most processors,
16522    ADD is faster than LEA.  For processors like ATOM, if the
16523    destination register of the LEA holds an actual address which will be
16524    used soon, LEA is better; otherwise ADD is better.  */
16527 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16529 unsigned int regno0 = true_regnum (operands[0]);
16530 unsigned int regno1 = true_regnum (operands[1]);
16531 unsigned int regno2 = true_regnum (operands[2]);
16533   /* If a = b + c, (a != b && a != c), we must use the lea form.  */
16534 if (regno0 != regno1 && regno0 != regno2)
16537 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16541 int dist_define, dist_use;
16543 /* Return false if REGNO0 isn't used in memory address. */
16544 dist_use = distance_agu_use (regno0, insn);
16548 dist_define = distance_non_agu_define (regno1, regno2, insn);
16549 if (dist_define <= 0)
16552   /* If this insn has both a backward non-agu dependence and a forward
16553      agu dependence, the one with the shorter distance takes effect.  */
16554 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16561 /* Return true if destination reg of SET_BODY is shift count of
16565 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16571 /* Retrieve destination of SET_BODY. */
16572 switch (GET_CODE (set_body))
16575 set_dest = SET_DEST (set_body);
16576 if (!set_dest || !REG_P (set_dest))
16580 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16581 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16589 /* Retrieve shift count of USE_BODY. */
16590 switch (GET_CODE (use_body))
16593 shift_rtx = XEXP (use_body, 1);
16596 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16597 if (ix86_dep_by_shift_count_body (set_body,
16598 XVECEXP (use_body, 0, i)))
16606 && (GET_CODE (shift_rtx) == ASHIFT
16607 || GET_CODE (shift_rtx) == LSHIFTRT
16608 || GET_CODE (shift_rtx) == ASHIFTRT
16609 || GET_CODE (shift_rtx) == ROTATE
16610 || GET_CODE (shift_rtx) == ROTATERT))
16612 rtx shift_count = XEXP (shift_rtx, 1);
16614 /* Return true if shift count is dest of SET_BODY. */
16615 if (REG_P (shift_count)
16616 && true_regnum (set_dest) == true_regnum (shift_count))
16623 /* Return true if destination reg of SET_INSN is shift count of
16627 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16629 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16630 PATTERN (use_insn));
16633 /* Return TRUE or FALSE depending on whether the unary operator meets the
16634 appropriate constraints. */
16637 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16638 enum machine_mode mode ATTRIBUTE_UNUSED,
16639 rtx operands[2] ATTRIBUTE_UNUSED)
16641 /* If one of operands is memory, source and destination must match. */
16642 if ((MEM_P (operands[0])
16643 || MEM_P (operands[1]))
16644 && ! rtx_equal_p (operands[0], operands[1]))
16649 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16650 are ok, keeping in mind the possible movddup alternative. */
16653 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16655 if (MEM_P (operands[0]))
16656 return rtx_equal_p (operands[0], operands[1 + high]);
16657 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16658 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16662 /* Post-reload splitter for converting an SF or DFmode value in an
16663 SSE register into an unsigned SImode. */
16666 ix86_split_convert_uns_si_sse (rtx operands[])
16668 enum machine_mode vecmode;
16669 rtx value, large, zero_or_two31, input, two31, x;
16671 large = operands[1];
16672 zero_or_two31 = operands[2];
16673 input = operands[3];
16674 two31 = operands[4];
16675 vecmode = GET_MODE (large);
16676 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16678 /* Load up the value into the low element. We must ensure that the other
16679 elements are valid floats -- zero is the easiest such value. */
16682 if (vecmode == V4SFmode)
16683 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16685 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16689 input = gen_rtx_REG (vecmode, REGNO (input));
16690 emit_move_insn (value, CONST0_RTX (vecmode));
16691 if (vecmode == V4SFmode)
16692 emit_insn (gen_sse_movss (value, value, input));
16694 emit_insn (gen_sse2_movsd (value, value, input));
16697 emit_move_insn (large, two31);
16698 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16700 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16701 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16703 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16704 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16706 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16707 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16709 large = gen_rtx_REG (V4SImode, REGNO (large));
16710 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16712 x = gen_rtx_REG (V4SImode, REGNO (value));
16713 if (vecmode == V4SFmode)
16714 emit_insn (gen_sse2_cvttps2dq (x, value));
16716 emit_insn (gen_sse2_cvttpd2dq (x, value));
16719 emit_insn (gen_xorv4si3 (value, value, large));
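
/* Illustrative sketch, not part of GCC: the same algorithm on a scalar
   (hypothetical helper).  Values >= 2^31 do not survive a signed
   conversion, so 2^31 is conditionally subtracted before the cvtt, and
   the sign bit -- the LE mask shifted left by 31 -- is XORed back in.  */
#if 0
static unsigned int
uns_si_from_double (double x)
{
  int large = x >= 2147483648.0;		/* the LE compare mask */
  double adj = large ? 2147483648.0 : 0.0;	/* zero_or_two31 */
  int t = (int) (x - adj);			/* cvttsd2si */
  return (unsigned int) t ^ (large ? 0x80000000u : 0u);
}
#endif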
16722 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16723 Expects the 64-bit DImode to be supplied in a pair of integral
16724 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16725 -mfpmath=sse, !optimize_size only. */
16728 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16730 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16731 rtx int_xmm, fp_xmm;
16732 rtx biases, exponents;
16735 int_xmm = gen_reg_rtx (V4SImode);
16736 if (TARGET_INTER_UNIT_MOVES)
16737 emit_insn (gen_movdi_to_sse (int_xmm, input));
16738 else if (TARGET_SSE_SPLIT_REGS)
16740 emit_clobber (int_xmm);
16741 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16745 x = gen_reg_rtx (V2DImode);
16746 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16747 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16750 x = gen_rtx_CONST_VECTOR (V4SImode,
16751 gen_rtvec (4, GEN_INT (0x43300000UL),
16752 GEN_INT (0x45300000UL),
16753 const0_rtx, const0_rtx));
16754 exponents = validize_mem (force_const_mem (V4SImode, x));
16756 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16757 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16759   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
16760 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16761 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16762 (0x1.0p84 + double(fp_value_hi_xmm)).
16763 Note these exponents differ by 32. */
16765 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16767 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16768 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16769 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16770 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16771 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16772 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16773 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16774 biases = validize_mem (force_const_mem (V2DFmode, biases));
16775 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16777 /* Add the upper and lower DFmode values together. */
16779 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16782 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16783 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16784 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16787 ix86_expand_vector_extract (false, target, fp_xmm, 0);
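
/* Illustrative sketch, not part of GCC: the exponent-splicing trick in
   scalar form, with union punning standing in for the interleave
   (hypothetical helper).  Each 32-bit half lands in the mantissa of a
   double whose exponent gives it its integral weight; both bias
   subtractions are exact, so only the final add rounds.  */
#if 0
static double
uns_di_to_double (unsigned long long x)
{
  union { unsigned long long u; double d; } lo, hi;
  lo.u = 0x4330000000000000ULL | (x & 0xffffffffULL); /* 2^52 + lo32 */
  hi.u = 0x4530000000000000ULL | (x >> 32);	      /* 2^84 + hi32 * 2^32 */
  return (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
}
#endif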
16790 /* Not used, but eases macroization of patterns. */
16792 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16793 rtx input ATTRIBUTE_UNUSED)
16795 gcc_unreachable ();
16798 /* Convert an unsigned SImode value into a DFmode. Only currently used
16799 for SSE, but applicable anywhere. */
16802 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16804 REAL_VALUE_TYPE TWO31r;
16807 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16808 NULL, 1, OPTAB_DIRECT);
16810 fp = gen_reg_rtx (DFmode);
16811 emit_insn (gen_floatsidf2 (fp, x));
16813 real_ldexp (&TWO31r, &dconst1, 31);
16814 x = const_double_from_real_value (TWO31r, DFmode);
16816 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16818 emit_move_insn (target, x);
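
/* Illustrative sketch, not part of GCC: the same bias trick on a
   scalar (hypothetical helper).  Subtracting 2^31 with wraparound maps
   [0, 2^32) onto the signed range, the signed conversion is exact for
   every 32-bit value, and adding 2^31 back as a double restores the
   original magnitude.  */
#if 0
static double
uns_si_to_double (unsigned int x)
{
  int biased = (int) (x - 0x80000000u);	/* the PLUS of -2147483647 - 1 */
  return (double) biased + 2147483648.0;
}
#endif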
16821 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16822 32-bit mode; otherwise we have a direct convert instruction. */
16825 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16827 REAL_VALUE_TYPE TWO32r;
16828 rtx fp_lo, fp_hi, x;
16830 fp_lo = gen_reg_rtx (DFmode);
16831 fp_hi = gen_reg_rtx (DFmode);
16833 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16835 real_ldexp (&TWO32r, &dconst1, 32);
16836 x = const_double_from_real_value (TWO32r, DFmode);
16837 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16839 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16841 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16844 emit_move_insn (target, x);
16847 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16848 For x86_32, -mfpmath=sse, !optimize_size only. */
16850 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16852 REAL_VALUE_TYPE ONE16r;
16853 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16855 real_ldexp (&ONE16r, &dconst1, 16);
16856 x = const_double_from_real_value (ONE16r, SFmode);
16857 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16858 NULL, 0, OPTAB_DIRECT);
16859 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16860 NULL, 0, OPTAB_DIRECT);
16861 fp_hi = gen_reg_rtx (SFmode);
16862 fp_lo = gen_reg_rtx (SFmode);
16863 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16864 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16865 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16867 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16869 if (!rtx_equal_p (target, fp_hi))
16870 emit_move_insn (target, fp_hi);
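
/* Illustrative sketch, not part of GCC: the 16/16 split above on a
   scalar (hypothetical helper).  Each half is below 2^16 and so
   converts to SFmode exactly; only the final multiply-add can round,
   giving a correctly rounded result overall.  */
#if 0
static float
uns_si_to_float (unsigned int x)
{
  float hi = (float) (x >> 16);		/* exact */
  float lo = (float) (x & 0xffff);	/* exact */
  return hi * 65536.0f + lo;		/* fp_hi * 2^16 + fp_lo */
}
#endif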
16873 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16874 then replicate the value for all elements of the vector
16878 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16885 v = gen_rtvec (4, value, value, value, value);
16886 return gen_rtx_CONST_VECTOR (V4SImode, v);
16890 v = gen_rtvec (2, value, value);
16891 return gen_rtx_CONST_VECTOR (V2DImode, v);
16895 v = gen_rtvec (8, value, value, value, value,
16896 value, value, value, value);
16898 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16899 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16900 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16901 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16902 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16906 v = gen_rtvec (4, value, value, value, value);
16908 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16909 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16910 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16914 v = gen_rtvec (4, value, value, value, value);
16916 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16917 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16918 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16922 v = gen_rtvec (2, value, value);
16924 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16925 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16928 gcc_unreachable ();
16932 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16933 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16934 for an SSE register. If VECT is true, then replicate the mask for
16935 all elements of the vector register. If INVERT is true, then create
16936 a mask excluding the sign bit. */
16939 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16941 enum machine_mode vec_mode, imode;
16942 HOST_WIDE_INT hi, lo;
16947 /* Find the sign bit, sign extended to 2*HWI. */
16954 mode = GET_MODE_INNER (mode);
16956 lo = 0x80000000, hi = lo < 0;
16963 mode = GET_MODE_INNER (mode);
16965 if (HOST_BITS_PER_WIDE_INT >= 64)
16966 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16968 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16973 vec_mode = VOIDmode;
16974 if (HOST_BITS_PER_WIDE_INT >= 64)
16977 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16984 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16988 lo = ~lo, hi = ~hi;
16994 mask = immed_double_const (lo, hi, imode);
16996 vec = gen_rtvec (2, v, mask);
16997 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16998 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
17005 gcc_unreachable ();
17009 lo = ~lo, hi = ~hi;
17011 /* Force this value into the low part of a fp vector constant. */
17012 mask = immed_double_const (lo, hi, imode);
17013 mask = gen_lowpart (mode, mask);
17015 if (vec_mode == VOIDmode)
17016 return force_reg (mode, mask);
17018 v = ix86_build_const_vector (vec_mode, vect, mask);
17019 return force_reg (vec_mode, v);
17022 /* Generate code for floating point ABS or NEG. */
17025 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
17028 rtx mask, set, dst, src;
17029 bool use_sse = false;
17030 bool vector_mode = VECTOR_MODE_P (mode);
17031 enum machine_mode vmode = mode;
17035 else if (mode == TFmode)
17037 else if (TARGET_SSE_MATH)
17039 use_sse = SSE_FLOAT_MODE_P (mode);
17040 if (mode == SFmode)
17042 else if (mode == DFmode)
17046 /* NEG and ABS performed with SSE use bitwise mask operations.
17047 Create the appropriate mask now. */
17049 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
17056 set = gen_rtx_fmt_e (code, mode, src);
17057 set = gen_rtx_SET (VOIDmode, dst, set);
17064 use = gen_rtx_USE (VOIDmode, mask);
17066 par = gen_rtvec (2, set, use);
17069 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17070 par = gen_rtvec (3, set, use, clob);
17072 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
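
/* Illustrative sketch, not part of GCC: what the mask does in the SSE
   path (hypothetical helpers).  NEG is an XOR with the sign-bit mask;
   ABS is an AND with the inverted mask (the invert argument above).  */
#if 0
static float
sse_neg (float x)
{
  union { float f; unsigned int u; } v = { x };
  v.u ^= 0x80000000u;		/* xorps with the sign-bit mask */
  return v.f;
}

static float
sse_abs (float x)
{
  union { float f; unsigned int u; } v = { x };
  v.u &= 0x7fffffffu;		/* andps with the inverted mask */
  return v.f;
}
#endif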
17078 /* Expand a copysign operation. Special case operand 0 being a constant. */
17081 ix86_expand_copysign (rtx operands[])
17083 enum machine_mode mode, vmode;
17084 rtx dest, op0, op1, mask, nmask;
17086 dest = operands[0];
17090 mode = GET_MODE (dest);
17092 if (mode == SFmode)
17094 else if (mode == DFmode)
17099 if (GET_CODE (op0) == CONST_DOUBLE)
17101 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
17103 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
17104 op0 = simplify_unary_operation (ABS, mode, op0, mode);
17106 if (mode == SFmode || mode == DFmode)
17108 if (op0 == CONST0_RTX (mode))
17109 op0 = CONST0_RTX (vmode);
17112 rtx v = ix86_build_const_vector (vmode, false, op0);
17114 op0 = force_reg (vmode, v);
17117 else if (op0 != CONST0_RTX (mode))
17118 op0 = force_reg (mode, op0);
17120 mask = ix86_build_signbit_mask (vmode, 0, 0);
17122 if (mode == SFmode)
17123 copysign_insn = gen_copysignsf3_const;
17124 else if (mode == DFmode)
17125 copysign_insn = gen_copysigndf3_const;
17127 copysign_insn = gen_copysigntf3_const;
17129 emit_insn (copysign_insn (dest, op0, op1, mask));
17133 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17135 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17136 mask = ix86_build_signbit_mask (vmode, 0, 0);
17138 if (mode == SFmode)
17139 copysign_insn = gen_copysignsf3_var;
17140 else if (mode == DFmode)
17141 copysign_insn = gen_copysigndf3_var;
17143 copysign_insn = gen_copysigntf3_var;
17145 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17149 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17150 be a constant, and so has already been expanded into a vector constant. */
17153 ix86_split_copysign_const (rtx operands[])
17155 enum machine_mode mode, vmode;
17156 rtx dest, op0, mask, x;
17158 dest = operands[0];
17160 mask = operands[3];
17162 mode = GET_MODE (dest);
17163 vmode = GET_MODE (mask);
17165 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17166 x = gen_rtx_AND (vmode, dest, mask);
17167 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17169 if (op0 != CONST0_RTX (vmode))
17171 x = gen_rtx_IOR (vmode, dest, op0);
17172 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17176 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17177 so we have to do two masks. */
17180 ix86_split_copysign_var (rtx operands[])
17182 enum machine_mode mode, vmode;
17183 rtx dest, scratch, op0, op1, mask, nmask, x;
17185 dest = operands[0];
17186 scratch = operands[1];
17189 nmask = operands[4];
17190 mask = operands[5];
17192 mode = GET_MODE (dest);
17193 vmode = GET_MODE (mask);
17195 if (rtx_equal_p (op0, op1))
17197 /* Shouldn't happen often (it's useless, obviously), but when it does
17198 we'd generate incorrect code if we continue below. */
17199 emit_move_insn (dest, op0);
17203 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17205 gcc_assert (REGNO (op1) == REGNO (scratch));
17207 x = gen_rtx_AND (vmode, scratch, mask);
17208 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17211 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17212 x = gen_rtx_NOT (vmode, dest);
17213 x = gen_rtx_AND (vmode, x, op0);
17214 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17218 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17220 x = gen_rtx_AND (vmode, scratch, mask);
17222 else /* alternative 2,4 */
17224 gcc_assert (REGNO (mask) == REGNO (scratch));
17225 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17226 x = gen_rtx_AND (vmode, scratch, op1);
17228 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17230 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17232 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17233 x = gen_rtx_AND (vmode, dest, nmask);
17235 else /* alternative 3,4 */
17237 gcc_assert (REGNO (nmask) == REGNO (dest));
17239 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17240 x = gen_rtx_AND (vmode, dest, op0);
17242 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17245 x = gen_rtx_IOR (vmode, dest, scratch);
17246 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
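
/* Illustrative sketch, not part of GCC: the two-mask combination the
   splitter above produces, on a scalar (hypothetical helper).  The
   magnitude is op0 ANDed with the inverted mask, the sign is op1 ANDed
   with the sign mask, and an OR merges them.  */
#if 0
static double
sse_copysign (double mag, double sgn)
{
  union { double d; unsigned long long u; } m = { mag }, s = { sgn };
  const unsigned long long sign = 0x8000000000000000ULL;
  m.u = (m.u & ~sign) | (s.u & sign);	/* andnpd, andpd, orpd */
  return m.d;
}
#endif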
17249 /* Return TRUE or FALSE depending on whether the first SET in INSN
17250 has source and destination with matching CC modes, and that the
17251 CC mode is at least as constrained as REQ_MODE. */
17254 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17257 enum machine_mode set_mode;
17259 set = PATTERN (insn);
17260 if (GET_CODE (set) == PARALLEL)
17261 set = XVECEXP (set, 0, 0);
17262 gcc_assert (GET_CODE (set) == SET);
17263 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17265 set_mode = GET_MODE (SET_DEST (set));
17269 if (req_mode != CCNOmode
17270 && (req_mode != CCmode
17271 || XEXP (SET_SRC (set), 1) != const0_rtx))
17275 if (req_mode == CCGCmode)
17279 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17283 if (req_mode == CCZmode)
17293 if (set_mode != req_mode)
17298 gcc_unreachable ();
17301 return GET_MODE (SET_SRC (set)) == set_mode;
17304 /* Generate insn patterns to do an integer compare of OPERANDS. */
17307 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17309 enum machine_mode cmpmode;
17312 cmpmode = SELECT_CC_MODE (code, op0, op1);
17313 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17315 /* This is very simple, but making the interface the same as in the
17316 FP case makes the rest of the code easier. */
17317 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17318 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17320 /* Return the test that should be put into the flags user, i.e.
17321 the bcc, scc, or cmov instruction. */
17322 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17325 /* Figure out whether to use ordered or unordered fp comparisons.
17326 Return the appropriate mode to use. */
17329 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17331 /* ??? In order to make all comparisons reversible, we do all comparisons
17332    non-trapping when compiling for IEEE.  Once gcc is able to distinguish
17333    all forms of trapping and nontrapping comparisons, we can make inequality
17334    comparisons trapping again, since it results in better code when using
17335    FCOM based compares.  */
17336 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17340 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17342 enum machine_mode mode = GET_MODE (op0);
17344 if (SCALAR_FLOAT_MODE_P (mode))
17346 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17347 return ix86_fp_compare_mode (code);
17352 /* Only zero flag is needed. */
17353 case EQ: /* ZF=0 */
17354 case NE: /* ZF!=0 */
17356 /* Codes needing carry flag. */
17357 case GEU: /* CF=0 */
17358 case LTU: /* CF=1 */
17359 /* Detect overflow checks. They need just the carry flag. */
17360 if (GET_CODE (op0) == PLUS
17361 && rtx_equal_p (op1, XEXP (op0, 0)))
17365 case GTU: /* CF=0 & ZF=0 */
17366 case LEU: /* CF=1 | ZF=1 */
17367 /* Detect overflow checks. They need just the carry flag. */
17368 if (GET_CODE (op0) == MINUS
17369 && rtx_equal_p (op1, XEXP (op0, 0)))
17373 /* Codes possibly doable only with sign flag when
17374 comparing against zero. */
17375 case GE: /* SF=OF or SF=0 */
17376 case LT: /* SF<>OF or SF=1 */
17377 if (op1 == const0_rtx)
17380 /* For other cases Carry flag is not required. */
17382       /* Codes doable only with the sign flag when comparing
17383          against zero, but for which we miss the jump instruction,
17384          so we need to use relational tests against overflow,
17385          which thus needs to be zero.  */
17386 case GT: /* ZF=0 & SF=OF */
17387 case LE: /* ZF=1 | SF<>OF */
17388 if (op1 == const0_rtx)
17392       /* The strcmp pattern does a (use flags), and combine may ask us for a proper
17397 gcc_unreachable ();
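
/* Illustrative sketch, not part of GCC: the overflow idioms the
   GEU/LTU and GTU/LEU cases above detect.  Comparing a sum or a
   difference against one of its own operands is the usual C test for
   unsigned wraparound, and the carry flag alone decides it.  */
#if 0
static int
add_overflows (unsigned int a, unsigned int b)
{
  return a + b < a;	/* PLUS compared LTU against its first operand */
}

static int
sub_underflows (unsigned int a, unsigned int b)
{
  return a - b > a;	/* MINUS compared GTU against its first operand */
}
#endif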
17401 /* Return the fixed registers used for condition codes. */
17404 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17411 /* If two condition code modes are compatible, return a condition code
17412 mode which is compatible with both. Otherwise, return
17415 static enum machine_mode
17416 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17421 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17424 if ((m1 == CCGCmode && m2 == CCGOCmode)
17425 || (m1 == CCGOCmode && m2 == CCGCmode))
17431 gcc_unreachable ();
17461 /* These are only compatible with themselves, which we already
17468 /* Return a comparison we can do that is equivalent to
17469 swap_condition (code), apart possibly from orderedness.
17470 But never change orderedness if TARGET_IEEE_FP, returning
17471 UNKNOWN in that case if necessary. */
17473 static enum rtx_code
17474 ix86_fp_swap_condition (enum rtx_code code)
17478 case GT: /* GTU - CF=0 & ZF=0 */
17479 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17480 case GE: /* GEU - CF=0 */
17481 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17482 case UNLT: /* LTU - CF=1 */
17483 return TARGET_IEEE_FP ? UNKNOWN : GT;
17484 case UNLE: /* LEU - CF=1 | ZF=1 */
17485 return TARGET_IEEE_FP ? UNKNOWN : GE;
17487 return swap_condition (code);
17491 /* Return the cost of comparison CODE using the best strategy for performance.
17492 All following functions use the number of instructions as a cost metric.
17493 In the future this should be tweaked to compute bytes for optimize_size and
17494 take into account the performance of various instructions on various CPUs. */
17497 ix86_fp_comparison_cost (enum rtx_code code)
17501 /* The cost of code using bit-twiddling on %ah. */
17518 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17522 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17525 gcc_unreachable ();
17528 switch (ix86_fp_comparison_strategy (code))
17530 case IX86_FPCMP_COMI:
17531 return arith_cost > 4 ? 3 : 2;
17532 case IX86_FPCMP_SAHF:
17533 return arith_cost > 4 ? 4 : 3;
17539 /* Return the strategy to use for floating-point.  We assume that fcomi is always
17540 preferable where available, since that is also true when looking at size
17541 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17543 enum ix86_fpcmp_strategy
17544 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17546 /* Do fcomi/sahf based test when profitable. */
17549 return IX86_FPCMP_COMI;
17551 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17552 return IX86_FPCMP_SAHF;
17554 return IX86_FPCMP_ARITH;
17557 /* Swap, force into registers, or otherwise massage the two operands
17558 to a fp comparison. The operands are updated in place; the new
17559 comparison code is returned. */
17561 static enum rtx_code
17562 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17564 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17565 rtx op0 = *pop0, op1 = *pop1;
17566 enum machine_mode op_mode = GET_MODE (op0);
17567 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17569 /* All of the unordered compare instructions only work on registers.
17570 The same is true of the fcomi compare instructions. The XFmode
17571 compare instructions require registers except when comparing
17572 against zero or when converting operand 1 from fixed point to
17576 && (fpcmp_mode == CCFPUmode
17577 || (op_mode == XFmode
17578 && ! (standard_80387_constant_p (op0) == 1
17579 || standard_80387_constant_p (op1) == 1)
17580 && GET_CODE (op1) != FLOAT)
17581 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17583 op0 = force_reg (op_mode, op0);
17584 op1 = force_reg (op_mode, op1);
17588 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17589 things around if they appear profitable, otherwise force op0
17590 into a register. */
17592 if (standard_80387_constant_p (op0) == 0
17594 && ! (standard_80387_constant_p (op1) == 0
17597 enum rtx_code new_code = ix86_fp_swap_condition (code);
17598 if (new_code != UNKNOWN)
17601 tmp = op0, op0 = op1, op1 = tmp;
17607 op0 = force_reg (op_mode, op0);
17609 if (CONSTANT_P (op1))
17611 int tmp = standard_80387_constant_p (op1);
17613 op1 = validize_mem (force_const_mem (op_mode, op1));
17617 op1 = force_reg (op_mode, op1);
17620 op1 = force_reg (op_mode, op1);
17624 /* Try to rearrange the comparison to make it cheaper. */
17625 if (ix86_fp_comparison_cost (code)
17626 > ix86_fp_comparison_cost (swap_condition (code))
17627 && (REG_P (op1) || can_create_pseudo_p ()))
17630 tmp = op0, op0 = op1, op1 = tmp;
17631 code = swap_condition (code);
17633 op0 = force_reg (op_mode, op0);
17641 /* Convert the comparison codes we use to represent FP comparisons to the
17642 integer code that will result in a proper branch. Return UNKNOWN if no such code
17646 ix86_fp_compare_code_to_integer (enum rtx_code code)
17675 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17678 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17680 enum machine_mode fpcmp_mode, intcmp_mode;
17683 fpcmp_mode = ix86_fp_compare_mode (code);
17684 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17686 /* Do fcomi/sahf based test when profitable. */
17687 switch (ix86_fp_comparison_strategy (code))
17689 case IX86_FPCMP_COMI:
17690 intcmp_mode = fpcmp_mode;
17691 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17692 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17697 case IX86_FPCMP_SAHF:
17698 intcmp_mode = fpcmp_mode;
17699 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17700 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17704 scratch = gen_reg_rtx (HImode);
17705 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17706 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17709 case IX86_FPCMP_ARITH:
17710 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17711 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17712 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17714 scratch = gen_reg_rtx (HImode);
17715 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17717 /* In the unordered case, we have to check C2 for NaN's, which
17718 doesn't happen to work out to anything nice combination-wise.
17719 So do some bit twiddling on the value we've got in AH to come
17720 up with an appropriate set of condition codes. */
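/* For reference (an annotation based on the FPU status word layout,
   not from the original comments): after fnstsw the relevant
   condition bits land in %ah as C0 = 0x01, C2 = 0x04, C3 = 0x40.
   fcom sets none of them for ">", C0 for "<", C3 for "==", and all
   three for an unordered result; e.g. for GT, "test $0x45, %ah; je"
   accepts exactly the ordered greater-than outcome.  */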
17722 intcmp_mode = CCNOmode;
17727 if (code == GT || !TARGET_IEEE_FP)
17729 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17734 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17735 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17736 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17737 intcmp_mode = CCmode;
17743 if (code == LT && TARGET_IEEE_FP)
17745 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17746 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17747 intcmp_mode = CCmode;
17752 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17758 if (code == GE || !TARGET_IEEE_FP)
17760 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17765 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17766 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17772 if (code == LE && TARGET_IEEE_FP)
17774 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17775 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17776 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17777 intcmp_mode = CCmode;
17782 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17788 if (code == EQ && TARGET_IEEE_FP)
17790 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17791 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17792 intcmp_mode = CCmode;
17797 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17803 if (code == NE && TARGET_IEEE_FP)
17805 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17806 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17812 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17818 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17822 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17827 gcc_unreachable ();
17835 /* Return the test that should be put into the flags user, i.e.
17836 the bcc, scc, or cmov instruction. */
17837 return gen_rtx_fmt_ee (code, VOIDmode,
17838 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17843 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17847 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17848 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17850 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17852 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17853 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17856 ret = ix86_expand_int_compare (code, op0, op1);
17862 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17864 enum machine_mode mode = GET_MODE (op0);
17876 tmp = ix86_expand_compare (code, op0, op1);
17877 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17878 gen_rtx_LABEL_REF (VOIDmode, label),
17880 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17887 /* Expand DImode branch into multiple compare+branch. */
17889 rtx lo[2], hi[2], label2;
17890 enum rtx_code code1, code2, code3;
17891 enum machine_mode submode;
17893 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17895 tmp = op0, op0 = op1, op1 = tmp;
17896 code = swap_condition (code);
17899 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17900 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17902 submode = mode == DImode ? SImode : DImode;
17904 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17905 avoid two branches. This costs one extra insn, so disable when
17906 optimizing for size. */
17908 if ((code == EQ || code == NE)
17909 && (!optimize_insn_for_size_p ()
17910 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17915 if (hi[1] != const0_rtx)
17916 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17917 NULL_RTX, 0, OPTAB_WIDEN);
17920 if (lo[1] != const0_rtx)
17921 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17922 NULL_RTX, 0, OPTAB_WIDEN);
17924 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17925 NULL_RTX, 0, OPTAB_WIDEN);
17927 ix86_expand_branch (code, tmp, const0_rtx, label);
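/* Illustrative C equivalent of the branch just emitted (a sketch,
   not part of the compiler): a 64-bit equality on a 32-bit target
   tests

     ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0

   which is zero iff both halves match, so one jump suffices.  */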
17931 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17932 op1 is a constant and the low word is zero, then we can just
17933 examine the high word. Similarly for low word -1 and
17934 less-or-equal-than or greater-than. */
17936 if (CONST_INT_P (hi[1]))
17939 case LT: case LTU: case GE: case GEU:
17940 if (lo[1] == const0_rtx)
17942 ix86_expand_branch (code, hi[0], hi[1], label);
17946 case LE: case LEU: case GT: case GTU:
17947 if (lo[1] == constm1_rtx)
17949 ix86_expand_branch (code, hi[0], hi[1], label);
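/* Worked example of the shortcut above (illustrative): on a 32-bit
   target "x < 0x500000000LL" has a zero low word, so it reduces to
   the high-word test "hi(x) < 5"; likewise "x <= 0x4FFFFFFFFLL" has
   a low word of -1 and reduces to "hi(x) <= 4".  */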
17957 /* Otherwise, we need two or three jumps. */
17959 label2 = gen_label_rtx ();
17962 code2 = swap_condition (code);
17963 code3 = unsigned_condition (code);
17967 case LT: case GT: case LTU: case GTU:
17970 case LE: code1 = LT; code2 = GT; break;
17971 case GE: code1 = GT; code2 = LT; break;
17972 case LEU: code1 = LTU; code2 = GTU; break;
17973 case GEU: code1 = GTU; code2 = LTU; break;
17975 case EQ: code1 = UNKNOWN; code2 = NE; break;
17976 case NE: code2 = UNKNOWN; break;
17979 gcc_unreachable ();
17984 * if (hi(a) < hi(b)) goto true;
17985 * if (hi(a) > hi(b)) goto false;
17986 * if (lo(a) < lo(b)) goto true;
17990 if (code1 != UNKNOWN)
17991 ix86_expand_branch (code1, hi[0], hi[1], label);
17992 if (code2 != UNKNOWN)
17993 ix86_expand_branch (code2, hi[0], hi[1], label2);
17995 ix86_expand_branch (code3, lo[0], lo[1], label);
17997 if (code2 != UNKNOWN)
17998 emit_label (label2);
18003 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
18008 /* Split branch based on floating point condition. */
18010 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
18011 rtx target1, rtx target2, rtx tmp, rtx pushed)
18016 if (target2 != pc_rtx)
18019 code = reverse_condition_maybe_unordered (code);
18024 condition = ix86_expand_fp_compare (code, op1, op2,
18027 /* Remove pushed operand from stack. */
18029 ix86_free_from_memory (GET_MODE (pushed));
18031 i = emit_jump_insn (gen_rtx_SET
18033 gen_rtx_IF_THEN_ELSE (VOIDmode,
18034 condition, target1, target2)));
18035 if (split_branch_probability >= 0)
18036 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
18040 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18044 gcc_assert (GET_MODE (dest) == QImode);
18046 ret = ix86_expand_compare (code, op0, op1);
18047 PUT_MODE (ret, QImode);
18048 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
18051 /* Expand a comparison setting or clearing the carry flag. Return true when
18052 successful and set *POP for the operation. */
18054 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
18056 enum machine_mode mode =
18057 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
18059 /* Do not handle double-mode compares that go through a special path. */
18060 if (mode == (TARGET_64BIT ? TImode : DImode))
18063 if (SCALAR_FLOAT_MODE_P (mode))
18065 rtx compare_op, compare_seq;
18067 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18069 /* Shortcut: the following common codes never translate
18070 into carry-flag compares. */
18071 if (code == EQ || code == NE || code == UNEQ || code == LTGT
18072 || code == ORDERED || code == UNORDERED)
18075 /* These comparisons require the zero flag; swap the operands so they don't. */
18076 if ((code == GT || code == UNLE || code == LE || code == UNGT)
18077 && !TARGET_IEEE_FP)
18082 code = swap_condition (code);
18085 /* Try to expand the comparison and verify that we end up with
18086 a carry-flag-based comparison. This fails to be true only when
18087 we decide to expand the comparison using arithmetic, which is
18088 not a common scenario. */
18090 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
18091 compare_seq = get_insns ();
18094 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
18095 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
18096 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
18098 code = GET_CODE (compare_op);
18100 if (code != LTU && code != GEU)
18103 emit_insn (compare_seq);
18108 if (!INTEGRAL_MODE_P (mode))
18117 /* Convert a==0 into (unsigned)a<1. */
18120 if (op1 != const0_rtx)
18123 code = (code == EQ ? LTU : GEU);
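/* Worked example (illustrative): "a == 0" becomes the unsigned test
   "a < 1", and "cmp $1, a" sets the carry flag exactly when a is
   zero, so the result can be consumed by sbb/adc without a setcc.  */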
18126 /* Convert a>b into b<a or a>=b+1. */
18129 if (CONST_INT_P (op1))
18131 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18132 /* Bail out on overflow. We could still swap the operands, but that
18133 would force loading the constant into a register. */
18134 if (op1 == const0_rtx
18135 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18137 code = (code == GTU ? GEU : LTU);
18144 code = (code == GTU ? LTU : GEU);
18148 /* Convert a>=0 into (unsigned)a<0x80000000. */
18151 if (mode == DImode || op1 != const0_rtx)
18153 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18154 code = (code == LT ? GEU : LTU);
18158 if (mode == DImode || op1 != constm1_rtx)
18160 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18161 code = (code == LE ? GEU : LTU);
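/* Worked example (illustrative): for a 32-bit operand, "a >= 0" is
   the unsigned test "(unsigned) a < 0x80000000" and "a <= -1" is
   "(unsigned) a >= 0x80000000", since the values with the sign bit
   set are exactly those at or above 2^31.  */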
18167 /* Swapping operands may cause a constant to appear as the first operand. */
18168 if (!nonimmediate_operand (op0, VOIDmode))
18170 if (!can_create_pseudo_p ())
18172 op0 = force_reg (mode, op0);
18174 *pop = ix86_expand_compare (code, op0, op1);
18175 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
18180 ix86_expand_int_movcc (rtx operands[])
18182 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18183 rtx compare_seq, compare_op;
18184 enum machine_mode mode = GET_MODE (operands[0]);
18185 bool sign_bit_compare_p = false;
18186 rtx op0 = XEXP (operands[1], 0);
18187 rtx op1 = XEXP (operands[1], 1);
18190 compare_op = ix86_expand_compare (code, op0, op1);
18191 compare_seq = get_insns ();
18194 compare_code = GET_CODE (compare_op);
18196 if ((op1 == const0_rtx && (code == GE || code == LT))
18197 || (op1 == constm1_rtx && (code == GT || code == LE)))
18198 sign_bit_compare_p = true;
18200 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18201 HImode insns, we'd be swallowed in word prefix ops. */
18203 if ((mode != HImode || TARGET_FAST_PREFIX)
18204 && (mode != (TARGET_64BIT ? TImode : DImode))
18205 && CONST_INT_P (operands[2])
18206 && CONST_INT_P (operands[3]))
18208 rtx out = operands[0];
18209 HOST_WIDE_INT ct = INTVAL (operands[2]);
18210 HOST_WIDE_INT cf = INTVAL (operands[3]);
18211 HOST_WIDE_INT diff;
18214 /* Sign bit compares are better done using shifts than by using
18216 if (sign_bit_compare_p
18217 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18219 /* Detect overlap between destination and compare sources. */
18222 if (!sign_bit_compare_p)
18225 bool fpcmp = false;
18227 compare_code = GET_CODE (compare_op);
18229 flags = XEXP (compare_op, 0);
18231 if (GET_MODE (flags) == CCFPmode
18232 || GET_MODE (flags) == CCFPUmode)
18236 = ix86_fp_compare_code_to_integer (compare_code);
18239 /* To simplify the rest of the code, restrict to the GEU case. */
18240 if (compare_code == LTU)
18242 HOST_WIDE_INT tmp = ct;
18245 compare_code = reverse_condition (compare_code);
18246 code = reverse_condition (code);
18251 PUT_CODE (compare_op,
18252 reverse_condition_maybe_unordered
18253 (GET_CODE (compare_op)));
18255 PUT_CODE (compare_op,
18256 reverse_condition (GET_CODE (compare_op)));
18260 if (reg_overlap_mentioned_p (out, op0)
18261 || reg_overlap_mentioned_p (out, op1))
18262 tmp = gen_reg_rtx (mode);
18264 if (mode == DImode)
18265 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18267 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18268 flags, compare_op));
18272 if (code == GT || code == GE)
18273 code = reverse_condition (code);
18276 HOST_WIDE_INT tmp = ct;
18281 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18294 tmp = expand_simple_binop (mode, PLUS,
18296 copy_rtx (tmp), 1, OPTAB_DIRECT);
18307 tmp = expand_simple_binop (mode, IOR,
18309 copy_rtx (tmp), 1, OPTAB_DIRECT);
18311 else if (diff == -1 && ct)
18321 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18323 tmp = expand_simple_binop (mode, PLUS,
18324 copy_rtx (tmp), GEN_INT (cf),
18325 copy_rtx (tmp), 1, OPTAB_DIRECT);
18333 * andl cf - ct, dest
18343 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18346 tmp = expand_simple_binop (mode, AND,
18348 gen_int_mode (cf - ct, mode),
18349 copy_rtx (tmp), 1, OPTAB_DIRECT);
18351 tmp = expand_simple_binop (mode, PLUS,
18352 copy_rtx (tmp), GEN_INT (ct),
18353 copy_rtx (tmp), 1, OPTAB_DIRECT);
18356 if (!rtx_equal_p (tmp, out))
18357 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
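/* The sequence built above is the classic branch-free select on the
   carry flag (annotation, not from the original comments); the
   and/add variant is

     cmp  op1, op0        ; carry encodes the GEU/LTU test
     sbb  tmp, tmp        ; tmp = CF ? -1 : 0
     and  cf - ct, tmp    ; tmp = CF ? cf - ct : 0
     add  ct, tmp         ; tmp = CF ? cf : ct

   where x86_movsicc_0_m1 emits the sbb step, and the small-difference
   cases specialize the tail.  */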
18364 enum machine_mode cmp_mode = GET_MODE (op0);
18367 tmp = ct, ct = cf, cf = tmp;
18370 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18372 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18374 /* We may be reversing an unordered compare to a normal compare, which
18375 is not valid in general (we may convert a non-trapping condition
18376 to a trapping one); however, on i386 we currently emit all
18377 comparisons unordered. */
18378 compare_code = reverse_condition_maybe_unordered (compare_code);
18379 code = reverse_condition_maybe_unordered (code);
18383 compare_code = reverse_condition (compare_code);
18384 code = reverse_condition (code);
18388 compare_code = UNKNOWN;
18389 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18390 && CONST_INT_P (op1))
18392 if (op1 == const0_rtx
18393 && (code == LT || code == GE))
18394 compare_code = code;
18395 else if (op1 == constm1_rtx)
18399 else if (code == GT)
18404 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18405 if (compare_code != UNKNOWN
18406 && GET_MODE (op0) == GET_MODE (out)
18407 && (cf == -1 || ct == -1))
18409 /* If the lea code below could be used, only optimize
18410 if it results in a 2-insn sequence. */
18412 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18413 || diff == 3 || diff == 5 || diff == 9)
18414 || (compare_code == LT && ct == -1)
18415 || (compare_code == GE && cf == -1))
18418 * notl op1 (if necessary)
18426 code = reverse_condition (code);
18429 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18431 out = expand_simple_binop (mode, IOR,
18433 out, 1, OPTAB_DIRECT);
18434 if (out != operands[0])
18435 emit_move_insn (operands[0], out);
18442 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18443 || diff == 3 || diff == 5 || diff == 9)
18444 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18446 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18452 * lea cf(dest*(ct-cf)),dest
18456 * This also catches the degenerate setcc-only case.
18462 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18465 /* On x86_64 the lea instruction operates on Pmode, so we need
18466 the arithmetic done in the proper mode to match. */
18468 tmp = copy_rtx (out);
18472 out1 = copy_rtx (out);
18473 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18477 tmp = gen_rtx_PLUS (mode, tmp, out1);
18483 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18486 if (!rtx_equal_p (tmp, out))
18489 out = force_operand (tmp, copy_rtx (out));
18491 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18493 if (!rtx_equal_p (out, operands[0]))
18494 emit_move_insn (operands[0], copy_rtx (out));
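/* Illustrative C form of the lea path above (a sketch, not
   authoritative): with flag = (cond ? 1 : 0) the result is the
   branch-free

     out = cf + flag * (ct - cf);

   and for ct - cf in {1, 2, 3, 4, 5, 8, 9} the multiply-add folds
   into a single lea, e.g. "leal cf(%eax,%eax,4), %eax" for a
   difference of 5.  */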
18500 * General case: Jumpful:
18501 * xorl dest,dest cmpl op1, op2
18502 * cmpl op1, op2 movl ct, dest
18503 * setcc dest jcc 1f
18504 * decl dest movl cf, dest
18505 * andl (cf-ct),dest 1:
18508 * Size 20. Size 14.
18510 * This is reasonably steep, but branch mispredict costs are
18511 * high on modern cpus, so consider failing only if optimizing
18515 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18516 && BRANCH_COST (optimize_insn_for_speed_p (),
18521 enum machine_mode cmp_mode = GET_MODE (op0);
18526 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18528 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18530 /* We may be reversing an unordered compare to a normal compare,
18531 which is not valid in general (we may convert a non-trapping
18532 condition to a trapping one); however, on i386 we currently
18533 emit all comparisons unordered. */
18534 code = reverse_condition_maybe_unordered (code);
18538 code = reverse_condition (code);
18539 if (compare_code != UNKNOWN)
18540 compare_code = reverse_condition (compare_code);
18544 if (compare_code != UNKNOWN)
18546 /* notl op1 (if needed)
18551 For x < 0 (resp. x <= -1) there will be no notl,
18552 so if possible swap the constants to get rid of the
18554 True/false will be -1/0 while code below (store flag
18555 followed by decrement) is 0/-1, so the constants need
18556 to be exchanged once more. */
18558 if (compare_code == GE || !cf)
18560 code = reverse_condition (code);
18565 HOST_WIDE_INT tmp = cf;
18570 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18574 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18576 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18578 copy_rtx (out), 1, OPTAB_DIRECT);
18581 out = expand_simple_binop (mode, AND, copy_rtx (out),
18582 gen_int_mode (cf - ct, mode),
18583 copy_rtx (out), 1, OPTAB_DIRECT);
18585 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18586 copy_rtx (out), 1, OPTAB_DIRECT);
18587 if (!rtx_equal_p (out, operands[0]))
18588 emit_move_insn (operands[0], copy_rtx (out));
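/* C-level summary of the branch-free sequence above (illustrative):

     out = ((cond ? 0 : -1) & (cf - ct)) + ct;

   setcc gives 0/1, the decrement turns it into -1/0, the AND picks
   0 or cf - ct, and the final add yields ct or cf.  */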
18594 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18596 /* Try a few things more with specific constants and a variable. */
18599 rtx var, orig_out, out, tmp;
18601 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18604 /* If one of the two operands is an interesting constant, load a
18605 constant with the above and mask it in with a logical operation. */
18607 if (CONST_INT_P (operands[2]))
18610 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18611 operands[3] = constm1_rtx, op = and_optab;
18612 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18613 operands[3] = const0_rtx, op = ior_optab;
18617 else if (CONST_INT_P (operands[3]))
18620 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18621 operands[2] = constm1_rtx, op = and_optab;
18622 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
18623 operands[2] = const0_rtx, op = ior_optab;
18630 orig_out = operands[0];
18631 tmp = gen_reg_rtx (mode);
18634 /* Recurse to get the constant loaded. */
18635 if (ix86_expand_int_movcc (operands) == 0)
18638 /* Mask in the interesting variable. */
18639 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18641 if (!rtx_equal_p (out, orig_out))
18642 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18648 * For comparison with above,
18658 if (! nonimmediate_operand (operands[2], mode))
18659 operands[2] = force_reg (mode, operands[2]);
18660 if (! nonimmediate_operand (operands[3], mode))
18661 operands[3] = force_reg (mode, operands[3]);
18663 if (! register_operand (operands[2], VOIDmode)
18665 || ! register_operand (operands[3], VOIDmode)))
18666 operands[2] = force_reg (mode, operands[2]);
18669 && ! register_operand (operands[3], VOIDmode))
18670 operands[3] = force_reg (mode, operands[3]);
18672 emit_insn (compare_seq);
18673 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18674 gen_rtx_IF_THEN_ELSE (mode,
18675 compare_op, operands[2],
18680 /* Swap, force into registers, or otherwise massage the two operands
18681 to an sse comparison with a mask result. Thus we differ a bit from
18682 ix86_prepare_fp_compare_args which expects to produce a flags result.
18684 The DEST operand exists to help determine whether to commute commutative
18685 operators. The POP0/POP1 operands are updated in place. The new
18686 comparison code is returned, or UNKNOWN if not implementable. */
18688 static enum rtx_code
18689 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18690 rtx *pop0, rtx *pop1)
18698 /* We have no LTGT as an operator. We could implement it with
18699 NE & ORDERED, but this requires an extra temporary. It's
18700 not clear that it's worth it. */
18707 /* These are supported directly. */
18714 /* For commutative operators, try to canonicalize the destination
18715 operand to be first in the comparison - this helps reload to
18716 avoid extra moves. */
18717 if (!dest || !rtx_equal_p (dest, *pop1))
18725 /* These are not supported directly. Swap the comparison operands
18726 to transform into something that is supported. */
18730 code = swap_condition (code);
18734 gcc_unreachable ();
18740 /* Detect conditional moves that exactly match min/max operational
18741 semantics. Note that this is IEEE safe, as long as we don't
18742 interchange the operands.
18744 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18745 and TRUE if the operation is successful and instructions are emitted. */
18748 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18749 rtx cmp_op1, rtx if_true, rtx if_false)
18751 enum machine_mode mode;
18757 else if (code == UNGE)
18760 if_true = if_false;
18766 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18768 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18773 mode = GET_MODE (dest);
18775 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18776 but MODE may be a vector mode and thus not appropriate. */
18777 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18779 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18782 if_true = force_reg (mode, if_true);
18783 v = gen_rtvec (2, if_true, if_false);
18784 tmp = gen_rtx_UNSPEC (mode, v, u);
18788 code = is_min ? SMIN : SMAX;
18789 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18792 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
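/* Note (assumed hardware behavior, not stated in the sources):
   minss/minsd compute "a < b ? a : b" and return the second operand
   when either input is a NaN or when comparing -0.0 with +0.0, which
   is why the match above insists on this exact operand order and
   falls back to the UNSPEC variant when IEEE semantics matter.  */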
18796 /* Expand an sse vector comparison. Return the register with the result. */
18799 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18800 rtx op_true, rtx op_false)
18802 enum machine_mode mode = GET_MODE (dest);
18805 cmp_op0 = force_reg (mode, cmp_op0);
18806 if (!nonimmediate_operand (cmp_op1, mode))
18807 cmp_op1 = force_reg (mode, cmp_op1);
18810 || reg_overlap_mentioned_p (dest, op_true)
18811 || reg_overlap_mentioned_p (dest, op_false))
18812 dest = gen_reg_rtx (mode);
18814 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18815 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18820 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18821 operations. This is used for both scalar and vector conditional moves. */
18824 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18826 enum machine_mode mode = GET_MODE (dest);
18829 if (op_false == CONST0_RTX (mode))
18831 op_true = force_reg (mode, op_true);
18832 x = gen_rtx_AND (mode, cmp, op_true);
18833 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18835 else if (op_true == CONST0_RTX (mode))
18837 op_false = force_reg (mode, op_false);
18838 x = gen_rtx_NOT (mode, cmp);
18839 x = gen_rtx_AND (mode, x, op_false);
18840 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18842 else if (TARGET_XOP)
18844 rtx pcmov = gen_rtx_SET (mode, dest,
18845 gen_rtx_IF_THEN_ELSE (mode, cmp,
18852 op_true = force_reg (mode, op_true);
18853 op_false = force_reg (mode, op_false);
18855 t2 = gen_reg_rtx (mode);
18857 t3 = gen_reg_rtx (mode);
18861 x = gen_rtx_AND (mode, op_true, cmp);
18862 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18864 x = gen_rtx_NOT (mode, cmp);
18865 x = gen_rtx_AND (mode, x, op_false);
18866 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18868 x = gen_rtx_IOR (mode, t3, t2);
18869 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
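/* The fallback above is the standard mask blend (annotation):

     dest = (cmp & op_true) | (~cmp & op_false);

   cmp is an all-ones/all-zeros mask per element, so AND, ANDN and OR
   implement the element-wise select; TARGET_XOP instead expresses
   the same thing with a single vpcmov.  */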
18873 /* Expand a floating-point conditional move. Return true if successful. */
18876 ix86_expand_fp_movcc (rtx operands[])
18878 enum machine_mode mode = GET_MODE (operands[0]);
18879 enum rtx_code code = GET_CODE (operands[1]);
18880 rtx tmp, compare_op;
18881 rtx op0 = XEXP (operands[1], 0);
18882 rtx op1 = XEXP (operands[1], 1);
18884 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18886 enum machine_mode cmode;
18888 /* Since we've no cmove for sse registers, don't force bad register
18889 allocation just to gain access to it. Deny movcc when the
18890 comparison mode doesn't match the move mode. */
18891 cmode = GET_MODE (op0);
18892 if (cmode == VOIDmode)
18893 cmode = GET_MODE (op1);
18897 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18898 if (code == UNKNOWN)
18901 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18902 operands[2], operands[3]))
18905 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18906 operands[2], operands[3]);
18907 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18911 /* The floating point conditional move instructions don't directly
18912 support conditions resulting from a signed integer comparison. */
18914 compare_op = ix86_expand_compare (code, op0, op1);
18915 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18917 tmp = gen_reg_rtx (QImode);
18918 ix86_expand_setcc (tmp, code, op0, op1);
18920 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18923 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18924 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18925 operands[2], operands[3])));
18930 /* Expand a floating-point vector conditional move; a vcond operation
18931 rather than a movcc operation. */
18934 ix86_expand_fp_vcond (rtx operands[])
18936 enum rtx_code code = GET_CODE (operands[3]);
18939 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18940 &operands[4], &operands[5]);
18941 if (code == UNKNOWN)
18944 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18945 operands[5], operands[1], operands[2]))
18948 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18949 operands[1], operands[2]);
18950 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18954 /* Expand a signed/unsigned integral vector conditional move. */
18957 ix86_expand_int_vcond (rtx operands[])
18959 enum machine_mode mode = GET_MODE (operands[0]);
18960 enum rtx_code code = GET_CODE (operands[3]);
18961 bool negate = false;
18964 cop0 = operands[4];
18965 cop1 = operands[5];
18967 /* XOP supports all of the comparisons on all vector int types. */
18970 /* Canonicalize the comparison to EQ, GT, GTU. */
18981 code = reverse_condition (code);
18987 code = reverse_condition (code);
18993 code = swap_condition (code);
18994 x = cop0, cop0 = cop1, cop1 = x;
18998 gcc_unreachable ();
19001 /* Only SSE4.1/SSE4.2 supports V2DImode. */
19002 if (mode == V2DImode)
19007 /* SSE4.1 supports EQ. */
19008 if (!TARGET_SSE4_1)
19014 /* SSE4.2 supports GT/GTU. */
19015 if (!TARGET_SSE4_2)
19020 gcc_unreachable ();
19024 /* Unsigned parallel compare is not supported by the hardware.
19025 Play some tricks to turn this into a signed comparison
19029 cop0 = force_reg (mode, cop0);
19037 rtx (*gen_sub3) (rtx, rtx, rtx);
19039 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
19041 mask = ix86_build_signbit_mask (mode, true, false);
19042 gen_sub3 = (mode == V4SImode
19043 ? gen_subv4si3 : gen_subv2di3);
19044 t1 = gen_reg_rtx (mode);
19045 emit_insn (gen_sub3 (t1, cop0, mask));
19047 t2 = gen_reg_rtx (mode);
19048 emit_insn (gen_sub3 (t2, cop1, mask));
19058 /* Perform a parallel unsigned saturating subtraction. */
19059 x = gen_reg_rtx (mode);
19060 emit_insn (gen_rtx_SET (VOIDmode, x,
19061 gen_rtx_US_MINUS (mode, cop0, cop1)));
19064 cop1 = CONST0_RTX (mode);
19070 gcc_unreachable ();
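/* Two illustrative scalar analogues of the tricks above (sketches,
   not from the original comments):

     a >u b   <==>   (int) (a ^ 0x80000000) > (int) (b ^ 0x80000000)

   flipping the sign bit turns an unsigned order into a signed one
   (the mask subtraction above does the same thing); and with
   unsigned saturating subtraction,

     a >u b   <==>   subus (a, b) != 0

   which the code expresses as an EQ test against zero with the
   true/false arms swapped (the "negate" flag).  */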
19075 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
19076 operands[1+negate], operands[2-negate]);
19078 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
19079 operands[2-negate]);
19083 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
19084 true if we should do zero extension, else sign extension. HIGH_P is
19085 true if we want the N/2 high elements, else the low elements. */
19088 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19090 enum machine_mode imode = GET_MODE (operands[1]);
19095 rtx (*unpack)(rtx, rtx);
19101 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19103 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19107 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19109 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19113 unpack = gen_sse4_1_zero_extendv2siv2di2;
19115 unpack = gen_sse4_1_sign_extendv2siv2di2;
19118 gcc_unreachable ();
19123 /* Shift higher 8 bytes to lower 8 bytes. */
19124 tmp = gen_reg_rtx (imode);
19125 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
19126 gen_lowpart (V1TImode, operands[1]),
19132 emit_insn (unpack (operands[0], tmp));
19136 rtx (*unpack)(rtx, rtx, rtx);
19142 unpack = gen_vec_interleave_highv16qi;
19144 unpack = gen_vec_interleave_lowv16qi;
19148 unpack = gen_vec_interleave_highv8hi;
19150 unpack = gen_vec_interleave_lowv8hi;
19154 unpack = gen_vec_interleave_highv4si;
19156 unpack = gen_vec_interleave_lowv4si;
19159 gcc_unreachable ();
19162 dest = gen_lowpart (imode, operands[0]);
19165 tmp = force_reg (imode, CONST0_RTX (imode));
19167 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19168 operands[1], pc_rtx, pc_rtx);
19170 emit_insn (unpack (dest, operands[1], tmp));
19174 /* Expand conditional increment or decrement using adc/sbb instructions.
19175 The default case using setcc followed by the conditional move can be
19176 done by generic code. */
19178 ix86_expand_int_addcc (rtx operands[])
19180 enum rtx_code code = GET_CODE (operands[1]);
19182 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19184 rtx val = const0_rtx;
19185 bool fpcmp = false;
19186 enum machine_mode mode;
19187 rtx op0 = XEXP (operands[1], 0);
19188 rtx op1 = XEXP (operands[1], 1);
19190 if (operands[3] != const1_rtx
19191 && operands[3] != constm1_rtx)
19193 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19195 code = GET_CODE (compare_op);
19197 flags = XEXP (compare_op, 0);
19199 if (GET_MODE (flags) == CCFPmode
19200 || GET_MODE (flags) == CCFPUmode)
19203 code = ix86_fp_compare_code_to_integer (code);
19210 PUT_CODE (compare_op,
19211 reverse_condition_maybe_unordered
19212 (GET_CODE (compare_op)));
19214 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19217 mode = GET_MODE (operands[0]);
19219 /* Construct either adc or sbb insn. */
19220 if ((code == LTU) == (operands[3] == constm1_rtx))
19225 insn = gen_subqi3_carry;
19228 insn = gen_subhi3_carry;
19231 insn = gen_subsi3_carry;
19234 insn = gen_subdi3_carry;
19237 gcc_unreachable ();
19245 insn = gen_addqi3_carry;
19248 insn = gen_addhi3_carry;
19251 insn = gen_addsi3_carry;
19254 insn = gen_adddi3_carry;
19257 gcc_unreachable ();
19260 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19266 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19267 but works for floating point parameters and non-offsettable memories.
19268 For pushes, it returns just stack offsets; the values will be saved
19269 in the right order. At most four parts are generated. */
19272 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19277 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19279 size = (GET_MODE_SIZE (mode) + 4) / 8;
19281 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19282 gcc_assert (size >= 2 && size <= 4);
19284 /* Optimize constant pool references to immediates. This is used by fp
19285 moves, which force all constants to memory to allow combining. */
19286 if (MEM_P (operand) && MEM_READONLY_P (operand))
19288 rtx tmp = maybe_get_pool_constant (operand);
19293 if (MEM_P (operand) && !offsettable_memref_p (operand))
19295 /* The only non-offsettable memories we handle are pushes. */
19296 int ok = push_operand (operand, VOIDmode);
19300 operand = copy_rtx (operand);
19301 PUT_MODE (operand, Pmode);
19302 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19306 if (GET_CODE (operand) == CONST_VECTOR)
19308 enum machine_mode imode = int_mode_for_mode (mode);
19309 /* Caution: if we looked through a constant pool memory above,
19310 the operand may actually have a different mode now. That's
19311 ok, since we want to pun this all the way back to an integer. */
19312 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19313 gcc_assert (operand != NULL);
19319 if (mode == DImode)
19320 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19325 if (REG_P (operand))
19327 gcc_assert (reload_completed);
19328 for (i = 0; i < size; i++)
19329 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19331 else if (offsettable_memref_p (operand))
19333 operand = adjust_address (operand, SImode, 0);
19334 parts[0] = operand;
19335 for (i = 1; i < size; i++)
19336 parts[i] = adjust_address (operand, SImode, 4 * i);
19338 else if (GET_CODE (operand) == CONST_DOUBLE)
19343 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19347 real_to_target (l, &r, mode);
19348 parts[3] = gen_int_mode (l[3], SImode);
19349 parts[2] = gen_int_mode (l[2], SImode);
19352 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19353 parts[2] = gen_int_mode (l[2], SImode);
19356 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19359 gcc_unreachable ();
19361 parts[1] = gen_int_mode (l[1], SImode);
19362 parts[0] = gen_int_mode (l[0], SImode);
19365 gcc_unreachable ();
19370 if (mode == TImode)
19371 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19372 if (mode == XFmode || mode == TFmode)
19374 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19375 if (REG_P (operand))
19377 gcc_assert (reload_completed);
19378 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19379 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19381 else if (offsettable_memref_p (operand))
19383 operand = adjust_address (operand, DImode, 0);
19384 parts[0] = operand;
19385 parts[1] = adjust_address (operand, upper_mode, 8);
19387 else if (GET_CODE (operand) == CONST_DOUBLE)
19392 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19393 real_to_target (l, &r, mode);
19395 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19396 if (HOST_BITS_PER_WIDE_INT >= 64)
19399 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19400 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19403 parts[0] = immed_double_const (l[0], l[1], DImode);
19405 if (upper_mode == SImode)
19406 parts[1] = gen_int_mode (l[2], SImode);
19407 else if (HOST_BITS_PER_WIDE_INT >= 64)
19410 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19411 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19414 parts[1] = immed_double_const (l[2], l[3], DImode);
19417 gcc_unreachable ();
19424 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19425 Return false when normal moves are needed; true when all required
19426 insns have been emitted. Operands 2-4 contain the input values
19427 in the correct order; operands 5-7 contain the output values. */
19430 ix86_split_long_move (rtx operands[])
19435 int collisions = 0;
19436 enum machine_mode mode = GET_MODE (operands[0]);
19437 bool collisionparts[4];
19439 /* The DFmode expanders may ask us to move a double.
19440 For a 64-bit target this is a single move. By hiding the fact
19441 here we simplify i386.md splitters. */
19442 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19444 /* Optimize constant pool references to immediates. This is used by
19445 fp moves, which force all constants to memory to allow combining. */
19447 if (MEM_P (operands[1])
19448 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19449 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19450 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19451 if (push_operand (operands[0], VOIDmode))
19453 operands[0] = copy_rtx (operands[0]);
19454 PUT_MODE (operands[0], Pmode);
19457 operands[0] = gen_lowpart (DImode, operands[0]);
19458 operands[1] = gen_lowpart (DImode, operands[1]);
19459 emit_move_insn (operands[0], operands[1]);
19463 /* The only non-offsettable memory we handle is push. */
19464 if (push_operand (operands[0], VOIDmode))
19467 gcc_assert (!MEM_P (operands[0])
19468 || offsettable_memref_p (operands[0]));
19470 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19471 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19473 /* When emitting a push, take care with source operands on the stack. */
19474 if (push && MEM_P (operands[1])
19475 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19477 rtx src_base = XEXP (part[1][nparts - 1], 0);
19479 /* Compensate for the stack decrement by 4. */
19480 if (!TARGET_64BIT && nparts == 3
19481 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19482 src_base = plus_constant (src_base, 4);
19484 /* src_base refers to the stack pointer and is
19485 automatically decreased by emitted push. */
19486 for (i = 0; i < nparts; i++)
19487 part[1][i] = change_address (part[1][i],
19488 GET_MODE (part[1][i]), src_base);
19491 /* We need to do the copy in the right order in case an address register
19492 of the source overlaps the destination. */
19493 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19497 for (i = 0; i < nparts; i++)
19500 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19501 if (collisionparts[i])
19505 /* Collision in the middle part can be handled by reordering. */
19506 if (collisions == 1 && nparts == 3 && collisionparts [1])
19508 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19509 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19511 else if (collisions == 1
19513 && (collisionparts [1] || collisionparts [2]))
19515 if (collisionparts [1])
19517 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19518 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19522 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19523 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19527 /* If there are more collisions, we can't handle them by reordering.
19528 Do an lea to the last part and use only one colliding move. */
19529 else if (collisions > 1)
19535 base = part[0][nparts - 1];
19537 /* Handle the case when the last part isn't valid for lea.
19538 Happens in 64-bit mode storing the 12-byte XFmode. */
19539 if (GET_MODE (base) != Pmode)
19540 base = gen_rtx_REG (Pmode, REGNO (base));
19542 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19543 part[1][0] = replace_equiv_address (part[1][0], base);
19544 for (i = 1; i < nparts; i++)
19546 tmp = plus_constant (base, UNITS_PER_WORD * i);
19547 part[1][i] = replace_equiv_address (part[1][i], tmp);
19558 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19559 emit_insn (gen_addsi3 (stack_pointer_rtx,
19560 stack_pointer_rtx, GEN_INT (-4)));
19561 emit_move_insn (part[0][2], part[1][2]);
19563 else if (nparts == 4)
19565 emit_move_insn (part[0][3], part[1][3]);
19566 emit_move_insn (part[0][2], part[1][2]);
19571 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
19572 register, it is OK - we will just use the larger counterpart. We also
19573 retype memory - these come from an attempt to avoid a REX prefix on
19574 moves of the second half of a TFmode value. */
19575 if (GET_MODE (part[1][1]) == SImode)
19577 switch (GET_CODE (part[1][1]))
19580 part[1][1] = adjust_address (part[1][1], DImode, 0);
19584 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19588 gcc_unreachable ();
19591 if (GET_MODE (part[1][0]) == SImode)
19592 part[1][0] = part[1][1];
19595 emit_move_insn (part[0][1], part[1][1]);
19596 emit_move_insn (part[0][0], part[1][0]);
19600 /* Choose the correct order so as not to overwrite the source before it is copied. */
19601 if ((REG_P (part[0][0])
19602 && REG_P (part[1][1])
19603 && (REGNO (part[0][0]) == REGNO (part[1][1])
19605 && REGNO (part[0][0]) == REGNO (part[1][2]))
19607 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19609 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19611 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19613 operands[2 + i] = part[0][j];
19614 operands[6 + i] = part[1][j];
19619 for (i = 0; i < nparts; i++)
19621 operands[2 + i] = part[0][i];
19622 operands[6 + i] = part[1][i];
19626 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19627 if (optimize_insn_for_size_p ())
19629 for (j = 0; j < nparts - 1; j++)
19630 if (CONST_INT_P (operands[6 + j])
19631 && operands[6 + j] != const0_rtx
19632 && REG_P (operands[2 + j]))
19633 for (i = j; i < nparts - 1; i++)
19634 if (CONST_INT_P (operands[7 + i])
19635 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19636 operands[7 + i] = operands[2 + j];
19639 for (i = 0; i < nparts; i++)
19640 emit_move_insn (operands[2 + i], operands[6 + i]);
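/* Example of the size optimization above (illustrative): moving the
   DImode constant 0x100000001 writes the immediate 1 twice; once the
   first part has been moved into a register, the second move can
   reuse that register instead of re-encoding the immediate.  */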
19645 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19646 left shift by a constant, either using a single shift or
19647 a sequence of add instructions. */
19650 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19652 rtx (*insn)(rtx, rtx, rtx);
19655 || (count * ix86_cost->add <= ix86_cost->shift_const
19656 && !optimize_insn_for_size_p ()))
19658 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19659 while (count-- > 0)
19660 emit_insn (insn (operand, operand, operand));
19664 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19665 emit_insn (insn (operand, operand, GEN_INT (count)));
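/* Illustrative example (not from the sources): a left shift by 2 can
   be emitted as two self-additions,

     addl %eax, %eax      ; x <<= 1
     addl %eax, %eax      ; x <<= 1

   which is chosen when the adds cost no more than a shift by a
   constant and we are not optimizing for size.  */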
19670 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19672 rtx (*gen_ashl3)(rtx, rtx, rtx);
19673 rtx (*gen_shld)(rtx, rtx, rtx);
19674 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19676 rtx low[2], high[2];
19679 if (CONST_INT_P (operands[2]))
19681 split_double_mode (mode, operands, 2, low, high);
19682 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19684 if (count >= half_width)
19686 emit_move_insn (high[0], low[1]);
19687 emit_move_insn (low[0], const0_rtx);
19689 if (count > half_width)
19690 ix86_expand_ashl_const (high[0], count - half_width, mode);
19694 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19696 if (!rtx_equal_p (operands[0], operands[1]))
19697 emit_move_insn (operands[0], operands[1]);
19699 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19700 ix86_expand_ashl_const (low[0], count, mode);
19705 split_double_mode (mode, operands, 1, low, high);
19707 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19709 if (operands[1] == const1_rtx)
19711 /* Assuming we've chosen QImode-capable registers, 1 << N
19712 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19713 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19715 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19717 ix86_expand_clear (low[0]);
19718 ix86_expand_clear (high[0]);
19719 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19721 d = gen_lowpart (QImode, low[0]);
19722 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19723 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19724 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19726 d = gen_lowpart (QImode, high[0]);
19727 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19728 s = gen_rtx_NE (QImode, flags, const0_rtx);
19729 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19732 /* Otherwise, we can get the same results by manually performing
19733 a bit extract operation on bit 5/6, and then performing the two
19734 shifts. The two methods of getting 0/1 into low/high are exactly
19735 the same size. Avoiding the shift in the bit extract case helps
19736 pentium4 a bit; no one else seems to care much either way. */
19739 enum machine_mode half_mode;
19740 rtx (*gen_lshr3)(rtx, rtx, rtx);
19741 rtx (*gen_and3)(rtx, rtx, rtx);
19742 rtx (*gen_xor3)(rtx, rtx, rtx);
19743 HOST_WIDE_INT bits;
19746 if (mode == DImode)
19748 half_mode = SImode;
19749 gen_lshr3 = gen_lshrsi3;
19750 gen_and3 = gen_andsi3;
19751 gen_xor3 = gen_xorsi3;
19756 half_mode = DImode;
19757 gen_lshr3 = gen_lshrdi3;
19758 gen_and3 = gen_anddi3;
19759 gen_xor3 = gen_xordi3;
19763 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19764 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19766 x = gen_lowpart (half_mode, operands[2]);
19767 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19769 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19770 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19771 emit_move_insn (low[0], high[0]);
19772 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19775 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19776 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
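/* Worked example (illustrative) for the 32-bit "1 << n" path above:
   bit 5 of the count selects the destination half, so the code
   computes, in C terms,

     high = (n >> 5) & 1;
     low  = high ^ 1;
     low <<= n;     (the hardware masks the count to n mod 32)
     high <<= n;

   leaving exactly one set bit in the proper half.  */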
19780 if (operands[1] == constm1_rtx)
19782 /* For -1 << N, we can avoid the shld instruction, because we
19783 know that we're shifting 0...31/63 ones into a -1. */
19784 emit_move_insn (low[0], constm1_rtx);
19785 if (optimize_insn_for_size_p ())
19786 emit_move_insn (high[0], low[0]);
19788 emit_move_insn (high[0], constm1_rtx);
19792 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19794 if (!rtx_equal_p (operands[0], operands[1]))
19795 emit_move_insn (operands[0], operands[1]);
19797 split_double_mode (mode, operands, 1, low, high);
19798 emit_insn (gen_shld (high[0], low[0], operands[2]));
19801 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19803 if (TARGET_CMOVE && scratch)
19805 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19806 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19808 ix86_expand_clear (scratch);
19809 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19813 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19814 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19816 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19821 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19823 rtx (*gen_ashr3)(rtx, rtx, rtx)
19824 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19825 rtx (*gen_shrd)(rtx, rtx, rtx);
19826 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19828 rtx low[2], high[2];
19831 if (CONST_INT_P (operands[2]))
19833 split_double_mode (mode, operands, 2, low, high);
19834 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19836 if (count == GET_MODE_BITSIZE (mode) - 1)
19838 emit_move_insn (high[0], high[1]);
19839 emit_insn (gen_ashr3 (high[0], high[0],
19840 GEN_INT (half_width - 1)));
19841 emit_move_insn (low[0], high[0]);
19844 else if (count >= half_width)
19846 emit_move_insn (low[0], high[1]);
19847 emit_move_insn (high[0], low[0]);
19848 emit_insn (gen_ashr3 (high[0], high[0],
19849 GEN_INT (half_width - 1)));
19851 if (count > half_width)
19852 emit_insn (gen_ashr3 (low[0], low[0],
19853 GEN_INT (count - half_width)));
19857 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19859 if (!rtx_equal_p (operands[0], operands[1]))
19860 emit_move_insn (operands[0], operands[1]);
19862 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19863 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19868 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19870 if (!rtx_equal_p (operands[0], operands[1]))
19871 emit_move_insn (operands[0], operands[1]);
19873 split_double_mode (mode, operands, 1, low, high);
19875 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19876 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19878 if (TARGET_CMOVE && scratch)
19880 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19881 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19883 emit_move_insn (scratch, high[0]);
19884 emit_insn (gen_ashr3 (scratch, scratch,
19885 GEN_INT (half_width - 1)));
19886 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19891 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19892 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19894 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
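/* Sketch of the split just emitted (annotation): a variable 64-bit
   arithmetic right shift on a 32-bit target becomes

     shrd %cl, high, low   ; low = low >> cl | high << (32 - cl)
     sar  %cl, high        ; high >>= cl

   and, because the hardware masks the count to 5 bits, a fixup
   (cmove or a conditional jump) handles cl >= 32 by moving high
   into low and filling high with sign copies.  */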
19900 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19902 rtx (*gen_lshr3)(rtx, rtx, rtx)
19903 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19904 rtx (*gen_shrd)(rtx, rtx, rtx);
19905 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19907 rtx low[2], high[2];
19910 if (CONST_INT_P (operands[2]))
19912 split_double_mode (mode, operands, 2, low, high);
19913 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19915 if (count >= half_width)
19917 emit_move_insn (low[0], high[1]);
19918 ix86_expand_clear (high[0]);
19920 if (count > half_width)
19921 emit_insn (gen_lshr3 (low[0], low[0],
19922 GEN_INT (count - half_width)));
19926 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19928 if (!rtx_equal_p (operands[0], operands[1]))
19929 emit_move_insn (operands[0], operands[1]);
19931 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19932 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19937 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19939 if (!rtx_equal_p (operands[0], operands[1]))
19940 emit_move_insn (operands[0], operands[1]);
19942 split_double_mode (mode, operands, 1, low, high);
19944 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19945 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19947 if (TARGET_CMOVE && scratch)
19949 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19950 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19952 ix86_expand_clear (scratch);
19953 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19958 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19959 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19961 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19966 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19968 predict_jump (int prob)
19970 rtx insn = get_last_insn ();
19971 gcc_assert (JUMP_P (insn));
19972 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19975 /* Helper function for the string operations below. Test whether VARIABLE
19976 is aligned to VALUE bytes. If so, jump to the label. */
19978 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19980 rtx label = gen_label_rtx ();
19981 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19982 if (GET_MODE (variable) == DImode)
19983 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19985 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19986 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19989 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19991 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19995 /* Adjust COUNTREG by VALUE. */
19997 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19999 rtx (*gen_add)(rtx, rtx, rtx)
20000 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
20002 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
20005 /* Zero extend a possibly SImode EXP to a Pmode register. */
20007 ix86_zero_extend_to_Pmode (rtx exp)
20010 if (GET_MODE (exp) == VOIDmode)
20011 return force_reg (Pmode, exp);
20012 if (GET_MODE (exp) == Pmode)
20013 return copy_to_mode_reg (Pmode, exp);
20014 r = gen_reg_rtx (Pmode);
20015 emit_insn (gen_zero_extendsidi2 (r, exp));
20019 /* Divide COUNTREG by SCALE. */
20021 scale_counter (rtx countreg, int scale)
20027 if (CONST_INT_P (countreg))
20028 return GEN_INT (INTVAL (countreg) / scale);
20029 gcc_assert (REG_P (countreg));
20031 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
20032 GEN_INT (exact_log2 (scale)),
20033 NULL, 1, OPTAB_DIRECT);
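/* Worked example (illustrative): for a constant count of 37 and scale 4
   this returns GEN_INT (9); for a count held in a register it emits a
   single logical shift right by exact_log2 (4) == 2 instead of a
   division.  */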
20037 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
20038 DImode for constant loop counts. */
20040 static enum machine_mode
20041 counter_mode (rtx count_exp)
20043 if (GET_MODE (count_exp) != VOIDmode)
20044 return GET_MODE (count_exp);
20045 if (!CONST_INT_P (count_exp))
20047 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
20052 /* When SRCPTR is non-NULL, output a simple loop to move the memory pointed
20053 to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
20054 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
20055 equivalent loop to set memory by VALUE (supposed to be in MODE).
20057 The size is rounded down to a whole number of chunks moved at once.
20058 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
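/* Worked example (illustrative): for COUNT == 100, MODE == SImode and
   UNROLL == 4 the piece size is 16 bytes, so the loop below transfers
   100 & ~15 == 96 bytes in six iterations and leaves the remaining
   4 bytes to the caller's epilogue.  */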
20062 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
20063 rtx destptr, rtx srcptr, rtx value,
20064 rtx count, enum machine_mode mode, int unroll,
20067 rtx out_label, top_label, iter, tmp;
20068 enum machine_mode iter_mode = counter_mode (count);
20069 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
20070 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
20076 top_label = gen_label_rtx ();
20077 out_label = gen_label_rtx ();
20078 iter = gen_reg_rtx (iter_mode);
20080 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
20081 NULL, 1, OPTAB_DIRECT);
20082 /* Those two should combine. */
20083 if (piece_size == const1_rtx)
20085 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
20087 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20089 emit_move_insn (iter, const0_rtx);
20091 emit_label (top_label);
20093 tmp = convert_modes (Pmode, iter_mode, iter, true);
20094 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
20095 destmem = change_address (destmem, mode, x_addr);
20099 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
20100 srcmem = change_address (srcmem, mode, y_addr);
20102 /* When unrolling for chips that reorder memory reads and writes,
20103 we can save registers by using a single temporary.
20104 Also, using 4 temporaries is overkill in 32bit mode. */
20105 if (!TARGET_64BIT && 0)
20107 for (i = 0; i < unroll; i++)
20112 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20114 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20116 emit_move_insn (destmem, srcmem);
20122 gcc_assert (unroll <= 4);
20123 for (i = 0; i < unroll; i++)
20125 tmpreg[i] = gen_reg_rtx (mode);
20129 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20131 emit_move_insn (tmpreg[i], srcmem);
20133 for (i = 0; i < unroll; i++)
20138 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20140 emit_move_insn (destmem, tmpreg[i]);
20145 for (i = 0; i < unroll; i++)
20149 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20150 emit_move_insn (destmem, value);
20153 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20154 true, OPTAB_LIB_WIDEN);
20156 emit_move_insn (iter, tmp);
20158 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20160 if (expected_size != -1)
20162 expected_size /= GET_MODE_SIZE (mode) * unroll;
20163 if (expected_size == 0)
20165 else if (expected_size > REG_BR_PROB_BASE)
20166 predict_jump (REG_BR_PROB_BASE - 1);
20168 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20171 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20172 iter = ix86_zero_extend_to_Pmode (iter);
20173 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20174 true, OPTAB_LIB_WIDEN);
20175 if (tmp != destptr)
20176 emit_move_insn (destptr, tmp);
20179 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20180 true, OPTAB_LIB_WIDEN);
20182 emit_move_insn (srcptr, tmp);
20184 emit_label (out_label);
20187 /* Output "rep; mov" instruction.
20188 Arguments have same meaning as for previous function */
20190 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20191 rtx destptr, rtx srcptr,
20193 enum machine_mode mode)
20199 /* If the size is known, it is shorter to use rep movs. */
20200 if (mode == QImode && CONST_INT_P (count)
20201 && !(INTVAL (count) & 3))
20202 mode = SImode;
20204 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20205 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20206 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20207 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20208 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20209 if (mode != QImode)
20211 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20212 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20213 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20214 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20215 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20216 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20220 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20221 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20223 if (CONST_INT_P (count))
20225 count = GEN_INT (INTVAL (count)
20226 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20227 destmem = shallow_copy_rtx (destmem);
20228 srcmem = shallow_copy_rtx (srcmem);
20229 set_mem_size (destmem, count);
20230 set_mem_size (srcmem, count);
20234 if (MEM_SIZE (destmem))
20235 set_mem_size (destmem, NULL_RTX);
20236 if (MEM_SIZE (srcmem))
20237 set_mem_size (srcmem, NULL_RTX);
20239 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20240 destexp, srcexp));
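/* Illustrative note: for MODE == SImode this amounts to loading
   count / 4 into the count register and issuing a single "rep movsl";
   DESTEXP and SRCEXP describe the final pointer values
   destptr + (countreg << 2) and srcptr + (countreg << 2) for the RTL.  */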
20243 /* Output "rep; stos" instruction.
20244 Arguments have same meaning as for previous function */
20246 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20247 rtx count, enum machine_mode mode,
20253 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20254 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20255 value = force_reg (mode, gen_lowpart (mode, value));
20256 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20257 if (mode != QImode)
20259 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20260 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20261 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20264 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20265 if (orig_value == const0_rtx && CONST_INT_P (count))
20267 count = GEN_INT (INTVAL (count)
20268 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20269 destmem = shallow_copy_rtx (destmem);
20270 set_mem_size (destmem, count);
20272 else if (MEM_SIZE (destmem))
20273 set_mem_size (destmem, NULL_RTX);
20274 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
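/* Illustrative note: clearing 32 bytes with MODE == SImode comes out
   roughly as "mov $8, %ecx; rep stosl" with the promoted value in %eax,
   DESTEXP giving the final pointer value destptr + (countreg << 2).  */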
20278 emit_strmov (rtx destmem, rtx srcmem,
20279 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20281 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20282 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20283 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20286 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20288 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20289 rtx destptr, rtx srcptr, rtx count, int max_size)
20292 if (CONST_INT_P (count))
20294 HOST_WIDE_INT countval = INTVAL (count);
20297 if ((countval & 0x10) && max_size > 16)
20301 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20302 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20305 gcc_unreachable ();
20308 if ((countval & 0x08) && max_size > 8)
20311 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20314 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20315 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20319 if ((countval & 0x04) && max_size > 4)
20321 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20324 if ((countval & 0x02) && max_size > 2)
20326 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20329 if ((countval & 0x01) && max_size > 1)
20331 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
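/* Worked example (illustrative): on x86-64, a constant residual count of
   23 (binary 10111) with max_size > 16 emits two DImode moves for the
   16-byte piece, then one SImode, one HImode and one QImode move --
   five moves in total and no loop.  */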
20338 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20339 count, 1, OPTAB_DIRECT);
20340 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20341 count, QImode, 1, 4);
20345 /* When there are stringops, we can cheaply increase dest and src pointers.
20346 Otherwise we save code size by maintaining offset (zero is readily
20347 available from the preceding rep operation) and using x86 addressing modes.
20348 */
20349 if (TARGET_SINGLE_STRINGOP)
20353 rtx label = ix86_expand_aligntest (count, 4, true);
20354 src = change_address (srcmem, SImode, srcptr);
20355 dest = change_address (destmem, SImode, destptr);
20356 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20357 emit_label (label);
20358 LABEL_NUSES (label) = 1;
20362 rtx label = ix86_expand_aligntest (count, 2, true);
20363 src = change_address (srcmem, HImode, srcptr);
20364 dest = change_address (destmem, HImode, destptr);
20365 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20366 emit_label (label);
20367 LABEL_NUSES (label) = 1;
20371 rtx label = ix86_expand_aligntest (count, 1, true);
20372 src = change_address (srcmem, QImode, srcptr);
20373 dest = change_address (destmem, QImode, destptr);
20374 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20375 emit_label (label);
20376 LABEL_NUSES (label) = 1;
20381 rtx offset = force_reg (Pmode, const0_rtx);
20386 rtx label = ix86_expand_aligntest (count, 4, true);
20387 src = change_address (srcmem, SImode, srcptr);
20388 dest = change_address (destmem, SImode, destptr);
20389 emit_move_insn (dest, src);
20390 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20391 true, OPTAB_LIB_WIDEN);
20393 emit_move_insn (offset, tmp);
20394 emit_label (label);
20395 LABEL_NUSES (label) = 1;
20399 rtx label = ix86_expand_aligntest (count, 2, true);
20400 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20401 src = change_address (srcmem, HImode, tmp);
20402 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20403 dest = change_address (destmem, HImode, tmp);
20404 emit_move_insn (dest, src);
20405 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20406 true, OPTAB_LIB_WIDEN);
20408 emit_move_insn (offset, tmp);
20409 emit_label (label);
20410 LABEL_NUSES (label) = 1;
20414 rtx label = ix86_expand_aligntest (count, 1, true);
20415 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20416 src = change_address (srcmem, QImode, tmp);
20417 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20418 dest = change_address (destmem, QImode, tmp);
20419 emit_move_insn (dest, src);
20420 emit_label (label);
20421 LABEL_NUSES (label) = 1;
20426 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20428 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20429 rtx count, int max_size)
20432 expand_simple_binop (counter_mode (count), AND, count,
20433 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20434 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20435 gen_lowpart (QImode, value), count, QImode,
20439 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20441 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20445 if (CONST_INT_P (count))
20447 HOST_WIDE_INT countval = INTVAL (count);
20450 if ((countval & 0x10) && max_size > 16)
20454 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20455 emit_insn (gen_strset (destptr, dest, value));
20456 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20457 emit_insn (gen_strset (destptr, dest, value));
20460 gcc_unreachable ();
20463 if ((countval & 0x08) && max_size > 8)
20467 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20468 emit_insn (gen_strset (destptr, dest, value));
20472 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20473 emit_insn (gen_strset (destptr, dest, value));
20474 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20475 emit_insn (gen_strset (destptr, dest, value));
20479 if ((countval & 0x04) && max_size > 4)
20481 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20482 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20485 if ((countval & 0x02) && max_size > 2)
20487 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20488 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20491 if ((countval & 0x01) && max_size > 1)
20493 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20494 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20501 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20506 rtx label = ix86_expand_aligntest (count, 16, true);
20509 dest = change_address (destmem, DImode, destptr);
20510 emit_insn (gen_strset (destptr, dest, value));
20511 emit_insn (gen_strset (destptr, dest, value));
20515 dest = change_address (destmem, SImode, destptr);
20516 emit_insn (gen_strset (destptr, dest, value));
20517 emit_insn (gen_strset (destptr, dest, value));
20518 emit_insn (gen_strset (destptr, dest, value));
20519 emit_insn (gen_strset (destptr, dest, value));
20521 emit_label (label);
20522 LABEL_NUSES (label) = 1;
20526 rtx label = ix86_expand_aligntest (count, 8, true);
20529 dest = change_address (destmem, DImode, destptr);
20530 emit_insn (gen_strset (destptr, dest, value));
20534 dest = change_address (destmem, SImode, destptr);
20535 emit_insn (gen_strset (destptr, dest, value));
20536 emit_insn (gen_strset (destptr, dest, value));
20538 emit_label (label);
20539 LABEL_NUSES (label) = 1;
20543 rtx label = ix86_expand_aligntest (count, 4, true);
20544 dest = change_address (destmem, SImode, destptr);
20545 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20546 emit_label (label);
20547 LABEL_NUSES (label) = 1;
20551 rtx label = ix86_expand_aligntest (count, 2, true);
20552 dest = change_address (destmem, HImode, destptr);
20553 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20554 emit_label (label);
20555 LABEL_NUSES (label) = 1;
20559 rtx label = ix86_expand_aligntest (count, 1, true);
20560 dest = change_address (destmem, QImode, destptr);
20561 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20562 emit_label (label);
20563 LABEL_NUSES (label) = 1;
20567 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20568 to DESIRED_ALIGNMENT. */
20570 expand_movmem_prologue (rtx destmem, rtx srcmem,
20571 rtx destptr, rtx srcptr, rtx count,
20572 int align, int desired_alignment)
20574 if (align <= 1 && desired_alignment > 1)
20576 rtx label = ix86_expand_aligntest (destptr, 1, false);
20577 srcmem = change_address (srcmem, QImode, srcptr);
20578 destmem = change_address (destmem, QImode, destptr);
20579 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20580 ix86_adjust_counter (count, 1);
20581 emit_label (label);
20582 LABEL_NUSES (label) = 1;
20584 if (align <= 2 && desired_alignment > 2)
20586 rtx label = ix86_expand_aligntest (destptr, 2, false);
20587 srcmem = change_address (srcmem, HImode, srcptr);
20588 destmem = change_address (destmem, HImode, destptr);
20589 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20590 ix86_adjust_counter (count, 2);
20591 emit_label (label);
20592 LABEL_NUSES (label) = 1;
20594 if (align <= 4 && desired_alignment > 4)
20596 rtx label = ix86_expand_aligntest (destptr, 4, false);
20597 srcmem = change_address (srcmem, SImode, srcptr);
20598 destmem = change_address (destmem, SImode, destptr);
20599 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20600 ix86_adjust_counter (count, 4);
20601 emit_label (label);
20602 LABEL_NUSES (label) = 1;
20604 gcc_assert (desired_alignment <= 8);
20607 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20608 ALIGN_BYTES is how many bytes need to be copied. */
20610 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20611 int desired_align, int align_bytes)
20614 rtx src_size, dst_size;
20616 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20617 if (src_align_bytes >= 0)
20618 src_align_bytes = desired_align - src_align_bytes;
20619 src_size = MEM_SIZE (src);
20620 dst_size = MEM_SIZE (dst);
20621 if (align_bytes & 1)
20623 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20624 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20626 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20628 if (align_bytes & 2)
20630 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20631 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20632 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20633 set_mem_align (dst, 2 * BITS_PER_UNIT);
20634 if (src_align_bytes >= 0
20635 && (src_align_bytes & 1) == (align_bytes & 1)
20636 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20637 set_mem_align (src, 2 * BITS_PER_UNIT);
20639 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20641 if (align_bytes & 4)
20643 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20644 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20645 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20646 set_mem_align (dst, 4 * BITS_PER_UNIT);
20647 if (src_align_bytes >= 0)
20649 unsigned int src_align = 0;
20650 if ((src_align_bytes & 3) == (align_bytes & 3))
20652 else if ((src_align_bytes & 1) == (align_bytes & 1))
20654 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20655 set_mem_align (src, src_align * BITS_PER_UNIT);
20658 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20660 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20661 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20662 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20663 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20664 if (src_align_bytes >= 0)
20666 unsigned int src_align = 0;
20667 if ((src_align_bytes & 7) == (align_bytes & 7))
20669 else if ((src_align_bytes & 3) == (align_bytes & 3))
20671 else if ((src_align_bytes & 1) == (align_bytes & 1))
20673 if (src_align > (unsigned int) desired_align)
20674 src_align = desired_align;
20675 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20676 set_mem_align (src, src_align * BITS_PER_UNIT);
20678 if (dst_size)
20679 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20680 if (src_size)
20681 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20686 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
20687 to DESIRED_ALIGNMENT. */
20689 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20690 int align, int desired_alignment)
20692 if (align <= 1 && desired_alignment > 1)
20694 rtx label = ix86_expand_aligntest (destptr, 1, false);
20695 destmem = change_address (destmem, QImode, destptr);
20696 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20697 ix86_adjust_counter (count, 1);
20698 emit_label (label);
20699 LABEL_NUSES (label) = 1;
20701 if (align <= 2 && desired_alignment > 2)
20703 rtx label = ix86_expand_aligntest (destptr, 2, false);
20704 destmem = change_address (destmem, HImode, destptr);
20705 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20706 ix86_adjust_counter (count, 2);
20707 emit_label (label);
20708 LABEL_NUSES (label) = 1;
20710 if (align <= 4 && desired_alignment > 4)
20712 rtx label = ix86_expand_aligntest (destptr, 4, false);
20713 destmem = change_address (destmem, SImode, destptr);
20714 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20715 ix86_adjust_counter (count, 4);
20716 emit_label (label);
20717 LABEL_NUSES (label) = 1;
20719 gcc_assert (desired_alignment <= 8);
20722 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
20723 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20725 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20726 int desired_align, int align_bytes)
20729 rtx dst_size = MEM_SIZE (dst);
20730 if (align_bytes & 1)
20732 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20734 emit_insn (gen_strset (destreg, dst,
20735 gen_lowpart (QImode, value)));
20737 if (align_bytes & 2)
20739 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20740 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20741 set_mem_align (dst, 2 * BITS_PER_UNIT);
20743 emit_insn (gen_strset (destreg, dst,
20744 gen_lowpart (HImode, value)));
20746 if (align_bytes & 4)
20748 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20749 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20750 set_mem_align (dst, 4 * BITS_PER_UNIT);
20752 emit_insn (gen_strset (destreg, dst,
20753 gen_lowpart (SImode, value)));
20755 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20756 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20757 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20759 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20763 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20764 static enum stringop_alg
20765 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20766 int *dynamic_check)
20768 const struct stringop_algs * algs;
20769 bool optimize_for_speed;
20770 /* Algorithms using the rep prefix want at least edi and ecx;
20771 additionally, memset wants eax and memcpy wants esi. Don't
20772 consider such algorithms if the user has appropriated those
20773 registers for their own purposes. */
20774 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20775 || (memset
20776 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20778 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20779 || (alg != rep_prefix_1_byte \
20780 && alg != rep_prefix_4_byte \
20781 && alg != rep_prefix_8_byte))
20782 const struct processor_costs *cost;
20784 /* Even if the string operation call is cold, we still might spend a lot
20785 of time processing large blocks. */
20786 if (optimize_function_for_size_p (cfun)
20787 || (optimize_insn_for_size_p ()
20788 && expected_size != -1 && expected_size < 256))
20789 optimize_for_speed = false;
20791 optimize_for_speed = true;
20793 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20795 *dynamic_check = -1;
20797 algs = &cost->memset[TARGET_64BIT != 0];
20799 algs = &cost->memcpy[TARGET_64BIT != 0];
20800 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20801 return stringop_alg;
20802 /* rep; movq or rep; movl is the smallest variant. */
20803 else if (!optimize_for_speed)
20805 if (!count || (count & 3))
20806 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20808 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20810 /* Very tiny blocks are best handled via the loop; REP is expensive to
20811 set up. */
20812 else if (expected_size != -1 && expected_size < 4)
20813 return loop_1_byte;
20814 else if (expected_size != -1)
20817 enum stringop_alg alg = libcall;
20818 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20820 /* We get here if the algorithms that were not libcall-based
20821 were rep-prefix based and we are unable to use rep prefixes
20822 based on global register usage. Break out of the loop and
20823 use the heuristic below. */
20824 if (algs->size[i].max == 0)
20826 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20828 enum stringop_alg candidate = algs->size[i].alg;
20830 if (candidate != libcall && ALG_USABLE_P (candidate))
20832 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20833 last non-libcall inline algorithm. */
20834 if (TARGET_INLINE_ALL_STRINGOPS)
20836 /* When the current size is best to be copied by a libcall,
20837 but we are still forced to inline, run the heuristic below
20838 that will pick code for medium sized blocks. */
20839 if (alg != libcall)
20843 else if (ALG_USABLE_P (candidate))
20847 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20849 /* When asked to inline the call anyway, try to pick a meaningful choice.
20850 We look for the maximal size of block that is faster to copy by hand and
20851 take blocks of at most that size, guessing that the average size will
20852 be roughly half of the block.
20854 If this turns out to be bad, we might simply specify the preferred
20855 choice in ix86_costs. */
20856 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20857 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20860 enum stringop_alg alg;
20862 bool any_alg_usable_p = true;
20864 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20866 enum stringop_alg candidate = algs->size[i].alg;
20867 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20869 if (candidate != libcall && candidate
20870 && ALG_USABLE_P (candidate))
20871 max = algs->size[i].max;
20873 /* If there aren't any usable algorithms, then recursing on
20874 smaller sizes isn't going to find anything. Just return the
20875 simple byte-at-a-time copy loop. */
20876 if (!any_alg_usable_p)
20878 /* Pick something reasonable. */
20879 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20880 *dynamic_check = 128;
20881 return loop_1_byte;
20885 alg = decide_alg (count, max / 2, memset, dynamic_check);
20886 gcc_assert (*dynamic_check == -1);
20887 gcc_assert (alg != libcall);
20888 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20889 *dynamic_check = max;
20892 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20893 #undef ALG_USABLE_P
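/* Illustrative example with hypothetical table entries (not from any real
   cost table): given size[] = {{24, loop}, {8192, rep_prefix_4_byte},
   {-1, libcall}} and a known size of 100, the scan above skips the loop
   entry (24 < 100), accepts rep_prefix_4_byte (8192 >= 100) when rep
   prefixes are usable, and otherwise falls back to the heuristics for
   forced inlining or to libcall.  */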
20896 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20897 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20899 decide_alignment (int align,
20900 enum stringop_alg alg,
20903 int desired_align = 0;
20907 gcc_unreachable ();
20909 case unrolled_loop:
20910 desired_align = GET_MODE_SIZE (Pmode);
20912 case rep_prefix_8_byte:
20915 case rep_prefix_4_byte:
20916 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20917 copying a whole cache line at once. */
20918 if (TARGET_PENTIUMPRO)
20923 case rep_prefix_1_byte:
20924 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20925 copying a whole cache line at once. */
20926 if (TARGET_PENTIUMPRO)
20940 if (desired_align < align)
20941 desired_align = align;
20942 if (expected_size != -1 && expected_size < 4)
20943 desired_align = align;
20944 return desired_align;
20947 /* Return the smallest power of 2 greater than VAL. */
20949 smallest_pow2_greater_than (int val)
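/* A minimal sketch of the elided body, assuming the usual definition:

     int ret = 1;
     while (ret <= val)
       ret <<= 1;
     return ret;

   e.g. 5 -> 8, 8 -> 16 and 0 -> 1, which is what the epilogue sizing
   below relies on.  */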
20957 /* Expand string move (memcpy) operation. Use i386 string operations
20958 when profitable. expand_setmem contains similar code. The code
20959 depends upon architecture, block size and alignment, but always has
20960 the same overall structure:
20962 1) Prologue guard: Conditional that jumps up to epilogues for small
20963 blocks that can be handled by epilogue alone. This is faster
20964 but also needed for correctness, since the prologue assumes the block
20965 is larger than the desired alignment.
20967 Optional dynamic check for size and libcall for large
20968 blocks is emitted here too, with -minline-stringops-dynamically.
20970 2) Prologue: copy first few bytes in order to get destination
20971 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
20972 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
20973 copied. We emit either a jump tree on power of two sized
20974 blocks, or a byte loop.
20976 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20977 with specified algorithm.
20979 4) Epilogue: code copying tail of the block that is too small to be
20980 handled by main body (or up to size guarded by prologue guard). */
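/* Illustrative sketch (not from the original sources) of the code shape
   emitted for a variable-size copy with the unrolled_loop algorithm on
   x86-64 (size_needed == 32, desired alignment 8):

     if (n < 32) goto epilogue;                  // 1) prologue guard
     while (dst & 7) { *dst++ = *src++; n--; }   // 2) alignment prologue
     for (; n >= 32; n -= 32)                    // 3) main body
       copy 4 x 8 bytes;
   epilogue:                                     // 4) epilogue
     copy the remaining n & 31 bytes;
*/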
20983 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20984 rtx expected_align_exp, rtx expected_size_exp)
20990 rtx jump_around_label = NULL;
20991 HOST_WIDE_INT align = 1;
20992 unsigned HOST_WIDE_INT count = 0;
20993 HOST_WIDE_INT expected_size = -1;
20994 int size_needed = 0, epilogue_size_needed;
20995 int desired_align = 0, align_bytes = 0;
20996 enum stringop_alg alg;
20998 bool need_zero_guard = false;
21000 if (CONST_INT_P (align_exp))
21001 align = INTVAL (align_exp);
21002 /* i386 can do misaligned access at reasonably increased cost. */
21003 if (CONST_INT_P (expected_align_exp)
21004 && INTVAL (expected_align_exp) > align)
21005 align = INTVAL (expected_align_exp);
21006 /* ALIGN is the minimum of destination and source alignment, but we care here
21007 just about destination alignment. */
21008 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
21009 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
21011 if (CONST_INT_P (count_exp))
21012 count = expected_size = INTVAL (count_exp);
21013 if (CONST_INT_P (expected_size_exp) && count == 0)
21014 expected_size = INTVAL (expected_size_exp);
21016 /* Make sure we don't need to care about overflow later on. */
21017 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21020 /* Step 0: Decide on preferred algorithm, desired alignment and
21021 size of chunks to be copied by main loop. */
21023 alg = decide_alg (count, expected_size, false, &dynamic_check);
21024 desired_align = decide_alignment (align, alg, expected_size);
21026 if (!TARGET_ALIGN_STRINGOPS)
21027 align = desired_align;
21029 if (alg == libcall)
21031 gcc_assert (alg != no_stringop);
21033 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
21034 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21035 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
21040 gcc_unreachable ();
21042 need_zero_guard = true;
21043 size_needed = GET_MODE_SIZE (Pmode);
21045 case unrolled_loop:
21046 need_zero_guard = true;
21047 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
21049 case rep_prefix_8_byte:
21052 case rep_prefix_4_byte:
21055 case rep_prefix_1_byte:
21059 need_zero_guard = true;
21064 epilogue_size_needed = size_needed;
21066 /* Step 1: Prologue guard. */
21068 /* Alignment code needs count to be in register. */
21069 if (CONST_INT_P (count_exp) && desired_align > align)
21071 if (INTVAL (count_exp) > desired_align
21072 && INTVAL (count_exp) > size_needed)
21075 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21076 if (align_bytes <= 0)
21079 align_bytes = desired_align - align_bytes;
21081 if (align_bytes == 0)
21082 count_exp = force_reg (counter_mode (count_exp), count_exp);
21084 gcc_assert (desired_align >= 1 && align >= 1);
21086 /* Ensure that alignment prologue won't copy past end of block. */
21087 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21089 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21090 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21091 Make sure it is a power of 2. */
21092 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21096 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21098 /* If the main algorithm works on QImode, no epilogue is needed.
21099 For small sizes just don't align anything. */
21100 if (size_needed == 1)
21101 desired_align = align;
21108 label = gen_label_rtx ();
21109 emit_cmp_and_jump_insns (count_exp,
21110 GEN_INT (epilogue_size_needed),
21111 LTU, 0, counter_mode (count_exp), 1, label);
21112 if (expected_size == -1 || expected_size < epilogue_size_needed)
21113 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21115 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21119 /* Emit code to decide at runtime whether library call or inline should be
21120 used. */
21121 if (dynamic_check != -1)
21123 if (CONST_INT_P (count_exp))
21125 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21127 emit_block_move_via_libcall (dst, src, count_exp, false);
21128 count_exp = const0_rtx;
21129 goto epilogue;
21134 rtx hot_label = gen_label_rtx ();
21135 jump_around_label = gen_label_rtx ();
21136 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21137 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21138 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21139 emit_block_move_via_libcall (dst, src, count_exp, false);
21140 emit_jump (jump_around_label);
21141 emit_label (hot_label);
21145 /* Step 2: Alignment prologue. */
21147 if (desired_align > align)
21149 if (align_bytes == 0)
21151 /* Except for the first move in epilogue, we no longer know
21152 constant offset in aliasing info. It doesn't seem to be worth
21153 the pain to maintain it for the first move, so throw away
21154 the info early. */
21155 src = change_address (src, BLKmode, srcreg);
21156 dst = change_address (dst, BLKmode, destreg);
21157 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21162 /* If we know how many bytes need to be stored before dst is
21163 sufficiently aligned, maintain aliasing info accurately. */
21164 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21165 desired_align, align_bytes);
21166 count_exp = plus_constant (count_exp, -align_bytes);
21167 count -= align_bytes;
21169 if (need_zero_guard
21170 && (count < (unsigned HOST_WIDE_INT) size_needed
21171 || (align_bytes == 0
21172 && count < ((unsigned HOST_WIDE_INT) size_needed
21173 + desired_align - align))))
21175 /* It is possible that we copied enough so the main loop will not
21176 execute. */
21177 gcc_assert (size_needed > 1);
21178 if (label == NULL_RTX)
21179 label = gen_label_rtx ();
21180 emit_cmp_and_jump_insns (count_exp,
21181 GEN_INT (size_needed),
21182 LTU, 0, counter_mode (count_exp), 1, label);
21183 if (expected_size == -1
21184 || expected_size < (desired_align - align) / 2 + size_needed)
21185 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21187 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21190 if (label && size_needed == 1)
21192 emit_label (label);
21193 LABEL_NUSES (label) = 1;
21195 epilogue_size_needed = 1;
21197 else if (label == NULL_RTX)
21198 epilogue_size_needed = size_needed;
21200 /* Step 3: Main loop. */
21206 gcc_unreachable ();
21208 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21209 count_exp, QImode, 1, expected_size);
21212 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21213 count_exp, Pmode, 1, expected_size);
21215 case unrolled_loop:
21216 /* Unroll only by a factor of 2 in 32bit mode, since we don't have enough
21217 registers for 4 temporaries anyway. */
21218 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21219 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21222 case rep_prefix_8_byte:
21223 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21226 case rep_prefix_4_byte:
21227 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21230 case rep_prefix_1_byte:
21231 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21235 /* Properly adjust the offsets of the src and dest memory for aliasing. */
21236 if (CONST_INT_P (count_exp))
21238 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21239 (count / size_needed) * size_needed);
21240 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21241 (count / size_needed) * size_needed);
21245 src = change_address (src, BLKmode, srcreg);
21246 dst = change_address (dst, BLKmode, destreg);
21249 /* Step 4: Epilogue to copy the remaining bytes. */
21253 /* When the main loop is done, COUNT_EXP might hold original count,
21254 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21255 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21256 bytes. Compensate if needed. */
21258 if (size_needed < epilogue_size_needed)
21260 tmp =
21261 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21262 GEN_INT (size_needed - 1), count_exp, 1,
21263 OPTAB_DIRECT);
21264 if (tmp != count_exp)
21265 emit_move_insn (count_exp, tmp);
21267 emit_label (label);
21268 LABEL_NUSES (label) = 1;
21271 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21272 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21273 epilogue_size_needed);
21274 if (jump_around_label)
21275 emit_label (jump_around_label);
21279 /* Helper function for memset. For QImode value 0xXY produce
21280 0xXYXYXYXY of width specified by MODE. This is essentially
21281 a * 0x01010101, but we can do slightly better than
21282 synth_mult by unwinding the sequence by hand on CPUs with
21283 slow multiply. */
21284 static rtx
21285 promote_duplicated_reg (enum machine_mode mode, rtx val)
21287 enum machine_mode valmode = GET_MODE (val);
21289 int nops = mode == DImode ? 3 : 2;
21291 gcc_assert (mode == SImode || mode == DImode);
21292 if (val == const0_rtx)
21293 return copy_to_mode_reg (mode, const0_rtx);
21294 if (CONST_INT_P (val))
21296 HOST_WIDE_INT v = INTVAL (val) & 255;
21300 if (mode == DImode)
21301 v |= (v << 16) << 16;
21302 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
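/* Worked example (illustrative): for a constant VAL of 0x5A and SImode,
   v goes 0x5A -> 0x5A5A (v |= v << 8) -> 0x5A5A5A5A (v |= v << 16);
   for DImode, (v << 16) << 16 is OR-ed in as well to replicate the
   pattern into the upper 32 bits.  */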
21305 if (valmode == VOIDmode)
21307 if (valmode != QImode)
21308 val = gen_lowpart (QImode, val);
21309 if (mode == QImode)
21311 if (!TARGET_PARTIAL_REG_STALL)
21313 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21314 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21315 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21316 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21318 rtx reg = convert_modes (mode, QImode, val, true);
21319 tmp = promote_duplicated_reg (mode, const1_rtx);
21320 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21325 rtx reg = convert_modes (mode, QImode, val, true);
21327 if (!TARGET_PARTIAL_REG_STALL)
21328 if (mode == SImode)
21329 emit_insn (gen_movsi_insv_1 (reg, reg));
21331 emit_insn (gen_movdi_insv_1 (reg, reg));
21334 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21335 NULL, 1, OPTAB_DIRECT);
21337 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21339 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21340 NULL, 1, OPTAB_DIRECT);
21341 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21342 if (mode == SImode)
21343 return reg;
21344 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21345 NULL, 1, OPTAB_DIRECT);
21346 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21351 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
21352 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
21353 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
21355 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21360 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21361 promoted_val = promote_duplicated_reg (DImode, val);
21362 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21363 promoted_val = promote_duplicated_reg (SImode, val);
21364 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21365 promoted_val = promote_duplicated_reg (HImode, val);
21367 promoted_val = val;
21369 return promoted_val;
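/* Illustrative note: the value is promoted only as far as it will be
   used; e.g. size_needed == 8 on a 64-bit target yields a DImode
   replication, while a pure byte loop (size_needed == 1, no alignment
   prologue) keeps the original QImode value.  */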
21372 /* Expand string set (memset/bzero) operation. Use i386 string operations
21373 when profitable. See the expand_movmem comment for an explanation of the
21374 individual steps performed. */
21376 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21377 rtx expected_align_exp, rtx expected_size_exp)
21382 rtx jump_around_label = NULL;
21383 HOST_WIDE_INT align = 1;
21384 unsigned HOST_WIDE_INT count = 0;
21385 HOST_WIDE_INT expected_size = -1;
21386 int size_needed = 0, epilogue_size_needed;
21387 int desired_align = 0, align_bytes = 0;
21388 enum stringop_alg alg;
21389 rtx promoted_val = NULL;
21390 bool force_loopy_epilogue = false;
21392 bool need_zero_guard = false;
21394 if (CONST_INT_P (align_exp))
21395 align = INTVAL (align_exp);
21396 /* i386 can do misaligned access at reasonably increased cost. */
21397 if (CONST_INT_P (expected_align_exp)
21398 && INTVAL (expected_align_exp) > align)
21399 align = INTVAL (expected_align_exp);
21400 if (CONST_INT_P (count_exp))
21401 count = expected_size = INTVAL (count_exp);
21402 if (CONST_INT_P (expected_size_exp) && count == 0)
21403 expected_size = INTVAL (expected_size_exp);
21405 /* Make sure we don't need to care about overflow later on. */
21406 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21409 /* Step 0: Decide on preferred algorithm, desired alignment and
21410 size of chunks to be copied by main loop. */
21412 alg = decide_alg (count, expected_size, true, &dynamic_check);
21413 desired_align = decide_alignment (align, alg, expected_size);
21415 if (!TARGET_ALIGN_STRINGOPS)
21416 align = desired_align;
21418 if (alg == libcall)
21420 gcc_assert (alg != no_stringop);
21422 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21423 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21428 gcc_unreachable ();
21430 need_zero_guard = true;
21431 size_needed = GET_MODE_SIZE (Pmode);
21433 case unrolled_loop:
21434 need_zero_guard = true;
21435 size_needed = GET_MODE_SIZE (Pmode) * 4;
21437 case rep_prefix_8_byte:
21440 case rep_prefix_4_byte:
21443 case rep_prefix_1_byte:
21447 need_zero_guard = true;
21451 epilogue_size_needed = size_needed;
21453 /* Step 1: Prologue guard. */
21455 /* Alignment code needs count to be in register. */
21456 if (CONST_INT_P (count_exp) && desired_align > align)
21458 if (INTVAL (count_exp) > desired_align
21459 && INTVAL (count_exp) > size_needed)
21462 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21463 if (align_bytes <= 0)
21466 align_bytes = desired_align - align_bytes;
21468 if (align_bytes == 0)
21470 enum machine_mode mode = SImode;
21471 if (TARGET_64BIT && (count & ~0xffffffff))
21473 count_exp = force_reg (mode, count_exp);
21476 /* Do the cheap promotion to allow better CSE across the
21477 main loop and epilogue (i.e. one load of the big constant in the
21478 front of all code). */
21479 if (CONST_INT_P (val_exp))
21480 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21481 desired_align, align);
21482 /* Ensure that alignment prologue won't copy past end of block. */
21483 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21485 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21486 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21487 Make sure it is a power of 2. */
21488 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21490 /* To improve performance of small blocks, we jump around the VAL
21491 promoting code. This means that if the promoted VAL is not constant,
21492 we might not use it in the epilogue and have to use the byte
21493 loop variant. */
21494 if (epilogue_size_needed > 2 && !promoted_val)
21495 force_loopy_epilogue = true;
21498 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21500 /* If the main algorithm works on QImode, no epilogue is needed.
21501 For small sizes just don't align anything. */
21502 if (size_needed == 1)
21503 desired_align = align;
21510 label = gen_label_rtx ();
21511 emit_cmp_and_jump_insns (count_exp,
21512 GEN_INT (epilogue_size_needed),
21513 LTU, 0, counter_mode (count_exp), 1, label);
21514 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21515 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21517 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21520 if (dynamic_check != -1)
21522 rtx hot_label = gen_label_rtx ();
21523 jump_around_label = gen_label_rtx ();
21524 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21525 LEU, 0, counter_mode (count_exp), 1, hot_label);
21526 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21527 set_storage_via_libcall (dst, count_exp, val_exp, false);
21528 emit_jump (jump_around_label);
21529 emit_label (hot_label);
21532 /* Step 2: Alignment prologue. */
21534 /* Do the expensive promotion once we branched off the small blocks. */
21536 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21537 desired_align, align);
21538 gcc_assert (desired_align >= 1 && align >= 1);
21540 if (desired_align > align)
21542 if (align_bytes == 0)
21544 /* Except for the first move in epilogue, we no longer know
21545 constant offset in aliasing info. It doesn't seem to be worth
21546 the pain to maintain it for the first move, so throw away
21547 the info early. */
21548 dst = change_address (dst, BLKmode, destreg);
21549 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21554 /* If we know how many bytes need to be stored before dst is
21555 sufficiently aligned, maintain aliasing info accurately. */
21556 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21557 desired_align, align_bytes);
21558 count_exp = plus_constant (count_exp, -align_bytes);
21559 count -= align_bytes;
21561 if (need_zero_guard
21562 && (count < (unsigned HOST_WIDE_INT) size_needed
21563 || (align_bytes == 0
21564 && count < ((unsigned HOST_WIDE_INT) size_needed
21565 + desired_align - align))))
21567 /* It is possible that we copied enough so the main loop will not
21568 execute. */
21569 gcc_assert (size_needed > 1);
21570 if (label == NULL_RTX)
21571 label = gen_label_rtx ();
21572 emit_cmp_and_jump_insns (count_exp,
21573 GEN_INT (size_needed),
21574 LTU, 0, counter_mode (count_exp), 1, label);
21575 if (expected_size == -1
21576 || expected_size < (desired_align - align) / 2 + size_needed)
21577 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21579 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21582 if (label && size_needed == 1)
21584 emit_label (label);
21585 LABEL_NUSES (label) = 1;
21587 promoted_val = val_exp;
21588 epilogue_size_needed = 1;
21590 else if (label == NULL_RTX)
21591 epilogue_size_needed = size_needed;
21593 /* Step 3: Main loop. */
21599 gcc_unreachable ();
21601 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21602 count_exp, QImode, 1, expected_size);
21605 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21606 count_exp, Pmode, 1, expected_size);
21608 case unrolled_loop:
21609 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21610 count_exp, Pmode, 4, expected_size);
21612 case rep_prefix_8_byte:
21613 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21616 case rep_prefix_4_byte:
21617 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21620 case rep_prefix_1_byte:
21621 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21625 /* Properly adjust the offset of the dest memory for aliasing. */
21626 if (CONST_INT_P (count_exp))
21627 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21628 (count / size_needed) * size_needed);
21630 dst = change_address (dst, BLKmode, destreg);
21632 /* Step 4: Epilogue to copy the remaining bytes. */
21636 /* When the main loop is done, COUNT_EXP might hold original count,
21637 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21638 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21639 bytes. Compensate if needed. */
21641 if (size_needed < epilogue_size_needed)
21643 tmp =
21644 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21645 GEN_INT (size_needed - 1), count_exp, 1,
21646 OPTAB_DIRECT);
21647 if (tmp != count_exp)
21648 emit_move_insn (count_exp, tmp);
21650 emit_label (label);
21651 LABEL_NUSES (label) = 1;
21654 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21656 if (force_loopy_epilogue)
21657 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21658 epilogue_size_needed);
21660 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21661 epilogue_size_needed);
21663 if (jump_around_label)
21664 emit_label (jump_around_label);
21668 /* Expand the appropriate insns for doing strlen if not just doing
21669 repnz; scasb
21671 out = result, initialized with the start address
21672 align_rtx = alignment of the address.
21673 scratch = scratch register, initialized with the start address when
21674 not aligned, otherwise undefined
21676 This is just the body. It needs the initializations mentioned above and
21677 some address computing at the end. These things are done in i386.md. */
21680 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21684 rtx align_2_label = NULL_RTX;
21685 rtx align_3_label = NULL_RTX;
21686 rtx align_4_label = gen_label_rtx ();
21687 rtx end_0_label = gen_label_rtx ();
21689 rtx tmpreg = gen_reg_rtx (SImode);
21690 rtx scratch = gen_reg_rtx (SImode);
21694 if (CONST_INT_P (align_rtx))
21695 align = INTVAL (align_rtx);
21697 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21699 /* Is there a known alignment and is it less than 4? */
21702 rtx scratch1 = gen_reg_rtx (Pmode);
21703 emit_move_insn (scratch1, out);
21704 /* Is there a known alignment and is it not 2? */
21707 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21708 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21710 /* Leave just the 3 lower bits. */
21711 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21712 NULL_RTX, 0, OPTAB_WIDEN);
21714 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21715 Pmode, 1, align_4_label);
21716 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21717 Pmode, 1, align_2_label);
21718 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21719 Pmode, 1, align_3_label);
21723 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21724 check whether it is aligned to 4 bytes. */
21726 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21727 NULL_RTX, 0, OPTAB_WIDEN);
21729 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21730 Pmode, 1, align_4_label);
21733 mem = change_address (src, QImode, out);
21735 /* Now compare the bytes. */
21737 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21738 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21739 QImode, 1, end_0_label);
21741 /* Increment the address. */
21742 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21744 /* Not needed with an alignment of 2. */
21747 emit_label (align_2_label);
21749 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21752 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21754 emit_label (align_3_label);
21757 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21760 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21763 /* Generate loop to check 4 bytes at a time. It is not a good idea to
21764 align this loop; it only makes the program larger and does not
21765 speed it up. */
21766 emit_label (align_4_label);
21768 mem = change_address (src, SImode, out);
21769 emit_move_insn (scratch, mem);
21770 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21772 /* This formula yields a nonzero result iff one of the bytes is zero.
21773 This saves three branches inside the loop and many cycles. */
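/* Worked example (illustrative, not part of the original sources).  The
   sequence below computes

     tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080;

   For scratch == 0x41420043 (zero in the second-lowest byte):
   scratch - 0x01010101 == 0x4040FF42, ~scratch == 0xBEBDFFBC, and the
   conjunction is 0x00008000 != 0, so a zero byte is detected.  For
   scratch == 0x64636261 ("abcd") the result is 0 and the loop goes on.  */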
21775 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21776 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21777 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21778 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21779 gen_int_mode (0x80808080, SImode)));
21780 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21781 align_4_label);
21785 rtx reg = gen_reg_rtx (SImode);
21786 rtx reg2 = gen_reg_rtx (Pmode);
21787 emit_move_insn (reg, tmpreg);
21788 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21790 /* If zero is not in the first two bytes, move two bytes forward. */
21791 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21792 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21793 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21794 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21795 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21796 reg,
21797 tmpreg)));
21798 /* Emit lea manually to avoid clobbering of flags. */
21799 emit_insn (gen_rtx_SET (SImode, reg2,
21800 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21802 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21803 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21804 emit_insn (gen_rtx_SET (VOIDmode, out,
21805 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21806 reg2,
21807 out)));
21811 rtx end_2_label = gen_label_rtx ();
21812 /* Is zero in the first two bytes? */
21814 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21815 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21816 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21817 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21818 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21819 pc_rtx);
21820 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21821 JUMP_LABEL (tmp) = end_2_label;
21823 /* Not in the first two. Move two bytes forward. */
21824 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21825 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21827 emit_label (end_2_label);
21831 /* Avoid branch in fixing the byte. */
21832 tmpreg = gen_lowpart (QImode, tmpreg);
21833 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21834 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21835 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21836 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
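/* Worked example (illustrative): after the word loop, OUT points 4 (or,
   after the two-byte adjustment above, 6) bytes past the start of the
   scanned word.  Doubling the mask byte sets the carry exactly when the
   zero was in the lower byte of the selected 16-bit half, so subtracting
   3 plus carry lands OUT precisely on the zero byte.  */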
21838 emit_label (end_0_label);
21841 /* Expand strlen. */
21844 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21846 rtx addr, scratch1, scratch2, scratch3, scratch4;
21848 /* The generic case of strlen expander is long. Avoid its
21849 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
21851 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21852 && !TARGET_INLINE_ALL_STRINGOPS
21853 && !optimize_insn_for_size_p ()
21854 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21857 addr = force_reg (Pmode, XEXP (src, 0));
21858 scratch1 = gen_reg_rtx (Pmode);
21860 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21861 && !optimize_insn_for_size_p ())
21863 /* Well it seems that some optimizer does not combine a call like
21864 foo(strlen(bar), strlen(bar));
21865 when the move and the subtraction are done here. It does calculate
21866 the length just once when these instructions are done inside of
21867 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21868 often used and I use one fewer register for the lifetime of
21869 output_strlen_unroll() this is better. */
21871 emit_move_insn (out, addr);
21873 ix86_expand_strlensi_unroll_1 (out, src, align);
21875 /* strlensi_unroll_1 returns the address of the zero at the end of
21876 the string, like memchr(), so compute the length by subtracting
21877 the start address. */
21878 emit_insn (ix86_gen_sub3 (out, out, addr));
21884 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21885 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21888 scratch2 = gen_reg_rtx (Pmode);
21889 scratch3 = gen_reg_rtx (Pmode);
21890 scratch4 = force_reg (Pmode, constm1_rtx);
21892 emit_move_insn (scratch3, addr);
21893 eoschar = force_reg (QImode, eoschar);
21895 src = replace_equiv_address_nv (src, scratch3);
21897 /* If .md starts supporting :P, this can be done in .md. */
21898 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21899 scratch4), UNSPEC_SCAS);
21900 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21901 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21902 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21907 /* For a given symbol (function) construct code to compute the address of
21908 its PLT entry in the large x86-64 PIC model. */
21910 construct_plt_address (rtx symbol)
21912 rtx tmp = gen_reg_rtx (Pmode);
21913 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21915 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21916 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21918 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21919 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21924 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21925 rtx callarg2,
21926 rtx pop, int sibcall)
21928 rtx use = NULL, call;
21930 if (pop == const0_rtx)
21931 pop = NULL;
21932 gcc_assert (!TARGET_64BIT || !pop);
21934 if (TARGET_MACHO && !TARGET_64BIT)
21937 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21938 fnaddr = machopic_indirect_call_target (fnaddr);
21943 /* Static functions and indirect calls don't need the pic register. */
21944 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21945 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21946 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21947 use_reg (&use, pic_offset_table_rtx);
21950 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21952 rtx al = gen_rtx_REG (QImode, AX_REG);
21953 emit_move_insn (al, callarg2);
21954 use_reg (&use, al);
21957 if (ix86_cmodel == CM_LARGE_PIC
21959 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21960 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21961 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21962   else if (sibcall
21963            ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21964 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21966 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21967 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21970 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21971   if (retval)
21972     call = gen_rtx_SET (VOIDmode, retval, call);
21973   if (pop)
21974     {
21975       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21976       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21977       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21978     }
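  /* Editor's illustration (assumed RTL shape, not quoted from the
     sources): a popping call ends up as a pattern of the form

       (parallel [(call (mem:QI (symbol_ref "foo")) (const_int N))
                  (set (reg sp) (plus (reg sp) (const_int POP)))])

     so the callee's stack adjustment rides on the call insn itself.  */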
21979 if (TARGET_64BIT_MS_ABI
21980 && (!callarg2 || INTVAL (callarg2) != -2))
21982       /* We need to represent that SI and DI registers are clobbered
21983          by SYSV calls.  */
21984 static int clobbered_registers[] = {
21985 XMM6_REG, XMM7_REG, XMM8_REG,
21986 XMM9_REG, XMM10_REG, XMM11_REG,
21987 XMM12_REG, XMM13_REG, XMM14_REG,
21988 XMM15_REG, SI_REG, DI_REG
21991 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21992 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21993 UNSPEC_MS_TO_SYSV_CALL);
21997 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21998 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
22001 (SSE_REGNO_P (clobbered_registers[i])
22003 clobbered_registers[i]));
22005 call = gen_rtx_PARALLEL (VOIDmode,
22006 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
22010 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
22011 if (TARGET_VZEROUPPER)
22016 if (cfun->machine->callee_pass_avx256_p)
22018 if (cfun->machine->callee_return_avx256_p)
22019 avx256 = callee_return_pass_avx256;
22021 avx256 = callee_pass_avx256;
22023 else if (cfun->machine->callee_return_avx256_p)
22024 avx256 = callee_return_avx256;
22026 avx256 = call_no_avx256;
22028 if (reload_completed)
22029 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
22032 unspec = gen_rtx_UNSPEC (VOIDmode,
22033 gen_rtvec (1, GEN_INT (avx256)),
22034 UNSPEC_CALL_NEEDS_VZEROUPPER);
22035 call = gen_rtx_PARALLEL (VOIDmode,
22036 gen_rtvec (2, call, unspec));
22040 call = emit_call_insn (call);
22042 CALL_INSN_FUNCTION_USAGE (call) = use;
22048 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
22050 rtx call = XVECEXP (PATTERN (insn), 0, 0);
22051 emit_insn (gen_avx_vzeroupper (vzeroupper));
22052 emit_call_insn (call);
22055 /* Output the assembly for a call instruction. */
22058 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
22060 bool direct_p = constant_call_address_operand (call_op, Pmode);
22061 bool seh_nop_p = false;
22063 gcc_assert (addr_op == 0 || addr_op == 1);
22065 if (SIBLING_CALL_P (insn))
22068 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
22069 /* SEH epilogue detection requires the indirect branch case
22070 to include REX.W. */
22071 else if (TARGET_SEH)
22072 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
22074 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
22077 /* SEH unwinding can require an extra nop to be emitted in several
22078 circumstances. Determine if we have one of those. */
22083 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
22085 /* If we get to another real insn, we don't need the nop. */
22089 /* If we get to the epilogue note, prevent a catch region from
22090 being adjacent to the standard epilogue sequence. If non-
22091 call-exceptions, we'll have done this during epilogue emission. */
22092 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
22093 && !flag_non_call_exceptions
22094 && !can_throw_internal (insn))
22101 /* If we didn't find a real insn following the call, prevent the
22102 unwinder from looking into the next function. */
22110 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
22112 return addr_op ? "call\t%P1" : "call\t%P0";
22117 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
22119 return addr_op ? "call\t%A1" : "call\t%A0";
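/* Editor's examples (illustrative): for a direct call the %P templates
   print e.g. "call\tfoo", while the indirect %A templates print
   "call\t*%eax" or "call\t*8(%ebp)"; the sibcall cases above emit
   "jmp" with the same operand forms.  */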
22123 /* Clear stack slot assignments remembered from previous functions.
22124 This is called from INIT_EXPANDERS once before RTL is emitted for each
22127 static struct machine_function *
22128 ix86_init_machine_status (void)
22130 struct machine_function *f;
22132 f = ggc_alloc_cleared_machine_function ();
22133 f->use_fast_prologue_epilogue_nregs = -1;
22134 f->tls_descriptor_call_expanded_p = 0;
22135 f->call_abi = ix86_abi;
22140 /* Return a MEM corresponding to a stack slot with mode MODE.
22141 Allocate a new slot if necessary.
22143 The RTL for a function can have several slots available: N is
22144 which slot to use. */
22147 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22149 struct stack_local_entry *s;
22151 gcc_assert (n < MAX_386_STACK_LOCALS);
22153 /* Virtual slot is valid only before vregs are instantiated. */
22154 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22156 for (s = ix86_stack_locals; s; s = s->next)
22157 if (s->mode == mode && s->n == n)
22158 return copy_rtx (s->rtl);
22160 s = ggc_alloc_stack_local_entry ();
22163 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22165 s->next = ix86_stack_locals;
22166 ix86_stack_locals = s;
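/* Editor's sketch of the caching scheme above (illustrative only; the
   names below are hypothetical, not GCC API):  */
#if 0 /* illustrative sketch, assumes <stdlib.h> */
struct slot { int mode, n; void *rtl; struct slot *next; };
static struct slot *slots;

static void *
get_slot (int mode, int n)
{
  struct slot *s;
  for (s = slots; s; s = s->next)       /* reuse a previously assigned slot */
    if (s->mode == mode && s->n == n)
      return s->rtl;
  s = calloc (1, sizeof *s);            /* otherwise allocate and chain one */
  s->mode = mode;
  s->n = n;
  s->rtl = allocate_new_slot (mode);    /* hypothetical allocator */
  s->next = slots;
  slots = s;
  return s->rtl;
}
#endif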
22170 /* Construct the SYMBOL_REF for the tls_get_addr function. */
22172 static GTY(()) rtx ix86_tls_symbol;
22174 ix86_tls_get_addr (void)
22177 if (!ix86_tls_symbol)
22179 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
22180 (TARGET_ANY_GNU_TLS
22182 ? "___tls_get_addr"
22183 : "__tls_get_addr");
22186 return ix86_tls_symbol;
22189 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
22191 static GTY(()) rtx ix86_tls_module_base_symbol;
22193 ix86_tls_module_base (void)
22196 if (!ix86_tls_module_base_symbol)
22198 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
22199 "_TLS_MODULE_BASE_");
22200 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
22201 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
22204 return ix86_tls_module_base_symbol;
22207 /* Calculate the length of the memory address in the instruction
22208 encoding. Does not include the one-byte modrm, opcode, or prefix. */
22211 memory_address_length (rtx addr)
22213 struct ix86_address parts;
22214 rtx base, index, disp;
22218 if (GET_CODE (addr) == PRE_DEC
22219 || GET_CODE (addr) == POST_INC
22220 || GET_CODE (addr) == PRE_MODIFY
22221 || GET_CODE (addr) == POST_MODIFY)
22224 ok = ix86_decompose_address (addr, &parts);
22227 if (parts.base && GET_CODE (parts.base) == SUBREG)
22228 parts.base = SUBREG_REG (parts.base);
22229 if (parts.index && GET_CODE (parts.index) == SUBREG)
22230 parts.index = SUBREG_REG (parts.index);
22233 index = parts.index;
22237   /* Rule of thumb:
22238        - esp as the base always wants an index,
22239        - ebp as the base always wants a displacement,
22240        - r12 as the base always wants an index,
22241        - r13 as the base always wants a displacement.  */
22243 /* Register Indirect. */
22244 if (base && !index && !disp)
22246 /* esp (for its index) and ebp (for its displacement) need
22247          the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
22248          mode.  */
22249       if (REG_P (addr)
22250 && (addr == arg_pointer_rtx
22251 || addr == frame_pointer_rtx
22252 || REGNO (addr) == SP_REG
22253 || REGNO (addr) == BP_REG
22254 || REGNO (addr) == R12_REG
22255 || REGNO (addr) == R13_REG))
22259 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22260 is not disp32, but disp32(%rip), so for disp32
22261 SIB byte is needed, unless print_operand_address
22262      optimizes it into disp32(%rip) or (%rip) is implied
22263      by UNSPEC.  */
22264 else if (disp && !base && !index)
22271 if (GET_CODE (disp) == CONST)
22272 symbol = XEXP (disp, 0);
22273 if (GET_CODE (symbol) == PLUS
22274 && CONST_INT_P (XEXP (symbol, 1)))
22275 symbol = XEXP (symbol, 0);
22277 if (GET_CODE (symbol) != LABEL_REF
22278 && (GET_CODE (symbol) != SYMBOL_REF
22279 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22280 && (GET_CODE (symbol) != UNSPEC
22281 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22282 && XINT (symbol, 1) != UNSPEC_PCREL
22283 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22290 /* Find the length of the displacement constant. */
22293 if (base && satisfies_constraint_K (disp))
22298 /* ebp always wants a displacement. Similarly r13. */
22299 else if (base && REG_P (base)
22300 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22303 /* An index requires the two-byte modrm form.... */
22305 /* ...like esp (or r12), which always wants an index. */
22306 || base == arg_pointer_rtx
22307 || base == frame_pointer_rtx
22308 || (base && REG_P (base)
22309 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
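  /* Editor's worked examples (illustrative extra bytes beyond the modrm
     byte itself, per the rules above):

       (%eax)        -> 0   plain register indirect
       (%esp)        -> 1   SIB byte forced by esp/r12
       (%ebp)        -> 1   disp8 forced by ebp/r13
       4(%eax)       -> 1   disp8
       foo(%eax)     -> 4   disp32
       4(%eax,%ebx)  -> 2   SIB + disp8  */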
22326 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22327    is set, expect that the insn has an 8-bit immediate alternative.  */
22329 ix86_attr_length_immediate_default (rtx insn, int shortform)
22333 extract_insn_cached (insn);
22334 for (i = recog_data.n_operands - 1; i >= 0; --i)
22335 if (CONSTANT_P (recog_data.operand[i]))
22337 enum attr_mode mode = get_attr_mode (insn);
22340 if (shortform && CONST_INT_P (recog_data.operand[i]))
22342 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22349 ival = trunc_int_for_mode (ival, HImode);
22352 ival = trunc_int_for_mode (ival, SImode);
22357 if (IN_RANGE (ival, -128, 127))
22374 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
22379 fatal_insn ("unknown insn mode", insn);
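      /* Editor's example (illustrative): "add $3, %eax" can use the
         sign-extended imm8 form (83 /0 ib) because 3 is in [-128, 127],
         while "add $300, %eax" needs a full imm32 (81 /0 id, or the eax
         short form 05 id).  For DImode the imm32 is sign-extended by the
         hardware, so 64-bit immediates outside the signed 32-bit range
         cannot be encoded at all (except by movabs into a register).  */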
22384 /* Compute default value for "length_address" attribute. */
22386 ix86_attr_length_address_default (rtx insn)
22390 if (get_attr_type (insn) == TYPE_LEA)
22392 rtx set = PATTERN (insn), addr;
22394 if (GET_CODE (set) == PARALLEL)
22395 set = XVECEXP (set, 0, 0);
22397 gcc_assert (GET_CODE (set) == SET);
22399 addr = SET_SRC (set);
22400 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22402 if (GET_CODE (addr) == ZERO_EXTEND)
22403 addr = XEXP (addr, 0);
22404 if (GET_CODE (addr) == SUBREG)
22405 addr = SUBREG_REG (addr);
22408 return memory_address_length (addr);
22411 extract_insn_cached (insn);
22412 for (i = recog_data.n_operands - 1; i >= 0; --i)
22413 if (MEM_P (recog_data.operand[i]))
22415 constrain_operands_cached (reload_completed);
22416 if (which_alternative != -1)
22418 const char *constraints = recog_data.constraints[i];
22419 int alt = which_alternative;
22421 while (*constraints == '=' || *constraints == '+')
22424 while (*constraints++ != ',')
22426 /* Skip ignored operands. */
22427 if (*constraints == 'X')
22430 return memory_address_length (XEXP (recog_data.operand[i], 0));
22435 /* Compute default value for "length_vex" attribute. It includes
22436 2 or 3 byte VEX prefix and 1 opcode byte. */
22439 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22444   /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W
22445      bit requires the 3-byte VEX prefix.  */
22446 if (!has_0f_opcode || has_vex_w)
22449   /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
22453 extract_insn_cached (insn);
22455 for (i = recog_data.n_operands - 1; i >= 0; --i)
22456 if (REG_P (recog_data.operand[i]))
22458 /* REX.W bit uses 3 byte VEX prefix. */
22459 if (GET_MODE (recog_data.operand[i]) == DImode
22460 && GENERAL_REG_P (recog_data.operand[i]))
22465 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22466 if (MEM_P (recog_data.operand[i])
22467 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
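  /* Editor's examples (illustrative): "vaddps %xmm1, %xmm2, %xmm0" fits
     the 2-byte C5 prefix; using %xmm8..%xmm15 where REX.X/REX.B would
     be needed, or a DImode general register needing REX.W, forces the
     3-byte C4 prefix, hence the extra byte accounted for above.  */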
22474 /* Return the maximum number of instructions a cpu can issue. */
22477 ix86_issue_rate (void)
22481 case PROCESSOR_PENTIUM:
22482 case PROCESSOR_ATOM:
22486 case PROCESSOR_PENTIUMPRO:
22487 case PROCESSOR_PENTIUM4:
22488 case PROCESSOR_CORE2_32:
22489 case PROCESSOR_CORE2_64:
22490 case PROCESSOR_COREI7_32:
22491 case PROCESSOR_COREI7_64:
22492 case PROCESSOR_ATHLON:
22494 case PROCESSOR_AMDFAM10:
22495 case PROCESSOR_NOCONA:
22496 case PROCESSOR_GENERIC32:
22497 case PROCESSOR_GENERIC64:
22498 case PROCESSOR_BDVER1:
22499 case PROCESSOR_BTVER1:
22507 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags
22508    set by DEP_INSN and nothing else set by DEP_INSN.  */
22511 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22515 /* Simplify the test for uninteresting insns. */
22516 if (insn_type != TYPE_SETCC
22517 && insn_type != TYPE_ICMOV
22518 && insn_type != TYPE_FCMOV
22519 && insn_type != TYPE_IBR)
22522 if ((set = single_set (dep_insn)) != 0)
22524 set = SET_DEST (set);
22527 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22528 && XVECLEN (PATTERN (dep_insn), 0) == 2
22529 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22530 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22532 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22533       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22538 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22541 /* This test is true if the dependent insn reads the flags but
22542 not any other potentially set register. */
22543 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22546 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22552 /* Return true iff USE_INSN has a memory address with operands set by
22553    SET_INSN.  */
22556 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22559 extract_insn_cached (use_insn);
22560 for (i = recog_data.n_operands - 1; i >= 0; --i)
22561 if (MEM_P (recog_data.operand[i]))
22563 rtx addr = XEXP (recog_data.operand[i], 0);
22564 return modified_in_p (addr, set_insn) != 0;
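/* Editor's example (illustrative): on Pentium the pair

       movl %ebx, %eax
       movl (%eax), %ecx     ; address uses %eax written just above

   hits an address-generation interlock: the load's address calculation
   must wait a cycle for %eax.  That is the extra cost charged in the
   PROCESSOR_PENTIUM case below.  */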
22570 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22572 enum attr_type insn_type, dep_insn_type;
22573 enum attr_memory memory;
22575 int dep_insn_code_number;
22577 /* Anti and output dependencies have zero cost on all CPUs. */
22578 if (REG_NOTE_KIND (link) != 0)
22581 dep_insn_code_number = recog_memoized (dep_insn);
22583 /* If we can't recognize the insns, we can't really do anything. */
22584 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22587 insn_type = get_attr_type (insn);
22588 dep_insn_type = get_attr_type (dep_insn);
22592 case PROCESSOR_PENTIUM:
22593 /* Address Generation Interlock adds a cycle of latency. */
22594 if (insn_type == TYPE_LEA)
22596 rtx addr = PATTERN (insn);
22598 if (GET_CODE (addr) == PARALLEL)
22599 addr = XVECEXP (addr, 0, 0);
22601 gcc_assert (GET_CODE (addr) == SET);
22603 addr = SET_SRC (addr);
22604 if (modified_in_p (addr, dep_insn))
22607 else if (ix86_agi_dependent (dep_insn, insn))
22610 /* ??? Compares pair with jump/setcc. */
22611 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22614 /* Floating point stores require value to be ready one cycle earlier. */
22615 if (insn_type == TYPE_FMOV
22616 && get_attr_memory (insn) == MEMORY_STORE
22617 && !ix86_agi_dependent (dep_insn, insn))
22621 case PROCESSOR_PENTIUMPRO:
22622 memory = get_attr_memory (insn);
22624 /* INT->FP conversion is expensive. */
22625 if (get_attr_fp_int_src (dep_insn))
22628 /* There is one cycle extra latency between an FP op and a store. */
22629 if (insn_type == TYPE_FMOV
22630 && (set = single_set (dep_insn)) != NULL_RTX
22631 && (set2 = single_set (insn)) != NULL_RTX
22632 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22633 && MEM_P (SET_DEST (set2)))
22636       /* Show the ability of the reorder buffer to hide the latency of a load
22637          by executing it in parallel with the previous instruction, when the
22638          previous instruction is not needed to compute the address.  */
22639 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22640 && !ix86_agi_dependent (dep_insn, insn))
22642           /* Claim moves to take one cycle, as the core can issue one load
22643              at a time and the next load can start a cycle later.  */
22644 if (dep_insn_type == TYPE_IMOV
22645 || dep_insn_type == TYPE_FMOV)
22653 memory = get_attr_memory (insn);
22655       /* The esp dependency is resolved before the instruction is really
22656          finished.  */
22657 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22658 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22661 /* INT->FP conversion is expensive. */
22662 if (get_attr_fp_int_src (dep_insn))
22665       /* Show the ability of the reorder buffer to hide the latency of a load
22666          by executing it in parallel with the previous instruction, when the
22667          previous instruction is not needed to compute the address.  */
22668 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22669 && !ix86_agi_dependent (dep_insn, insn))
22671           /* Claim moves to take one cycle, as the core can issue one load
22672              at a time and the next load can start a cycle later.  */
22673 if (dep_insn_type == TYPE_IMOV
22674 || dep_insn_type == TYPE_FMOV)
22683 case PROCESSOR_ATHLON:
22685 case PROCESSOR_AMDFAM10:
22686 case PROCESSOR_BDVER1:
22687 case PROCESSOR_BTVER1:
22688 case PROCESSOR_ATOM:
22689 case PROCESSOR_GENERIC32:
22690 case PROCESSOR_GENERIC64:
22691 memory = get_attr_memory (insn);
22693       /* Show the ability of the reorder buffer to hide the latency of a load
22694          by executing it in parallel with the previous instruction, when the
22695          previous instruction is not needed to compute the address.  */
22696 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22697 && !ix86_agi_dependent (dep_insn, insn))
22699 enum attr_unit unit = get_attr_unit (insn);
22702 /* Because of the difference between the length of integer and
22703 floating unit pipeline preparation stages, the memory operands
22704 for floating point are cheaper.
22706              ??? For Athlon the difference is most probably 2.  */
22707 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22710 loadcost = TARGET_ATHLON ? 2 : 0;
22712 if (cost >= loadcost)
22725 /* How many alternative schedules to try. This should be as wide as the
22726 scheduling freedom in the DFA, but no wider. Making this value too
22727    large results in extra work for the scheduler.  */
22730 ia32_multipass_dfa_lookahead (void)
22734 case PROCESSOR_PENTIUM:
22737 case PROCESSOR_PENTIUMPRO:
22741 case PROCESSOR_CORE2_32:
22742 case PROCESSOR_CORE2_64:
22743 case PROCESSOR_COREI7_32:
22744 case PROCESSOR_COREI7_64:
22745       /* Generally, we want haifa-sched:max_issue() to look ahead as far
22746          as the number of instructions that can be executed in a cycle,
22747          i.e., issue_rate.  I wonder why tuning for many CPUs does not do this.  */
22748 return ix86_issue_rate ();
22757 /* Model the decoder of Core 2/i7.
22758    The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22759    track the instruction fetch block boundaries and make sure that long
22760    (9+ bytes) instructions are assigned to D0.  */
22762 /* Maximum length of an insn that can be handled by
22763 a secondary decoder unit. '8' for Core 2/i7. */
22764 static int core2i7_secondary_decoder_max_insn_size;
22766 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22767 '16' for Core 2/i7. */
22768 static int core2i7_ifetch_block_size;
22770 /* Maximum number of instructions decoder can handle per cycle.
22771 '6' for Core 2/i7. */
22772 static int core2i7_ifetch_block_max_insns;
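/* Editor's sketch of the admission test these parameters imply
   (illustrative only; the real check is in
   core2i7_first_cycle_multipass_filter_ready_try below):  */
#if 0 /* illustrative sketch */
static int
decoder_can_take (int first_cycle_insn_p, int insn_size,
                  int block_len, int block_n_insns)
{
  if (!first_cycle_insn_p && insn_size > 8)  /* long insns go to D0 only */
    return 0;
  if (block_len + insn_size > 16)            /* ifetch block would overflow */
    return 0;
  if (block_n_insns + 1 > 6)                 /* all decoders already busy */
    return 0;
  return 1;
}
#endif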
22774 typedef struct ix86_first_cycle_multipass_data_ *
22775 ix86_first_cycle_multipass_data_t;
22776 typedef const struct ix86_first_cycle_multipass_data_ *
22777 const_ix86_first_cycle_multipass_data_t;
22779 /* A variable to store target state across calls to max_issue within
22781 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22782 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22784 /* Initialize DATA. */
22786 core2i7_first_cycle_multipass_init (void *_data)
22788 ix86_first_cycle_multipass_data_t data
22789 = (ix86_first_cycle_multipass_data_t) _data;
22791 data->ifetch_block_len = 0;
22792 data->ifetch_block_n_insns = 0;
22793 data->ready_try_change = NULL;
22794 data->ready_try_change_size = 0;
22797 /* Advancing the cycle; reset ifetch block counts. */
22799 core2i7_dfa_post_advance_cycle (void)
22801 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22803 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22805 data->ifetch_block_len = 0;
22806 data->ifetch_block_n_insns = 0;
22809 static int min_insn_size (rtx);
22811 /* Filter out insns from ready_try that the core will not be able to issue
22812    on the current cycle due to decoder restrictions.  */
22814 core2i7_first_cycle_multipass_filter_ready_try
22815 (const_ix86_first_cycle_multipass_data_t data,
22816 char *ready_try, int n_ready, bool first_cycle_insn_p)
22823 if (ready_try[n_ready])
22826 insn = get_ready_element (n_ready);
22827 insn_size = min_insn_size (insn);
22829       if (/* If the insn is too long for a secondary decoder ...  */
22830 (!first_cycle_insn_p
22831 && insn_size > core2i7_secondary_decoder_max_insn_size)
22832 /* ... or it would not fit into the ifetch block ... */
22833 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22834 /* ... or the decoder is full already ... */
22835 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22836 /* ... mask the insn out. */
22838 ready_try[n_ready] = 1;
22840 if (data->ready_try_change)
22841 SET_BIT (data->ready_try_change, n_ready);
22846 /* Prepare for a new round of multipass lookahead scheduling. */
22848 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22849 bool first_cycle_insn_p)
22851 ix86_first_cycle_multipass_data_t data
22852 = (ix86_first_cycle_multipass_data_t) _data;
22853 const_ix86_first_cycle_multipass_data_t prev_data
22854 = ix86_first_cycle_multipass_data;
22856 /* Restore the state from the end of the previous round. */
22857 data->ifetch_block_len = prev_data->ifetch_block_len;
22858 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22860   /* Filter instructions that cannot be issued on the current cycle due to
22861      decoder restrictions.  */
22862 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22863 first_cycle_insn_p);
22866 /* INSN is being issued in current solution. Account for its impact on
22867 the decoder model. */
22869 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22870 rtx insn, const void *_prev_data)
22872 ix86_first_cycle_multipass_data_t data
22873 = (ix86_first_cycle_multipass_data_t) _data;
22874 const_ix86_first_cycle_multipass_data_t prev_data
22875 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22877 int insn_size = min_insn_size (insn);
22879 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22880 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22881 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22882 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22884 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22885 if (!data->ready_try_change)
22887 data->ready_try_change = sbitmap_alloc (n_ready);
22888 data->ready_try_change_size = n_ready;
22890 else if (data->ready_try_change_size < n_ready)
22892 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22894 data->ready_try_change_size = n_ready;
22896 sbitmap_zero (data->ready_try_change);
22898   /* Filter out insns from ready_try that the core will not be able to issue
22899      on the current cycle due to decoder restrictions.  */
22900 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22904 /* Revert the effect on ready_try. */
22906 core2i7_first_cycle_multipass_backtrack (const void *_data,
22908 int n_ready ATTRIBUTE_UNUSED)
22910 const_ix86_first_cycle_multipass_data_t data
22911 = (const_ix86_first_cycle_multipass_data_t) _data;
22912 unsigned int i = 0;
22913 sbitmap_iterator sbi;
22915 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22916 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22922 /* Save the result of multipass lookahead scheduling for the next round. */
22924 core2i7_first_cycle_multipass_end (const void *_data)
22926 const_ix86_first_cycle_multipass_data_t data
22927 = (const_ix86_first_cycle_multipass_data_t) _data;
22928 ix86_first_cycle_multipass_data_t next_data
22929 = ix86_first_cycle_multipass_data;
22933 next_data->ifetch_block_len = data->ifetch_block_len;
22934 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22938 /* Deallocate target data. */
22940 core2i7_first_cycle_multipass_fini (void *_data)
22942 ix86_first_cycle_multipass_data_t data
22943 = (ix86_first_cycle_multipass_data_t) _data;
22945 if (data->ready_try_change)
22947 sbitmap_free (data->ready_try_change);
22948 data->ready_try_change = NULL;
22949 data->ready_try_change_size = 0;
22953 /* Prepare for scheduling pass. */
22955 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22956 int verbose ATTRIBUTE_UNUSED,
22957 int max_uid ATTRIBUTE_UNUSED)
22959 /* Install scheduling hooks for current CPU. Some of these hooks are used
22960 in time-critical parts of the scheduler, so we only set them up when
22961 they are actually used. */
22964 case PROCESSOR_CORE2_32:
22965 case PROCESSOR_CORE2_64:
22966 case PROCESSOR_COREI7_32:
22967 case PROCESSOR_COREI7_64:
22968 targetm.sched.dfa_post_advance_cycle
22969 = core2i7_dfa_post_advance_cycle;
22970 targetm.sched.first_cycle_multipass_init
22971 = core2i7_first_cycle_multipass_init;
22972 targetm.sched.first_cycle_multipass_begin
22973 = core2i7_first_cycle_multipass_begin;
22974 targetm.sched.first_cycle_multipass_issue
22975 = core2i7_first_cycle_multipass_issue;
22976 targetm.sched.first_cycle_multipass_backtrack
22977 = core2i7_first_cycle_multipass_backtrack;
22978 targetm.sched.first_cycle_multipass_end
22979 = core2i7_first_cycle_multipass_end;
22980 targetm.sched.first_cycle_multipass_fini
22981 = core2i7_first_cycle_multipass_fini;
22983 /* Set decoder parameters. */
22984 core2i7_secondary_decoder_max_insn_size = 8;
22985 core2i7_ifetch_block_size = 16;
22986 core2i7_ifetch_block_max_insns = 6;
22990 targetm.sched.dfa_post_advance_cycle = NULL;
22991 targetm.sched.first_cycle_multipass_init = NULL;
22992 targetm.sched.first_cycle_multipass_begin = NULL;
22993 targetm.sched.first_cycle_multipass_issue = NULL;
22994 targetm.sched.first_cycle_multipass_backtrack = NULL;
22995 targetm.sched.first_cycle_multipass_end = NULL;
22996 targetm.sched.first_cycle_multipass_fini = NULL;
23002 /* Compute the alignment given to a constant that is being placed in memory.
23003    EXP is the constant and ALIGN is the alignment that the object would
23004    ordinarily have.
23005    The value of this function is used instead of that alignment to align
23006    the object.  */
23009 ix86_constant_alignment (tree exp, int align)
23011 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
23012 || TREE_CODE (exp) == INTEGER_CST)
23014 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
23016 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
23019 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
23020 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
23021 return BITS_PER_WORD;
23026 /* Compute the alignment for a static variable.
23027 TYPE is the data type, and ALIGN is the alignment that
23028 the object would ordinarily have. The value of this function is used
23029 instead of that alignment to align the object. */
23032 ix86_data_alignment (tree type, int align)
23034 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
23036 if (AGGREGATE_TYPE_P (type)
23037 && TYPE_SIZE (type)
23038 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23039 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
23040 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
23041 && align < max_align)
23044   /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
23045      to a 16-byte boundary.  */
23048 if (AGGREGATE_TYPE_P (type)
23049 && TYPE_SIZE (type)
23050 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23051 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
23052 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23056 if (TREE_CODE (type) == ARRAY_TYPE)
23058 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23060 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23063 else if (TREE_CODE (type) == COMPLEX_TYPE)
23066 if (TYPE_MODE (type) == DCmode && align < 64)
23068 if ((TYPE_MODE (type) == XCmode
23069 || TYPE_MODE (type) == TCmode) && align < 128)
23072 else if ((TREE_CODE (type) == RECORD_TYPE
23073 || TREE_CODE (type) == UNION_TYPE
23074 || TREE_CODE (type) == QUAL_UNION_TYPE)
23075 && TYPE_FIELDS (type))
23077 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23079 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23082 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23083 || TREE_CODE (type) == INTEGER_TYPE)
23085 if (TYPE_MODE (type) == DFmode && align < 64)
23087 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23094 /* Compute the alignment for a local variable or a stack slot. EXP is
23095 the data type or decl itself, MODE is the widest mode available and
23096 ALIGN is the alignment that the object would ordinarily have. The
23097    value of this macro is used instead of that alignment to align the
23098    object.  */
23101 ix86_local_alignment (tree exp, enum machine_mode mode,
23102 unsigned int align)
23106 if (exp && DECL_P (exp))
23108 type = TREE_TYPE (exp);
23117 /* Don't do dynamic stack realignment for long long objects with
23118 -mpreferred-stack-boundary=2. */
23121 && ix86_preferred_stack_boundary < 64
23122 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23123 && (!type || !TYPE_USER_ALIGN (type))
23124 && (!decl || !DECL_USER_ALIGN (decl)))
23127 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23128      register in MODE.  We will return the largest alignment of XF
23129      and DF.  */
23132 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23133 align = GET_MODE_ALIGNMENT (DFmode);
23137   /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
23138      to a 16-byte boundary.  The exact wording is:
23140 An array uses the same alignment as its elements, except that a local or
23141 global array variable of length at least 16 bytes or
23142 a C99 variable-length array variable always has alignment of at least 16 bytes.
23144      This was added to allow the use of aligned SSE instructions on arrays.
23145      The rule is meant for static storage (where the compiler cannot do the
23146      analysis by itself).  We follow it for automatic variables only when
23147      convenient: we fully control everything in the function being compiled,
23148      and functions from other units cannot rely on the alignment.
23150      Exclude the va_list type.  It is the common case of a local array where
23151      we cannot benefit from the alignment.  */
23152 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23155 if (AGGREGATE_TYPE_P (type)
23156 && (va_list_type_node == NULL_TREE
23157 || (TYPE_MAIN_VARIANT (type)
23158 != TYPE_MAIN_VARIANT (va_list_type_node)))
23159 && TYPE_SIZE (type)
23160 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23161 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23162 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23165 if (TREE_CODE (type) == ARRAY_TYPE)
23167 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23169 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23172 else if (TREE_CODE (type) == COMPLEX_TYPE)
23174 if (TYPE_MODE (type) == DCmode && align < 64)
23176 if ((TYPE_MODE (type) == XCmode
23177 || TYPE_MODE (type) == TCmode) && align < 128)
23180 else if ((TREE_CODE (type) == RECORD_TYPE
23181 || TREE_CODE (type) == UNION_TYPE
23182 || TREE_CODE (type) == QUAL_UNION_TYPE)
23183 && TYPE_FIELDS (type))
23185 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23187 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23190 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23191 || TREE_CODE (type) == INTEGER_TYPE)
23194 if (TYPE_MODE (type) == DFmode && align < 64)
23196 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
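/* Editor's example (illustrative): under the rules above, a local
   "double d[4]" (32 bytes) is raised to 128-bit alignment on x86-64
   when optimizing for speed, so aligned SSE accesses can be used on
   it, while a lone "double x" is only raised to 64 bits.  */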
23202 /* Compute the minimum required alignment for dynamic stack realignment
23203 purposes for a local variable, parameter or a stack slot. EXP is
23204 the data type or decl itself, MODE is its mode and ALIGN is the
23205 alignment that the object would ordinarily have. */
23208 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23209 unsigned int align)
23213 if (exp && DECL_P (exp))
23215 type = TREE_TYPE (exp);
23224 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23227 /* Don't do dynamic stack realignment for long long objects with
23228 -mpreferred-stack-boundary=2. */
23229 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23230 && (!type || !TYPE_USER_ALIGN (type))
23231 && (!decl || !DECL_USER_ALIGN (decl)))
23237 /* Find a location for the static chain incoming to a nested function.
23238 This is a register, unless all free registers are used by arguments. */
23241 ix86_static_chain (const_tree fndecl, bool incoming_p)
23245 if (!DECL_STATIC_CHAIN (fndecl))
23250 /* We always use R10 in 64-bit mode. */
23258 /* By default in 32-bit mode we use ECX to pass the static chain. */
23261 fntype = TREE_TYPE (fndecl);
23262 ccvt = ix86_get_callcvt (fntype);
23263 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
23265 /* Fastcall functions use ecx/edx for arguments, which leaves
23266 us with EAX for the static chain.
23267 Thiscall functions use ecx for arguments, which also
23268 leaves us with EAX for the static chain. */
23271 else if (ix86_function_regparm (fntype, fndecl) == 3)
23273 /* For regparm 3, we have no free call-clobbered registers in
23274 which to store the static chain. In order to implement this,
23275 we have the trampoline push the static chain to the stack.
23276 However, we can't push a value below the return address when
23277 we call the nested function directly, so we have to use an
23278 alternate entry point. For this we use ESI, and have the
23279 alternate entry point push ESI, so that things appear the
23280 same once we're executing the nested function. */
23283 if (fndecl == current_function_decl)
23284 ix86_static_chain_on_stack = true;
23285 return gen_frame_mem (SImode,
23286 plus_constant (arg_pointer_rtx, -8));
23292 return gen_rtx_REG (Pmode, regno);
23295 /* Emit RTL insns to initialize the variable parts of a trampoline.
23296 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23297 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23298 to be passed to the target function. */
23301 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23305 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23312 /* Depending on the static chain location, either load a register
23313 with a constant, or push the constant to the stack. All of the
23314 instructions are the same size. */
23315 chain = ix86_static_chain (fndecl, true);
23318 if (REGNO (chain) == CX_REG)
23320 else if (REGNO (chain) == AX_REG)
23323 gcc_unreachable ();
23328 mem = adjust_address (m_tramp, QImode, 0);
23329 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23331 mem = adjust_address (m_tramp, SImode, 1);
23332 emit_move_insn (mem, chain_value);
23334 /* Compute offset from the end of the jmp to the target function.
23335 In the case in which the trampoline stores the static chain on
23336 the stack, we need to skip the first insn which pushes the
23337 (call-saved) register static chain; this push is 1 byte. */
23338 disp = expand_binop (SImode, sub_optab, fnaddr,
23339 plus_constant (XEXP (m_tramp, 0),
23340 MEM_P (chain) ? 9 : 10),
23341 NULL_RTX, 1, OPTAB_DIRECT);
23343 mem = adjust_address (m_tramp, QImode, 5);
23344 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23346 mem = adjust_address (m_tramp, SImode, 6);
23347 emit_move_insn (mem, disp);
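      /* Editor's byte-level view of the 10-byte trampoline assembled
         above (illustrative):

           offset 0:  b8/b9/68  movl $chain, %eax/%ecx  (or pushl $chain)
           offset 5:  e9        jmp  rel32

         rel32 is relative to trampoline+10; in the push case the target
         is biased by one byte so the jump lands past the 1-byte
         "push %esi" at the function's alternate entry, matching the
         9-vs-10 choice in the disp computation above.  */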
23353 /* Load the function address to r11. Try to load address using
23354 the shorter movl instead of movabs. We may want to support
23355          movq for kernel mode, but kernel does not use trampolines at
23356          this point.  */
23357 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23359 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23361 mem = adjust_address (m_tramp, HImode, offset);
23362 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23364 mem = adjust_address (m_tramp, SImode, offset + 2);
23365 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23370 mem = adjust_address (m_tramp, HImode, offset);
23371 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23373 mem = adjust_address (m_tramp, DImode, offset + 2);
23374 emit_move_insn (mem, fnaddr);
23378 /* Load static chain using movabs to r10. */
23379 mem = adjust_address (m_tramp, HImode, offset);
23380 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23382 mem = adjust_address (m_tramp, DImode, offset + 2);
23383 emit_move_insn (mem, chain_value);
23386 /* Jump to r11; the last (unused) byte is a nop, only there to
23387 pad the write out to a single 32-bit store. */
23388 mem = adjust_address (m_tramp, SImode, offset);
23389 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
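      /* Editor's decode of the 32-bit store above (illustrative): stored
         little-endian, 0x90e3ff49 is the byte sequence

             49 ff e3   jmp *%r11
             90         nop

         i.e. the indirect jump plus the padding nop described in the
         comment.  */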
23392 gcc_assert (offset <= TRAMPOLINE_SIZE);
23395 #ifdef ENABLE_EXECUTE_STACK
23396 #ifdef CHECK_EXECUTE_STACK_ENABLED
23397 if (CHECK_EXECUTE_STACK_ENABLED)
23399 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23400 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23404 /* The following file contains several enumerations and data structures
23405 built from the definitions in i386-builtin-types.def. */
23407 #include "i386-builtin-types.inc"
23409 /* Table for the ix86 builtin non-function types. */
23410 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23412 /* Retrieve an element from the above table, building some of
23413 the types lazily. */
23416 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23418 unsigned int index;
23421 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23423 type = ix86_builtin_type_tab[(int) tcode];
23427 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23428 if (tcode <= IX86_BT_LAST_VECT)
23430 enum machine_mode mode;
23432 index = tcode - IX86_BT_LAST_PRIM - 1;
23433 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23434 mode = ix86_builtin_type_vect_mode[index];
23436 type = build_vector_type_for_mode (itype, mode);
23442 index = tcode - IX86_BT_LAST_VECT - 1;
23443 if (tcode <= IX86_BT_LAST_PTR)
23444 quals = TYPE_UNQUALIFIED;
23446 quals = TYPE_QUAL_CONST;
23448 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23449 if (quals != TYPE_UNQUALIFIED)
23450 itype = build_qualified_type (itype, quals);
23452 type = build_pointer_type (itype);
23455 ix86_builtin_type_tab[(int) tcode] = type;
23459 /* Table for the ix86 builtin function types. */
23460 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23462 /* Retrieve an element from the above table, building some of
23463 the types lazily. */
23466 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23470 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23472 type = ix86_builtin_func_type_tab[(int) tcode];
23476 if (tcode <= IX86_BT_LAST_FUNC)
23478 unsigned start = ix86_builtin_func_start[(int) tcode];
23479 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23480 tree rtype, atype, args = void_list_node;
23483 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23484 for (i = after - 1; i > start; --i)
23486 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23487 args = tree_cons (NULL, atype, args);
23490 type = build_function_type (rtype, args);
23494 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23495 enum ix86_builtin_func_type icode;
23497 icode = ix86_builtin_func_alias_base[index];
23498 type = ix86_get_builtin_func_type (icode);
23501 ix86_builtin_func_type_tab[(int) tcode] = type;
23506 /* Codes for all the SSE/MMX builtins. */
23509 IX86_BUILTIN_ADDPS,
23510 IX86_BUILTIN_ADDSS,
23511 IX86_BUILTIN_DIVPS,
23512 IX86_BUILTIN_DIVSS,
23513 IX86_BUILTIN_MULPS,
23514 IX86_BUILTIN_MULSS,
23515 IX86_BUILTIN_SUBPS,
23516 IX86_BUILTIN_SUBSS,
23518 IX86_BUILTIN_CMPEQPS,
23519 IX86_BUILTIN_CMPLTPS,
23520 IX86_BUILTIN_CMPLEPS,
23521 IX86_BUILTIN_CMPGTPS,
23522 IX86_BUILTIN_CMPGEPS,
23523 IX86_BUILTIN_CMPNEQPS,
23524 IX86_BUILTIN_CMPNLTPS,
23525 IX86_BUILTIN_CMPNLEPS,
23526 IX86_BUILTIN_CMPNGTPS,
23527 IX86_BUILTIN_CMPNGEPS,
23528 IX86_BUILTIN_CMPORDPS,
23529 IX86_BUILTIN_CMPUNORDPS,
23530 IX86_BUILTIN_CMPEQSS,
23531 IX86_BUILTIN_CMPLTSS,
23532 IX86_BUILTIN_CMPLESS,
23533 IX86_BUILTIN_CMPNEQSS,
23534 IX86_BUILTIN_CMPNLTSS,
23535 IX86_BUILTIN_CMPNLESS,
23536 IX86_BUILTIN_CMPNGTSS,
23537 IX86_BUILTIN_CMPNGESS,
23538 IX86_BUILTIN_CMPORDSS,
23539 IX86_BUILTIN_CMPUNORDSS,
23541 IX86_BUILTIN_COMIEQSS,
23542 IX86_BUILTIN_COMILTSS,
23543 IX86_BUILTIN_COMILESS,
23544 IX86_BUILTIN_COMIGTSS,
23545 IX86_BUILTIN_COMIGESS,
23546 IX86_BUILTIN_COMINEQSS,
23547 IX86_BUILTIN_UCOMIEQSS,
23548 IX86_BUILTIN_UCOMILTSS,
23549 IX86_BUILTIN_UCOMILESS,
23550 IX86_BUILTIN_UCOMIGTSS,
23551 IX86_BUILTIN_UCOMIGESS,
23552 IX86_BUILTIN_UCOMINEQSS,
23554 IX86_BUILTIN_CVTPI2PS,
23555 IX86_BUILTIN_CVTPS2PI,
23556 IX86_BUILTIN_CVTSI2SS,
23557 IX86_BUILTIN_CVTSI642SS,
23558 IX86_BUILTIN_CVTSS2SI,
23559 IX86_BUILTIN_CVTSS2SI64,
23560 IX86_BUILTIN_CVTTPS2PI,
23561 IX86_BUILTIN_CVTTSS2SI,
23562 IX86_BUILTIN_CVTTSS2SI64,
23564 IX86_BUILTIN_MAXPS,
23565 IX86_BUILTIN_MAXSS,
23566 IX86_BUILTIN_MINPS,
23567 IX86_BUILTIN_MINSS,
23569 IX86_BUILTIN_LOADUPS,
23570 IX86_BUILTIN_STOREUPS,
23571 IX86_BUILTIN_MOVSS,
23573 IX86_BUILTIN_MOVHLPS,
23574 IX86_BUILTIN_MOVLHPS,
23575 IX86_BUILTIN_LOADHPS,
23576 IX86_BUILTIN_LOADLPS,
23577 IX86_BUILTIN_STOREHPS,
23578 IX86_BUILTIN_STORELPS,
23580 IX86_BUILTIN_MASKMOVQ,
23581 IX86_BUILTIN_MOVMSKPS,
23582 IX86_BUILTIN_PMOVMSKB,
23584 IX86_BUILTIN_MOVNTPS,
23585 IX86_BUILTIN_MOVNTQ,
23587 IX86_BUILTIN_LOADDQU,
23588 IX86_BUILTIN_STOREDQU,
23590 IX86_BUILTIN_PACKSSWB,
23591 IX86_BUILTIN_PACKSSDW,
23592 IX86_BUILTIN_PACKUSWB,
23594 IX86_BUILTIN_PADDB,
23595 IX86_BUILTIN_PADDW,
23596 IX86_BUILTIN_PADDD,
23597 IX86_BUILTIN_PADDQ,
23598 IX86_BUILTIN_PADDSB,
23599 IX86_BUILTIN_PADDSW,
23600 IX86_BUILTIN_PADDUSB,
23601 IX86_BUILTIN_PADDUSW,
23602 IX86_BUILTIN_PSUBB,
23603 IX86_BUILTIN_PSUBW,
23604 IX86_BUILTIN_PSUBD,
23605 IX86_BUILTIN_PSUBQ,
23606 IX86_BUILTIN_PSUBSB,
23607 IX86_BUILTIN_PSUBSW,
23608 IX86_BUILTIN_PSUBUSB,
23609 IX86_BUILTIN_PSUBUSW,
23612 IX86_BUILTIN_PANDN,
23616 IX86_BUILTIN_PAVGB,
23617 IX86_BUILTIN_PAVGW,
23619 IX86_BUILTIN_PCMPEQB,
23620 IX86_BUILTIN_PCMPEQW,
23621 IX86_BUILTIN_PCMPEQD,
23622 IX86_BUILTIN_PCMPGTB,
23623 IX86_BUILTIN_PCMPGTW,
23624 IX86_BUILTIN_PCMPGTD,
23626 IX86_BUILTIN_PMADDWD,
23628 IX86_BUILTIN_PMAXSW,
23629 IX86_BUILTIN_PMAXUB,
23630 IX86_BUILTIN_PMINSW,
23631 IX86_BUILTIN_PMINUB,
23633 IX86_BUILTIN_PMULHUW,
23634 IX86_BUILTIN_PMULHW,
23635 IX86_BUILTIN_PMULLW,
23637 IX86_BUILTIN_PSADBW,
23638 IX86_BUILTIN_PSHUFW,
23640 IX86_BUILTIN_PSLLW,
23641 IX86_BUILTIN_PSLLD,
23642 IX86_BUILTIN_PSLLQ,
23643 IX86_BUILTIN_PSRAW,
23644 IX86_BUILTIN_PSRAD,
23645 IX86_BUILTIN_PSRLW,
23646 IX86_BUILTIN_PSRLD,
23647 IX86_BUILTIN_PSRLQ,
23648 IX86_BUILTIN_PSLLWI,
23649 IX86_BUILTIN_PSLLDI,
23650 IX86_BUILTIN_PSLLQI,
23651 IX86_BUILTIN_PSRAWI,
23652 IX86_BUILTIN_PSRADI,
23653 IX86_BUILTIN_PSRLWI,
23654 IX86_BUILTIN_PSRLDI,
23655 IX86_BUILTIN_PSRLQI,
23657 IX86_BUILTIN_PUNPCKHBW,
23658 IX86_BUILTIN_PUNPCKHWD,
23659 IX86_BUILTIN_PUNPCKHDQ,
23660 IX86_BUILTIN_PUNPCKLBW,
23661 IX86_BUILTIN_PUNPCKLWD,
23662 IX86_BUILTIN_PUNPCKLDQ,
23664 IX86_BUILTIN_SHUFPS,
23666 IX86_BUILTIN_RCPPS,
23667 IX86_BUILTIN_RCPSS,
23668 IX86_BUILTIN_RSQRTPS,
23669 IX86_BUILTIN_RSQRTPS_NR,
23670 IX86_BUILTIN_RSQRTSS,
23671 IX86_BUILTIN_RSQRTF,
23672 IX86_BUILTIN_SQRTPS,
23673 IX86_BUILTIN_SQRTPS_NR,
23674 IX86_BUILTIN_SQRTSS,
23676 IX86_BUILTIN_UNPCKHPS,
23677 IX86_BUILTIN_UNPCKLPS,
23679 IX86_BUILTIN_ANDPS,
23680 IX86_BUILTIN_ANDNPS,
23682 IX86_BUILTIN_XORPS,
23685 IX86_BUILTIN_LDMXCSR,
23686 IX86_BUILTIN_STMXCSR,
23687 IX86_BUILTIN_SFENCE,
23689 /* 3DNow! Original */
23690 IX86_BUILTIN_FEMMS,
23691 IX86_BUILTIN_PAVGUSB,
23692 IX86_BUILTIN_PF2ID,
23693 IX86_BUILTIN_PFACC,
23694 IX86_BUILTIN_PFADD,
23695 IX86_BUILTIN_PFCMPEQ,
23696 IX86_BUILTIN_PFCMPGE,
23697 IX86_BUILTIN_PFCMPGT,
23698 IX86_BUILTIN_PFMAX,
23699 IX86_BUILTIN_PFMIN,
23700 IX86_BUILTIN_PFMUL,
23701 IX86_BUILTIN_PFRCP,
23702 IX86_BUILTIN_PFRCPIT1,
23703 IX86_BUILTIN_PFRCPIT2,
23704 IX86_BUILTIN_PFRSQIT1,
23705 IX86_BUILTIN_PFRSQRT,
23706 IX86_BUILTIN_PFSUB,
23707 IX86_BUILTIN_PFSUBR,
23708 IX86_BUILTIN_PI2FD,
23709 IX86_BUILTIN_PMULHRW,
23711 /* 3DNow! Athlon Extensions */
23712 IX86_BUILTIN_PF2IW,
23713 IX86_BUILTIN_PFNACC,
23714 IX86_BUILTIN_PFPNACC,
23715 IX86_BUILTIN_PI2FW,
23716 IX86_BUILTIN_PSWAPDSI,
23717 IX86_BUILTIN_PSWAPDSF,
23720 IX86_BUILTIN_ADDPD,
23721 IX86_BUILTIN_ADDSD,
23722 IX86_BUILTIN_DIVPD,
23723 IX86_BUILTIN_DIVSD,
23724 IX86_BUILTIN_MULPD,
23725 IX86_BUILTIN_MULSD,
23726 IX86_BUILTIN_SUBPD,
23727 IX86_BUILTIN_SUBSD,
23729 IX86_BUILTIN_CMPEQPD,
23730 IX86_BUILTIN_CMPLTPD,
23731 IX86_BUILTIN_CMPLEPD,
23732 IX86_BUILTIN_CMPGTPD,
23733 IX86_BUILTIN_CMPGEPD,
23734 IX86_BUILTIN_CMPNEQPD,
23735 IX86_BUILTIN_CMPNLTPD,
23736 IX86_BUILTIN_CMPNLEPD,
23737 IX86_BUILTIN_CMPNGTPD,
23738 IX86_BUILTIN_CMPNGEPD,
23739 IX86_BUILTIN_CMPORDPD,
23740 IX86_BUILTIN_CMPUNORDPD,
23741 IX86_BUILTIN_CMPEQSD,
23742 IX86_BUILTIN_CMPLTSD,
23743 IX86_BUILTIN_CMPLESD,
23744 IX86_BUILTIN_CMPNEQSD,
23745 IX86_BUILTIN_CMPNLTSD,
23746 IX86_BUILTIN_CMPNLESD,
23747 IX86_BUILTIN_CMPORDSD,
23748 IX86_BUILTIN_CMPUNORDSD,
23750 IX86_BUILTIN_COMIEQSD,
23751 IX86_BUILTIN_COMILTSD,
23752 IX86_BUILTIN_COMILESD,
23753 IX86_BUILTIN_COMIGTSD,
23754 IX86_BUILTIN_COMIGESD,
23755 IX86_BUILTIN_COMINEQSD,
23756 IX86_BUILTIN_UCOMIEQSD,
23757 IX86_BUILTIN_UCOMILTSD,
23758 IX86_BUILTIN_UCOMILESD,
23759 IX86_BUILTIN_UCOMIGTSD,
23760 IX86_BUILTIN_UCOMIGESD,
23761 IX86_BUILTIN_UCOMINEQSD,
23763 IX86_BUILTIN_MAXPD,
23764 IX86_BUILTIN_MAXSD,
23765 IX86_BUILTIN_MINPD,
23766 IX86_BUILTIN_MINSD,
23768 IX86_BUILTIN_ANDPD,
23769 IX86_BUILTIN_ANDNPD,
23771 IX86_BUILTIN_XORPD,
23773 IX86_BUILTIN_SQRTPD,
23774 IX86_BUILTIN_SQRTSD,
23776 IX86_BUILTIN_UNPCKHPD,
23777 IX86_BUILTIN_UNPCKLPD,
23779 IX86_BUILTIN_SHUFPD,
23781 IX86_BUILTIN_LOADUPD,
23782 IX86_BUILTIN_STOREUPD,
23783 IX86_BUILTIN_MOVSD,
23785 IX86_BUILTIN_LOADHPD,
23786 IX86_BUILTIN_LOADLPD,
23788 IX86_BUILTIN_CVTDQ2PD,
23789 IX86_BUILTIN_CVTDQ2PS,
23791 IX86_BUILTIN_CVTPD2DQ,
23792 IX86_BUILTIN_CVTPD2PI,
23793 IX86_BUILTIN_CVTPD2PS,
23794 IX86_BUILTIN_CVTTPD2DQ,
23795 IX86_BUILTIN_CVTTPD2PI,
23797 IX86_BUILTIN_CVTPI2PD,
23798 IX86_BUILTIN_CVTSI2SD,
23799 IX86_BUILTIN_CVTSI642SD,
23801 IX86_BUILTIN_CVTSD2SI,
23802 IX86_BUILTIN_CVTSD2SI64,
23803 IX86_BUILTIN_CVTSD2SS,
23804 IX86_BUILTIN_CVTSS2SD,
23805 IX86_BUILTIN_CVTTSD2SI,
23806 IX86_BUILTIN_CVTTSD2SI64,
23808 IX86_BUILTIN_CVTPS2DQ,
23809 IX86_BUILTIN_CVTPS2PD,
23810 IX86_BUILTIN_CVTTPS2DQ,
23812 IX86_BUILTIN_MOVNTI,
23813 IX86_BUILTIN_MOVNTPD,
23814 IX86_BUILTIN_MOVNTDQ,
23816 IX86_BUILTIN_MOVQ128,
23819 IX86_BUILTIN_MASKMOVDQU,
23820 IX86_BUILTIN_MOVMSKPD,
23821 IX86_BUILTIN_PMOVMSKB128,
23823 IX86_BUILTIN_PACKSSWB128,
23824 IX86_BUILTIN_PACKSSDW128,
23825 IX86_BUILTIN_PACKUSWB128,
23827 IX86_BUILTIN_PADDB128,
23828 IX86_BUILTIN_PADDW128,
23829 IX86_BUILTIN_PADDD128,
23830 IX86_BUILTIN_PADDQ128,
23831 IX86_BUILTIN_PADDSB128,
23832 IX86_BUILTIN_PADDSW128,
23833 IX86_BUILTIN_PADDUSB128,
23834 IX86_BUILTIN_PADDUSW128,
23835 IX86_BUILTIN_PSUBB128,
23836 IX86_BUILTIN_PSUBW128,
23837 IX86_BUILTIN_PSUBD128,
23838 IX86_BUILTIN_PSUBQ128,
23839 IX86_BUILTIN_PSUBSB128,
23840 IX86_BUILTIN_PSUBSW128,
23841 IX86_BUILTIN_PSUBUSB128,
23842 IX86_BUILTIN_PSUBUSW128,
23844 IX86_BUILTIN_PAND128,
23845 IX86_BUILTIN_PANDN128,
23846 IX86_BUILTIN_POR128,
23847 IX86_BUILTIN_PXOR128,
23849 IX86_BUILTIN_PAVGB128,
23850 IX86_BUILTIN_PAVGW128,
23852 IX86_BUILTIN_PCMPEQB128,
23853 IX86_BUILTIN_PCMPEQW128,
23854 IX86_BUILTIN_PCMPEQD128,
23855 IX86_BUILTIN_PCMPGTB128,
23856 IX86_BUILTIN_PCMPGTW128,
23857 IX86_BUILTIN_PCMPGTD128,
23859 IX86_BUILTIN_PMADDWD128,
23861 IX86_BUILTIN_PMAXSW128,
23862 IX86_BUILTIN_PMAXUB128,
23863 IX86_BUILTIN_PMINSW128,
23864 IX86_BUILTIN_PMINUB128,
23866 IX86_BUILTIN_PMULUDQ,
23867 IX86_BUILTIN_PMULUDQ128,
23868 IX86_BUILTIN_PMULHUW128,
23869 IX86_BUILTIN_PMULHW128,
23870 IX86_BUILTIN_PMULLW128,
23872 IX86_BUILTIN_PSADBW128,
23873 IX86_BUILTIN_PSHUFHW,
23874 IX86_BUILTIN_PSHUFLW,
23875 IX86_BUILTIN_PSHUFD,
23877 IX86_BUILTIN_PSLLDQI128,
23878 IX86_BUILTIN_PSLLWI128,
23879 IX86_BUILTIN_PSLLDI128,
23880 IX86_BUILTIN_PSLLQI128,
23881 IX86_BUILTIN_PSRAWI128,
23882 IX86_BUILTIN_PSRADI128,
23883 IX86_BUILTIN_PSRLDQI128,
23884 IX86_BUILTIN_PSRLWI128,
23885 IX86_BUILTIN_PSRLDI128,
23886 IX86_BUILTIN_PSRLQI128,
23888 IX86_BUILTIN_PSLLDQ128,
23889 IX86_BUILTIN_PSLLW128,
23890 IX86_BUILTIN_PSLLD128,
23891 IX86_BUILTIN_PSLLQ128,
23892 IX86_BUILTIN_PSRAW128,
23893 IX86_BUILTIN_PSRAD128,
23894 IX86_BUILTIN_PSRLW128,
23895 IX86_BUILTIN_PSRLD128,
23896 IX86_BUILTIN_PSRLQ128,
23898 IX86_BUILTIN_PUNPCKHBW128,
23899 IX86_BUILTIN_PUNPCKHWD128,
23900 IX86_BUILTIN_PUNPCKHDQ128,
23901 IX86_BUILTIN_PUNPCKHQDQ128,
23902 IX86_BUILTIN_PUNPCKLBW128,
23903 IX86_BUILTIN_PUNPCKLWD128,
23904 IX86_BUILTIN_PUNPCKLDQ128,
23905 IX86_BUILTIN_PUNPCKLQDQ128,
23907 IX86_BUILTIN_CLFLUSH,
23908 IX86_BUILTIN_MFENCE,
23909 IX86_BUILTIN_LFENCE,
23911 IX86_BUILTIN_BSRSI,
23912 IX86_BUILTIN_BSRDI,
23913 IX86_BUILTIN_RDPMC,
23914 IX86_BUILTIN_RDTSC,
23915 IX86_BUILTIN_RDTSCP,
23916 IX86_BUILTIN_ROLQI,
23917 IX86_BUILTIN_ROLHI,
23918 IX86_BUILTIN_RORQI,
23919 IX86_BUILTIN_RORHI,
23922 IX86_BUILTIN_ADDSUBPS,
23923 IX86_BUILTIN_HADDPS,
23924 IX86_BUILTIN_HSUBPS,
23925 IX86_BUILTIN_MOVSHDUP,
23926 IX86_BUILTIN_MOVSLDUP,
23927 IX86_BUILTIN_ADDSUBPD,
23928 IX86_BUILTIN_HADDPD,
23929 IX86_BUILTIN_HSUBPD,
23930 IX86_BUILTIN_LDDQU,
23932 IX86_BUILTIN_MONITOR,
23933 IX86_BUILTIN_MWAIT,
23936 IX86_BUILTIN_PHADDW,
23937 IX86_BUILTIN_PHADDD,
23938 IX86_BUILTIN_PHADDSW,
23939 IX86_BUILTIN_PHSUBW,
23940 IX86_BUILTIN_PHSUBD,
23941 IX86_BUILTIN_PHSUBSW,
23942 IX86_BUILTIN_PMADDUBSW,
23943 IX86_BUILTIN_PMULHRSW,
23944 IX86_BUILTIN_PSHUFB,
23945 IX86_BUILTIN_PSIGNB,
23946 IX86_BUILTIN_PSIGNW,
23947 IX86_BUILTIN_PSIGND,
23948 IX86_BUILTIN_PALIGNR,
23949 IX86_BUILTIN_PABSB,
23950 IX86_BUILTIN_PABSW,
23951 IX86_BUILTIN_PABSD,
23953 IX86_BUILTIN_PHADDW128,
23954 IX86_BUILTIN_PHADDD128,
23955 IX86_BUILTIN_PHADDSW128,
23956 IX86_BUILTIN_PHSUBW128,
23957 IX86_BUILTIN_PHSUBD128,
23958 IX86_BUILTIN_PHSUBSW128,
23959 IX86_BUILTIN_PMADDUBSW128,
23960 IX86_BUILTIN_PMULHRSW128,
23961 IX86_BUILTIN_PSHUFB128,
23962 IX86_BUILTIN_PSIGNB128,
23963 IX86_BUILTIN_PSIGNW128,
23964 IX86_BUILTIN_PSIGND128,
23965 IX86_BUILTIN_PALIGNR128,
23966 IX86_BUILTIN_PABSB128,
23967 IX86_BUILTIN_PABSW128,
23968 IX86_BUILTIN_PABSD128,
23970 /* AMDFAM10 - SSE4A New Instructions. */
23971 IX86_BUILTIN_MOVNTSD,
23972 IX86_BUILTIN_MOVNTSS,
23973 IX86_BUILTIN_EXTRQI,
23974 IX86_BUILTIN_EXTRQ,
23975 IX86_BUILTIN_INSERTQI,
23976 IX86_BUILTIN_INSERTQ,
23979 IX86_BUILTIN_BLENDPD,
23980 IX86_BUILTIN_BLENDPS,
23981 IX86_BUILTIN_BLENDVPD,
23982 IX86_BUILTIN_BLENDVPS,
23983 IX86_BUILTIN_PBLENDVB128,
23984 IX86_BUILTIN_PBLENDW128,
23989 IX86_BUILTIN_INSERTPS128,
23991 IX86_BUILTIN_MOVNTDQA,
23992 IX86_BUILTIN_MPSADBW128,
23993 IX86_BUILTIN_PACKUSDW128,
23994 IX86_BUILTIN_PCMPEQQ,
23995 IX86_BUILTIN_PHMINPOSUW128,
23997 IX86_BUILTIN_PMAXSB128,
23998 IX86_BUILTIN_PMAXSD128,
23999 IX86_BUILTIN_PMAXUD128,
24000 IX86_BUILTIN_PMAXUW128,
24002 IX86_BUILTIN_PMINSB128,
24003 IX86_BUILTIN_PMINSD128,
24004 IX86_BUILTIN_PMINUD128,
24005 IX86_BUILTIN_PMINUW128,
24007 IX86_BUILTIN_PMOVSXBW128,
24008 IX86_BUILTIN_PMOVSXBD128,
24009 IX86_BUILTIN_PMOVSXBQ128,
24010 IX86_BUILTIN_PMOVSXWD128,
24011 IX86_BUILTIN_PMOVSXWQ128,
24012 IX86_BUILTIN_PMOVSXDQ128,
24014 IX86_BUILTIN_PMOVZXBW128,
24015 IX86_BUILTIN_PMOVZXBD128,
24016 IX86_BUILTIN_PMOVZXBQ128,
24017 IX86_BUILTIN_PMOVZXWD128,
24018 IX86_BUILTIN_PMOVZXWQ128,
24019 IX86_BUILTIN_PMOVZXDQ128,
24021 IX86_BUILTIN_PMULDQ128,
24022 IX86_BUILTIN_PMULLD128,
24024 IX86_BUILTIN_ROUNDPD,
24025 IX86_BUILTIN_ROUNDPS,
24026 IX86_BUILTIN_ROUNDSD,
24027 IX86_BUILTIN_ROUNDSS,
24029 IX86_BUILTIN_FLOORPD,
24030 IX86_BUILTIN_CEILPD,
24031 IX86_BUILTIN_TRUNCPD,
24032 IX86_BUILTIN_RINTPD,
24033 IX86_BUILTIN_FLOORPS,
24034 IX86_BUILTIN_CEILPS,
24035 IX86_BUILTIN_TRUNCPS,
24036 IX86_BUILTIN_RINTPS,
24038 IX86_BUILTIN_PTESTZ,
24039 IX86_BUILTIN_PTESTC,
24040 IX86_BUILTIN_PTESTNZC,
24042 IX86_BUILTIN_VEC_INIT_V2SI,
24043 IX86_BUILTIN_VEC_INIT_V4HI,
24044 IX86_BUILTIN_VEC_INIT_V8QI,
24045 IX86_BUILTIN_VEC_EXT_V2DF,
24046 IX86_BUILTIN_VEC_EXT_V2DI,
24047 IX86_BUILTIN_VEC_EXT_V4SF,
24048 IX86_BUILTIN_VEC_EXT_V4SI,
24049 IX86_BUILTIN_VEC_EXT_V8HI,
24050 IX86_BUILTIN_VEC_EXT_V2SI,
24051 IX86_BUILTIN_VEC_EXT_V4HI,
24052 IX86_BUILTIN_VEC_EXT_V16QI,
24053 IX86_BUILTIN_VEC_SET_V2DI,
24054 IX86_BUILTIN_VEC_SET_V4SF,
24055 IX86_BUILTIN_VEC_SET_V4SI,
24056 IX86_BUILTIN_VEC_SET_V8HI,
24057 IX86_BUILTIN_VEC_SET_V4HI,
24058 IX86_BUILTIN_VEC_SET_V16QI,
24060 IX86_BUILTIN_VEC_PACK_SFIX,
24063 IX86_BUILTIN_CRC32QI,
24064 IX86_BUILTIN_CRC32HI,
24065 IX86_BUILTIN_CRC32SI,
24066 IX86_BUILTIN_CRC32DI,
24068 IX86_BUILTIN_PCMPESTRI128,
24069 IX86_BUILTIN_PCMPESTRM128,
24070 IX86_BUILTIN_PCMPESTRA128,
24071 IX86_BUILTIN_PCMPESTRC128,
24072 IX86_BUILTIN_PCMPESTRO128,
24073 IX86_BUILTIN_PCMPESTRS128,
24074 IX86_BUILTIN_PCMPESTRZ128,
24075 IX86_BUILTIN_PCMPISTRI128,
24076 IX86_BUILTIN_PCMPISTRM128,
24077 IX86_BUILTIN_PCMPISTRA128,
24078 IX86_BUILTIN_PCMPISTRC128,
24079 IX86_BUILTIN_PCMPISTRO128,
24080 IX86_BUILTIN_PCMPISTRS128,
24081 IX86_BUILTIN_PCMPISTRZ128,
24083 IX86_BUILTIN_PCMPGTQ,
24085 /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;	     /* true if deferred and not yet built */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  /* The builtin is in the current ISA, or the front end cannot
	     add builtins later: build the decl now.  */
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  /* Defer building the decl; record what ix86_add_new_builtins
	     needs to create it later.  */
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
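
/* For instance, a 256-bit AVX builtin could be registered as

     def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_example",
		  V8SF_FTYPE_V8SF, IX86_BUILTIN_EXAMPLE);

   This is only an illustrative sketch: the name, function type and enum
   value above are placeholders, not builtins defined in this file.  */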

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    /* The builtin was deferred; remember to mark it "const" when it is
       eventually built.  */
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
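
/* Most of the entries in bdesc_args below reach this wrapper; e.g. the
   addps record corresponds to a call of the form (a sketch mirroring
   that table entry):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);  */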

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
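
/* A sketch of the expected use (the call site is outside this fragment
   and is an assumption): after the effective ISA set changes, e.g. via a
   function-specific target option, the new flags are passed in so that
   any matching deferred builtins become available:

     ix86_add_new_builtins (ix86_isa_flags);  */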

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
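
/* For example, there is no native "greater than" packed compare, so the
   __builtin_ia32_cmpgtps entry in bdesc_args below is described as LT
   together with a *_SWAP function type, and the operands are exchanged
   when the builtin is expanded.  */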

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
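
/* The builtin_description tables below are walked at initialization time
   to register each builtin; a sketch of that loop (it lives elsewhere in
   this file and is shown here only as an assumption):

     const struct builtin_description *d;
     size_t i;

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       {
	 enum ix86_builtin_func_type ftype;

	 if (d->name == 0)
	   continue;
	 ftype = (enum ix86_builtin_func_type) d->flag;
	 def_builtin_const (d->mask, d->name, ftype, d->code);
       }  */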

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
24756 /* Builtins with variable number of arguments. */
24757 static const struct builtin_description bdesc_args[] =
24759 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24760 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24761 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24762 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24763 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24764 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24765 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24768 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24769 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24770 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24771 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24772 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24773 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24776 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24778 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24781 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24782 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24785 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24787 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24790 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24793 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24794 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24800 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
24804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
24806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24807 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24810 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24812 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24813 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24814 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24816 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24817 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24819 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24820 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24828 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24829 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24832 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24833 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24834 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24835 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24837 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24838 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24839 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24840 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24841 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24842 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24843 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24844 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24845 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24846 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24847 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24848 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24849 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24850 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24851 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24854 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24855 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24856 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24857 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24858 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24859 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24864 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24865 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24866 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24870 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24873 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24877 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24878 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24879 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24881 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24887 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
24897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24909 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24910 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24914 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24916 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24917 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24919 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24924 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24925 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24929 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
24931 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24937 /* SSE MMX or 3Dnow!A */
24938 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24939 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24940 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24942 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24943 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24944 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24945 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24947 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24948 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24950 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24955 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24956 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24957 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24958 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24959 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24960 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24961 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24962 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24963 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24964 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24965 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24966 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24985 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24986 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24992 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24993 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24994 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24995 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
25006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
25012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
25014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
25015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
25016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
25018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
25019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
25020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
25022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25023 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25027 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25029 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25030 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25032 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25035 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25036 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
25038 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
25040 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25041 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25042 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25043 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
25044 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
25045 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
25046 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25047 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
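
  /* For reference, each row here just wires one user-visible builtin to
     a named insn pattern.  A minimal usage sketch (ordinarily reached
     through the _mm_* wrappers in emmintrin.h rather than directly):

	typedef char v16qi __attribute__ ((vector_size (16)));

	v16qi
	add_bytes (v16qi a, v16qi b)
	{
	  return __builtin_ia32_paddb128 (a, b);
	}

     which expands via the V16QI_FTYPE_V16QI_V16QI signature straight to
     the addv16qi3 pattern above.  */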

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
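
  /* The shift rows above follow three signature conventions: _SI_COUNT
     takes the count as a scalar integer (the psllwi/psrldi immediate
     forms), _V*_COUNT takes the count from the low quadword of a vector
     operand, and the INT_CONVERT forms (pslldqi128/psrldqi128) are the
     whole-register shifts, whose operands the expander converts to the
     V1TI mode that the sse2_ashlv1ti3/sse2_lshrv1ti3 patterns expect.  */
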
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
25231 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
25232 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
25233 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
25234 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
25236 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
25237 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
25238 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
25239 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
25241 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
25242 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
25243 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
25244 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
25246 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25247 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
25248 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
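
  /* A minimal usage sketch for the CRC32 rows; UINT_FTYPE_UINT_UCHAR is
     unsigned int (unsigned int, unsigned char):

	unsigned int
	crc32c_bytes (unsigned int crc, const unsigned char *p, int n)
	{
	  while (n--)
	    crc = __builtin_ia32_crc32qi (crc, *p++);
	  return crc;
	}

     The hi/si/di forms consume 16, 32 and 64 bits per step.  */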

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
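
/* Naming sketch for the aliases above: MULTI_ARG_<n>_<elt> gives the
   operand count and element kind, a trailing 2 marks the 256-bit
   variants, and suffixes such as _IMM, _CMP and _TF distinguish the
   immediate-count and the comparison-style expansion flavors of
   otherwise identical signatures.  */
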
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
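
  /* Note that the packed FMA rows above are gated on OPTION_MASK_ISA_FMA
     | OPTION_MASK_ISA_FMA4: either instruction set can implement the
     operation, so one builtin row serves both.  */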
25509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
25510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
25511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
25512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
25513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
25514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
25515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
25517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
25519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
25520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
25521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
25522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
25523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
25525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
25527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
25529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
25533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
25537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
25538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },

};
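
/* Illustrative sketch (not part of GCC): with -mxop, user code reaches
   these table entries through the xopintrin.h intrinsics; for example
   _mm_roti_epi32 (x, 3) is defined in terms of
   __builtin_ia32_vprotdi (x, 3).  The authoritative intrinsic-to-builtin
   mapping lives in xopintrin.h, not here.  */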

/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
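
  /* Usage sketch (illustrative): the *_step builtins return nonzero on
     success and store the random value through the pointer argument, so
     a caller typically retries until the hardware delivers a value:

       unsigned int val;
       while (!__builtin_ia32_rdrand32_step (&val))
	 continue;
  */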

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);
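
  /* Usage sketch (illustrative): mmintrin.h builds __m64 values through
     these builtins; e.g. _mm_set_pi32 (hi, lo) is defined as
     __builtin_ia32_vec_init_v2si (lo, hi).  */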

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
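
  /* Usage sketch (illustrative): e.g. _mm_extract_epi16 (v, 3) in
     emmintrin.h lowers to __builtin_ia32_vec_ext_v8hi (v, 3).  The
     selector must be a compile-time constant; get_element_number below
     enforces the range check.  */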

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
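
  /* Usage sketch (illustrative): e.g. _mm_insert_epi16 (v, x, 3) lowers
     to __builtin_ia32_vec_set_v8hi (v, x, 3), again with a constant
     element selector.  */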
25846 /* Add FMA4 multi-arg argument instructions */
25847 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25852 ftype = (enum ix86_builtin_func_type) d->flag;
25853 def_builtin_const (d->mask, d->name, ftype, d->code);

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
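
/* Usage sketch (illustrative): a 64-bit function compiled with the
   opposite calling convention uses these instead of the generic
   va_start family:

     void __attribute__((ms_abi)) f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       ...
       __builtin_ms_va_end (ap);
     }
*/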

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* long double is not XFmode here; build a distinct 80-bit type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
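
/* Usage sketch (illustrative): once registered, the names are usable
   directly in C code, together with GCC's w/q constant suffixes:

     __float80  x = 1.0w;
     __float128 y = 1.0q;
*/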

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to a normal call if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
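
  /* Usage sketch (illustrative):

       __float128 q = __builtin_fabsq (z);

     With SSE2 this expands inline; without it, the call falls back to
     the libgcc routine named above (__fabstf2).  */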

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      /* Widen an SImode shift count to the TImode the insn wants by
	 loading it into the low element of a V4SI register.  */
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
	{
	  if (!CONST_INT_P (op))
	    {
	      error ("last argument must be an immediate");
	      return gen_reg_rtx (tmode);
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
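
/* A consequence of the last_arg_constant paths above (illustrative):
   __builtin_ia32_vprotdi (x, n) is only accepted when n is a literal
   constant; a variable rotate amount must use __builtin_ia32_vprotd
   instead, which takes the count in a register.  */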

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
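
/* Usage sketch (illustrative): e.g. _mm_testz_si128 (a, b) in
   smmintrin.h lowers to __builtin_ia32_ptestz128 (a, b); this routine
   emits the ptest insn and then materializes the requested flag as a
   QImode value via the setcc-style SET above.  */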

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
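
/* Usage sketch (illustrative): e.g. _mm_cmpistri (a, b, mode) from
   smmintrin.h lowers to __builtin_ia32_pcmpistri128; the mode byte must
   be a literal constant, which is what the 8-bit immediate check above
   enforces.  */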

/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;

    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;

    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;

    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;

    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;

    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;

    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;

    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;

    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;

    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;

    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;

    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;

    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || !target
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be a register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
27229 pat = GEN_FCN (icode) (target);
27232 pat = GEN_FCN (icode) (target, args[0].op);
27235 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27238 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27241 gcc_unreachable ();
27247 return klass == store ? 0 : target;
27250 /* Return the integer constant in ARG. Constrain it to be in the range
27251 of the subparts of VEC_TYPE; issue an error if not. */
27254 get_element_number (tree vec_type, tree arg)
27256 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27258 if (!host_integerp (arg, 1)
27259 || (elt = tree_low_cst (arg, 1), elt > max))
27261 error ("selector must be an integer constant in the range 0..%wi", max);
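/* For example, for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so MAX is 3
   and a selector of 5 passed to __builtin_ia32_vec_ext_v4sf is rejected
   with the error above. */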
27268 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27269 ix86_expand_vector_init. We DO have language-level syntax for this, in
27270 the form of (type){ init-list }. Except that since we can't place emms
27271 instructions from inside the compiler, we can't allow the use of MMX
27272 registers unless the user explicitly asks for it. So we do *not* define
27273 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27274 we have builtins invoked by mmintrin.h that give us license to emit
27275 these sorts of instructions. */
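/* For instance, _mm_set_pi32 in mmintrin.h is implemented in terms of
   __builtin_ia32_vec_init_v2si, which is routed to
   ix86_expand_vec_init_builtin below instead of a generic vec_init
   expander. */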
27278 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27280 enum machine_mode tmode = TYPE_MODE (type);
27281 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27282 int i, n_elt = GET_MODE_NUNITS (tmode);
27283 rtvec v = rtvec_alloc (n_elt);
27285 gcc_assert (VECTOR_MODE_P (tmode));
27286 gcc_assert (call_expr_nargs (exp) == n_elt);
27288 for (i = 0; i < n_elt; ++i)
27290 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27291 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27294 if (!target || !register_operand (target, tmode))
27295 target = gen_reg_rtx (tmode);
27297 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27301 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27302 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27303 had a language-level syntax for referencing vector elements. */
27306 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27308 enum machine_mode tmode, mode0;
27313 arg0 = CALL_EXPR_ARG (exp, 0);
27314 arg1 = CALL_EXPR_ARG (exp, 1);
27316 op0 = expand_normal (arg0);
27317 elt = get_element_number (TREE_TYPE (arg0), arg1);
27319 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27320 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27321 gcc_assert (VECTOR_MODE_P (mode0));
27323 op0 = force_reg (mode0, op0);
27325 if (optimize || !target || !register_operand (target, tmode))
27326 target = gen_reg_rtx (tmode);
27328 ix86_expand_vector_extract (true, target, op0, elt);
27333 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27334 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27335 a language-level syntax for referencing vector elements. */
27338 ix86_expand_vec_set_builtin (tree exp)
27340 enum machine_mode tmode, mode1;
27341 tree arg0, arg1, arg2;
27343 rtx op0, op1, target;
27345 arg0 = CALL_EXPR_ARG (exp, 0);
27346 arg1 = CALL_EXPR_ARG (exp, 1);
27347 arg2 = CALL_EXPR_ARG (exp, 2);
27349 tmode = TYPE_MODE (TREE_TYPE (arg0));
27350 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27351 gcc_assert (VECTOR_MODE_P (tmode));
27353 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27354 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27355 elt = get_element_number (TREE_TYPE (arg0), arg2);
27357 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27358 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27360 op0 = force_reg (tmode, op0);
27361 op1 = force_reg (mode1, op1);
27363 /* OP0 is the source of these builtin functions and shouldn't be
27364 modified. Create a copy, use it and return it as target. */
27365 target = gen_reg_rtx (tmode);
27366 emit_move_insn (target, op0);
27367 ix86_expand_vector_set (true, target, op1, elt);
27372 /* Expand an expression EXP that calls a built-in function,
27373 with result going to TARGET if that's convenient
27374 (and in mode MODE if that's convenient).
27375 SUBTARGET may be used as the target for computing one of EXP's operands.
27376 IGNORE is nonzero if the value is to be ignored. */
27379 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27380 enum machine_mode mode ATTRIBUTE_UNUSED,
27381 int ignore ATTRIBUTE_UNUSED)
27383 const struct builtin_description *d;
27385 enum insn_code icode;
27386 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27387 tree arg0, arg1, arg2;
27388 rtx op0, op1, op2, pat;
27389 enum machine_mode mode0, mode1, mode2;
27390 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27392 /* Determine whether the builtin function is available under the current ISA.
27393 Originally the builtin was not created if it wasn't applicable to the
27394 current ISA based on the command line switches. With function specific
27395 options, we need to check in the context of the function making the call
27396 whether it is supported. */
27397 if (ix86_builtins_isa[fcode].isa
27398 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27400 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27401 NULL, NULL, false);
27404 error ("%qE needs unknown isa option", fndecl);
27407 gcc_assert (opts != NULL);
27408 error ("%qE needs isa option %s", fndecl, opts);
27416 case IX86_BUILTIN_MASKMOVQ:
27417 case IX86_BUILTIN_MASKMOVDQU:
27418 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27419 ? CODE_FOR_mmx_maskmovq
27420 : CODE_FOR_sse2_maskmovdqu);
27421 /* Note the arg order is different from the operand order. */
27422 arg1 = CALL_EXPR_ARG (exp, 0);
27423 arg2 = CALL_EXPR_ARG (exp, 1);
27424 arg0 = CALL_EXPR_ARG (exp, 2);
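/* E.g. _mm_maskmove_si64 (d, n, p) calls __builtin_ia32_maskmovq (d, n, p),
   while the insn operands are (address, data, mask), so operand 0 comes
   from the last call argument. */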
27425 op0 = expand_normal (arg0);
27426 op1 = expand_normal (arg1);
27427 op2 = expand_normal (arg2);
27428 mode0 = insn_data[icode].operand[0].mode;
27429 mode1 = insn_data[icode].operand[1].mode;
27430 mode2 = insn_data[icode].operand[2].mode;
27432 op0 = force_reg (Pmode, op0);
27433 op0 = gen_rtx_MEM (mode1, op0);
27435 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27436 op0 = copy_to_mode_reg (mode0, op0);
27437 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27438 op1 = copy_to_mode_reg (mode1, op1);
27439 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27440 op2 = copy_to_mode_reg (mode2, op2);
27441 pat = GEN_FCN (icode) (op0, op1, op2);
27447 case IX86_BUILTIN_LDMXCSR:
27448 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27449 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27450 emit_move_insn (target, op0);
27451 emit_insn (gen_sse_ldmxcsr (target));
27454 case IX86_BUILTIN_STMXCSR:
27455 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27456 emit_insn (gen_sse_stmxcsr (target));
27457 return copy_to_mode_reg (SImode, target);
27459 case IX86_BUILTIN_CLFLUSH:
27460 arg0 = CALL_EXPR_ARG (exp, 0);
27461 op0 = expand_normal (arg0);
27462 icode = CODE_FOR_sse2_clflush;
27463 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27464 op0 = copy_to_mode_reg (Pmode, op0);
27466 emit_insn (gen_sse2_clflush (op0));
27469 case IX86_BUILTIN_MONITOR:
27470 arg0 = CALL_EXPR_ARG (exp, 0);
27471 arg1 = CALL_EXPR_ARG (exp, 1);
27472 arg2 = CALL_EXPR_ARG (exp, 2);
27473 op0 = expand_normal (arg0);
27474 op1 = expand_normal (arg1);
27475 op2 = expand_normal (arg2);
27477 op0 = copy_to_mode_reg (Pmode, op0);
27479 op1 = copy_to_mode_reg (SImode, op1);
27481 op2 = copy_to_mode_reg (SImode, op2);
27482 emit_insn (ix86_gen_monitor (op0, op1, op2));
27485 case IX86_BUILTIN_MWAIT:
27486 arg0 = CALL_EXPR_ARG (exp, 0);
27487 arg1 = CALL_EXPR_ARG (exp, 1);
27488 op0 = expand_normal (arg0);
27489 op1 = expand_normal (arg1);
27491 op0 = copy_to_mode_reg (SImode, op0);
27493 op1 = copy_to_mode_reg (SImode, op1);
27494 emit_insn (gen_sse3_mwait (op0, op1));
27497 case IX86_BUILTIN_VEC_INIT_V2SI:
27498 case IX86_BUILTIN_VEC_INIT_V4HI:
27499 case IX86_BUILTIN_VEC_INIT_V8QI:
27500 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27502 case IX86_BUILTIN_VEC_EXT_V2DF:
27503 case IX86_BUILTIN_VEC_EXT_V2DI:
27504 case IX86_BUILTIN_VEC_EXT_V4SF:
27505 case IX86_BUILTIN_VEC_EXT_V4SI:
27506 case IX86_BUILTIN_VEC_EXT_V8HI:
27507 case IX86_BUILTIN_VEC_EXT_V2SI:
27508 case IX86_BUILTIN_VEC_EXT_V4HI:
27509 case IX86_BUILTIN_VEC_EXT_V16QI:
27510 return ix86_expand_vec_ext_builtin (exp, target);
27512 case IX86_BUILTIN_VEC_SET_V2DI:
27513 case IX86_BUILTIN_VEC_SET_V4SF:
27514 case IX86_BUILTIN_VEC_SET_V4SI:
27515 case IX86_BUILTIN_VEC_SET_V8HI:
27516 case IX86_BUILTIN_VEC_SET_V4HI:
27517 case IX86_BUILTIN_VEC_SET_V16QI:
27518 return ix86_expand_vec_set_builtin (exp);
27520 case IX86_BUILTIN_VEC_PERM_V2DF:
27521 case IX86_BUILTIN_VEC_PERM_V4SF:
27522 case IX86_BUILTIN_VEC_PERM_V2DI:
27523 case IX86_BUILTIN_VEC_PERM_V4SI:
27524 case IX86_BUILTIN_VEC_PERM_V8HI:
27525 case IX86_BUILTIN_VEC_PERM_V16QI:
27526 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27527 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27528 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27529 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27530 case IX86_BUILTIN_VEC_PERM_V4DF:
27531 case IX86_BUILTIN_VEC_PERM_V8SF:
27532 return ix86_expand_vec_perm_builtin (exp);
27534 case IX86_BUILTIN_INFQ:
27535 case IX86_BUILTIN_HUGE_VALQ:
27537 REAL_VALUE_TYPE inf;
27541 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27543 tmp = validize_mem (force_const_mem (mode, tmp));
27546 target = gen_reg_rtx (mode);
27548 emit_move_insn (target, tmp);
27552 case IX86_BUILTIN_LLWPCB:
27553 arg0 = CALL_EXPR_ARG (exp, 0);
27554 op0 = expand_normal (arg0);
27555 icode = CODE_FOR_lwp_llwpcb;
27556 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27557 op0 = copy_to_mode_reg (Pmode, op0);
27558 emit_insn (gen_lwp_llwpcb (op0));
27561 case IX86_BUILTIN_SLWPCB:
27562 icode = CODE_FOR_lwp_slwpcb;
27564 || !insn_data[icode].operand[0].predicate (target, Pmode))
27565 target = gen_reg_rtx (Pmode);
27566 emit_insn (gen_lwp_slwpcb (target));
27569 case IX86_BUILTIN_BEXTRI32:
27570 case IX86_BUILTIN_BEXTRI64:
27571 arg0 = CALL_EXPR_ARG (exp, 0);
27572 arg1 = CALL_EXPR_ARG (exp, 1);
27573 op0 = expand_normal (arg0);
27574 op1 = expand_normal (arg1);
27575 icode = (fcode == IX86_BUILTIN_BEXTRI32
27576 ? CODE_FOR_tbm_bextri_si
27577 : CODE_FOR_tbm_bextri_di);
27578 if (!CONST_INT_P (op1))
27580 error ("last argument must be an immediate");
27585 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27586 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27587 op1 = GEN_INT (length);
27588 op2 = GEN_INT (lsb_index);
27589 pat = GEN_FCN (icode) (target, op0, op1, op2);
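/* The immediate packs the bit field as bits [7:0] = starting bit and
   bits [15:8] = field length; e.g. __builtin_ia32_bextri_u32 (x, 0x0804)
   extracts the 8 bits of X starting at bit 4. */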
27595 case IX86_BUILTIN_RDRAND16_STEP:
27596 icode = CODE_FOR_rdrandhi_1;
27600 case IX86_BUILTIN_RDRAND32_STEP:
27601 icode = CODE_FOR_rdrandsi_1;
27605 case IX86_BUILTIN_RDRAND64_STEP:
27606 icode = CODE_FOR_rdranddi_1;
27610 op0 = gen_reg_rtx (mode0);
27611 emit_insn (GEN_FCN (icode) (op0));
27613 op1 = gen_reg_rtx (SImode);
27614 emit_move_insn (op1, CONST1_RTX (SImode));
27616 /* Emit SImode conditional move. */
27617 if (mode0 == HImode)
27619 op2 = gen_reg_rtx (SImode);
27620 emit_insn (gen_zero_extendhisi2 (op2, op0));
27622 else if (mode0 == SImode)
27625 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27627 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27629 emit_insn (gen_rtx_SET (VOIDmode, op1,
27630 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27631 emit_move_insn (target, op1);
27633 arg0 = CALL_EXPR_ARG (exp, 0);
27634 op1 = expand_normal (arg0);
27635 if (!address_operand (op1, VOIDmode))
27636 op1 = copy_addr_to_reg (op1);
27637 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
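/* The status computation above exploits the fact that rdrand zeroes its
   destination register when no random number is available (CF clear):
   the GEU test on the carry flag then selects the zeroed result instead
   of the preloaded 1, so the builtin yields 1 on success and 0 on
   failure. */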
27644 for (i = 0, d = bdesc_special_args;
27645 i < ARRAY_SIZE (bdesc_special_args);
27647 if (d->code == fcode)
27648 return ix86_expand_special_args_builtin (d, exp, target);
27650 for (i = 0, d = bdesc_args;
27651 i < ARRAY_SIZE (bdesc_args);
27653 if (d->code == fcode)
27656 case IX86_BUILTIN_FABSQ:
27657 case IX86_BUILTIN_COPYSIGNQ:
27659 /* Emit a normal call if SSE2 isn't available. */
27660 return expand_call (exp, target, ignore);
27662 return ix86_expand_args_builtin (d, exp, target);
27665 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27666 if (d->code == fcode)
27667 return ix86_expand_sse_comi (d, exp, target);
27669 for (i = 0, d = bdesc_pcmpestr;
27670 i < ARRAY_SIZE (bdesc_pcmpestr);
27672 if (d->code == fcode)
27673 return ix86_expand_sse_pcmpestr (d, exp, target);
27675 for (i = 0, d = bdesc_pcmpistr;
27676 i < ARRAY_SIZE (bdesc_pcmpistr);
27678 if (d->code == fcode)
27679 return ix86_expand_sse_pcmpistr (d, exp, target);
27681 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27682 if (d->code == fcode)
27683 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27684 (enum ix86_builtin_func_type)
27685 d->flag, d->comparison);
27687 gcc_unreachable ();
27690 /* Returns a function decl for a vectorized version of the builtin function
27691 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27692 if it is not available. */
27695 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27698 enum machine_mode in_mode, out_mode;
27700 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27702 if (TREE_CODE (type_out) != VECTOR_TYPE
27703 || TREE_CODE (type_in) != VECTOR_TYPE
27704 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27707 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27708 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27709 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27710 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27714 case BUILT_IN_SQRT:
27715 if (out_mode == DFmode && in_mode == DFmode)
27717 if (out_n == 2 && in_n == 2)
27718 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27719 else if (out_n == 4 && in_n == 4)
27720 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27724 case BUILT_IN_SQRTF:
27725 if (out_mode == SFmode && in_mode == SFmode)
27727 if (out_n == 4 && in_n == 4)
27728 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27729 else if (out_n == 8 && in_n == 8)
27730 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27734 case BUILT_IN_LRINT:
27735 if (out_mode == SImode && out_n == 4
27736 && in_mode == DFmode && in_n == 2)
27737 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27740 case BUILT_IN_LRINTF:
27741 if (out_mode == SImode && in_mode == SFmode)
27743 if (out_n == 4 && in_n == 4)
27744 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27745 else if (out_n == 8 && in_n == 8)
27746 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27750 case BUILT_IN_COPYSIGN:
27751 if (out_mode == DFmode && in_mode == DFmode)
27753 if (out_n == 2 && in_n == 2)
27754 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27755 else if (out_n == 4 && in_n == 4)
27756 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27760 case BUILT_IN_COPYSIGNF:
27761 if (out_mode == SFmode && in_mode == SFmode)
27763 if (out_n == 4 && in_n == 4)
27764 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27765 else if (out_n == 8 && in_n == 8)
27766 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27770 case BUILT_IN_FLOOR:
27771 /* The round insn does not trap on denormals. */
27772 if (flag_trapping_math || !TARGET_ROUND)
27775 if (out_mode == DFmode && in_mode == DFmode)
27777 if (out_n == 2 && in_n == 2)
27778 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27779 else if (out_n == 4 && in_n == 4)
27780 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27784 case BUILT_IN_FLOORF:
27785 /* The round insn does not trap on denormals. */
27786 if (flag_trapping_math || !TARGET_ROUND)
27789 if (out_mode == SFmode && in_mode == SFmode)
27791 if (out_n == 4 && in_n == 4)
27792 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27793 else if (out_n == 8 && in_n == 8)
27794 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27798 case BUILT_IN_CEIL:
27799 /* The round insn does not trap on denormals. */
27800 if (flag_trapping_math || !TARGET_ROUND)
27803 if (out_mode == DFmode && in_mode == DFmode)
27805 if (out_n == 2 && in_n == 2)
27806 return ix86_builtins[IX86_BUILTIN_CEILPD];
27807 else if (out_n == 4 && in_n == 4)
27808 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27812 case BUILT_IN_CEILF:
27813 /* The round insn does not trap on denormals. */
27814 if (flag_trapping_math || !TARGET_ROUND)
27817 if (out_mode == SFmode && in_mode == SFmode)
27819 if (out_n == 4 && in_n == 4)
27820 return ix86_builtins[IX86_BUILTIN_CEILPS];
27821 else if (out_n == 8 && in_n == 8)
27822 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27826 case BUILT_IN_TRUNC:
27827 /* The round insn does not trap on denormals. */
27828 if (flag_trapping_math || !TARGET_ROUND)
27831 if (out_mode == DFmode && in_mode == DFmode)
27833 if (out_n == 2 && in_n == 2)
27834 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27835 else if (out_n == 4 && in_n == 4)
27836 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27840 case BUILT_IN_TRUNCF:
27841 /* The round insn does not trap on denormals. */
27842 if (flag_trapping_math || !TARGET_ROUND)
27845 if (out_mode == SFmode && in_mode == SFmode)
27847 if (out_n == 4 && in_n == 4)
27848 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27849 else if (out_n == 8 && in_n == 8)
27850 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27854 case BUILT_IN_RINT:
27855 /* The round insn does not trap on denormals. */
27856 if (flag_trapping_math || !TARGET_ROUND)
27859 if (out_mode == DFmode && in_mode == DFmode)
27861 if (out_n == 2 && in_n == 2)
27862 return ix86_builtins[IX86_BUILTIN_RINTPD];
27863 else if (out_n == 4 && in_n == 4)
27864 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27868 case BUILT_IN_RINTF:
27869 /* The round insn does not trap on denormals. */
27870 if (flag_trapping_math || !TARGET_ROUND)
27873 if (out_mode == SFmode && in_mode == SFmode)
27875 if (out_n == 4 && in_n == 4)
27876 return ix86_builtins[IX86_BUILTIN_RINTPS];
27877 else if (out_n == 8 && in_n == 8)
27878 return ix86_builtins[IX86_BUILTIN_RINTPS256];
27883 if (out_mode == DFmode && in_mode == DFmode)
27885 if (out_n == 2 && in_n == 2)
27886 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27887 if (out_n == 4 && in_n == 4)
27888 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27892 case BUILT_IN_FMAF:
27893 if (out_mode == SFmode && in_mode == SFmode)
27895 if (out_n == 4 && in_n == 4)
27896 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27897 if (out_n == 8 && in_n == 8)
27898 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27906 /* Dispatch to a handler for a vectorization library. */
27907 if (ix86_veclib_handler)
27908 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27914 /* Handler for an SVML-style interface to
27915 a library with vectorized intrinsics. */
27918 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27921 tree fntype, new_fndecl, args;
27924 enum machine_mode el_mode, in_mode;
27927 /* The SVML is suitable for unsafe math only. */
27928 if (!flag_unsafe_math_optimizations)
27931 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27932 n = TYPE_VECTOR_SUBPARTS (type_out);
27933 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27934 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27935 if (el_mode != in_mode
27943 case BUILT_IN_LOG10:
27945 case BUILT_IN_TANH:
27947 case BUILT_IN_ATAN:
27948 case BUILT_IN_ATAN2:
27949 case BUILT_IN_ATANH:
27950 case BUILT_IN_CBRT:
27951 case BUILT_IN_SINH:
27953 case BUILT_IN_ASINH:
27954 case BUILT_IN_ASIN:
27955 case BUILT_IN_COSH:
27957 case BUILT_IN_ACOSH:
27958 case BUILT_IN_ACOS:
27959 if (el_mode != DFmode || n != 2)
27963 case BUILT_IN_EXPF:
27964 case BUILT_IN_LOGF:
27965 case BUILT_IN_LOG10F:
27966 case BUILT_IN_POWF:
27967 case BUILT_IN_TANHF:
27968 case BUILT_IN_TANF:
27969 case BUILT_IN_ATANF:
27970 case BUILT_IN_ATAN2F:
27971 case BUILT_IN_ATANHF:
27972 case BUILT_IN_CBRTF:
27973 case BUILT_IN_SINHF:
27974 case BUILT_IN_SINF:
27975 case BUILT_IN_ASINHF:
27976 case BUILT_IN_ASINF:
27977 case BUILT_IN_COSHF:
27978 case BUILT_IN_COSF:
27979 case BUILT_IN_ACOSHF:
27980 case BUILT_IN_ACOSF:
27981 if (el_mode != SFmode || n != 4)
27989 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27991 if (fn == BUILT_IN_LOGF)
27992 strcpy (name, "vmlsLn4");
27993 else if (fn == BUILT_IN_LOG)
27994 strcpy (name, "vmldLn2");
27997 sprintf (name, "vmls%s", bname+10);
27998 name[strlen (name)-1] = '4';
28001 sprintf (name, "vmld%s2", bname+10);
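/* E.g. for BUILT_IN_SINF, bname is "__builtin_sinf" and bname+10 skips
   the "__builtin_" prefix: the SFmode branch first produces "vmlssinf",
   overwrites the trailing 'f' with the vector width to get "vmlssin4",
   and the case fixup below should turn that into "vmlsSin4". */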
28003 /* Convert to uppercase. */
28007 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28008 args = TREE_CHAIN (args))
28012 fntype = build_function_type_list (type_out, type_in, NULL);
28014 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28016 /* Build a function declaration for the vectorized function. */
28017 new_fndecl = build_decl (BUILTINS_LOCATION,
28018 FUNCTION_DECL, get_identifier (name), fntype);
28019 TREE_PUBLIC (new_fndecl) = 1;
28020 DECL_EXTERNAL (new_fndecl) = 1;
28021 DECL_IS_NOVOPS (new_fndecl) = 1;
28022 TREE_READONLY (new_fndecl) = 1;
28027 /* Handler for an ACML-style interface to
28028 a library with vectorized intrinsics. */
28031 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
28033 char name[20] = "__vr.._";
28034 tree fntype, new_fndecl, args;
28037 enum machine_mode el_mode, in_mode;
28040 /* The ACML is 64-bit only, and suitable only for unsafe math, as
28041 it does not correctly support parts of IEEE with the required
28042 precision, such as denormals. */
28044 || !flag_unsafe_math_optimizations)
28047 el_mode = TYPE_MODE (TREE_TYPE (type_out));
28048 n = TYPE_VECTOR_SUBPARTS (type_out);
28049 in_mode = TYPE_MODE (TREE_TYPE (type_in));
28050 in_n = TYPE_VECTOR_SUBPARTS (type_in);
28051 if (el_mode != in_mode
28061 case BUILT_IN_LOG2:
28062 case BUILT_IN_LOG10:
28065 if (el_mode != DFmode
28070 case BUILT_IN_SINF:
28071 case BUILT_IN_COSF:
28072 case BUILT_IN_EXPF:
28073 case BUILT_IN_POWF:
28074 case BUILT_IN_LOGF:
28075 case BUILT_IN_LOG2F:
28076 case BUILT_IN_LOG10F:
28079 if (el_mode != SFmode
28088 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
28089 sprintf (name + 7, "%s", bname+10);
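/* The two dots in the template are patched with the precision letter
   and vector width, so e.g. BUILT_IN_SINF should map to "__vrs4_sinf"
   and BUILT_IN_SIN to "__vrd2_sin". */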
28092 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
28093 args = TREE_CHAIN (args))
28097 fntype = build_function_type_list (type_out, type_in, NULL);
28099 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
28101 /* Build a function declaration for the vectorized function. */
28102 new_fndecl = build_decl (BUILTINS_LOCATION,
28103 FUNCTION_DECL, get_identifier (name), fntype);
28104 TREE_PUBLIC (new_fndecl) = 1;
28105 DECL_EXTERNAL (new_fndecl) = 1;
28106 DECL_IS_NOVOPS (new_fndecl) = 1;
28107 TREE_READONLY (new_fndecl) = 1;
28113 /* Returns a decl of a function that implements conversion of an integer vector
28114 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28115 are the types involved when converting according to CODE.
28116 Return NULL_TREE if it is not available. */
28119 ix86_vectorize_builtin_conversion (unsigned int code,
28120 tree dest_type, tree src_type)
28128 switch (TYPE_MODE (src_type))
28131 switch (TYPE_MODE (dest_type))
28134 return (TYPE_UNSIGNED (src_type)
28135 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28136 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28138 return (TYPE_UNSIGNED (src_type)
28140 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28146 switch (TYPE_MODE (dest_type))
28149 return (TYPE_UNSIGNED (src_type)
28151 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28160 case FIX_TRUNC_EXPR:
28161 switch (TYPE_MODE (dest_type))
28164 switch (TYPE_MODE (src_type))
28167 return (TYPE_UNSIGNED (dest_type)
28169 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28171 return (TYPE_UNSIGNED (dest_type)
28173 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28180 switch (TYPE_MODE (src_type))
28183 return (TYPE_UNSIGNED (dest_type)
28185 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28202 /* Returns a code for a target-specific builtin that implements
28203 the reciprocal of the function, or NULL_TREE if not available. */
28206 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28207 bool sqrt ATTRIBUTE_UNUSED)
28209 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28210 && flag_finite_math_only && !flag_trapping_math
28211 && flag_unsafe_math_optimizations))
28215 /* Machine dependent builtins. */
28218 /* Vectorized version of sqrt to rsqrt conversion. */
28219 case IX86_BUILTIN_SQRTPS_NR:
28220 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28222 case IX86_BUILTIN_SQRTPS_NR256:
28223 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28229 /* Normal builtins. */
28232 /* Sqrt to rsqrt conversion. */
28233 case BUILT_IN_SQRTF:
28234 return ix86_builtins[IX86_BUILTIN_RSQRTF];
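/* This is what allows, e.g., 1.0f / sqrtf (x) to be rewritten using the
   Newton-Raphson rsqrt sequences (the _NR builtins) once the unsafe-math
   guards at the top of the function are satisfied. */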
28241 /* Helper for avx_vpermilps256_operand et al. This is also used by
28242 the expansion functions to turn the parallel back into a mask.
28243 The return value is 0 for no match and the imm8+1 for a match. */
28246 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28248 unsigned i, nelt = GET_MODE_NUNITS (mode);
28250 unsigned char ipar[8];
28252 if (XVECLEN (par, 0) != (int) nelt)
28255 /* Validate that all of the elements are constants, and not totally
28256 out of range. Copy the data into an integral array to make the
28257 subsequent checks easier. */
28258 for (i = 0; i < nelt; ++i)
28260 rtx er = XVECEXP (par, 0, i);
28261 unsigned HOST_WIDE_INT ei;
28263 if (!CONST_INT_P (er))
28274 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
28276 for (i = 0; i < 2; ++i)
28280 mask |= ipar[i] << i;
28282 for (i = 2; i < 4; ++i)
28286 mask |= (ipar[i] - 2) << i;
28291 /* In the 256-bit SFmode case, we have full freedom of movement
28292 within the low 128-bit lane, but the high 128-bit lane must
28293 mirror the exact same pattern. */
28294 for (i = 0; i < 4; ++i)
28295 if (ipar[i] + 4 != ipar[i + 4])
28302 /* In the 128-bit case, we have full freedom in the placement of
28303 the elements from the source operand. */
28304 for (i = 0; i < nelt; ++i)
28305 mask |= ipar[i] << (i * (nelt / 2));
28309 gcc_unreachable ();
28312 /* Make sure success has a non-zero value by adding one. */
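/* Worked V4SF example: the selector (1 0 3 2) contributes two bits per
   element, so mask = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1 and the
   return value is 0xb1 + 1. */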
28316 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28317 the expansion functions to turn the parallel back into a mask.
28318 The return value is 0 for no match and the imm8+1 for a match. */
28321 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28323 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28325 unsigned char ipar[8];
28327 if (XVECLEN (par, 0) != (int) nelt)
28330 /* Validate that all of the elements are constants, and not totally
28331 out of range. Copy the data into an integral array to make the
28332 subsequent checks easier. */
28333 for (i = 0; i < nelt; ++i)
28335 rtx er = XVECEXP (par, 0, i);
28336 unsigned HOST_WIDE_INT ei;
28338 if (!CONST_INT_P (er))
28341 if (ei >= 2 * nelt)
28346 /* Validate that each half of the permute selects consecutive elements. */
28347 for (i = 0; i < nelt2 - 1; ++i)
28348 if (ipar[i] + 1 != ipar[i + 1])
28350 for (i = nelt2; i < nelt - 1; ++i)
28351 if (ipar[i] + 1 != ipar[i + 1])
28354 /* Reconstruct the mask. */
28355 for (i = 0; i < 2; ++i)
28357 unsigned e = ipar[i * nelt2];
28361 mask |= e << (i * 4);
28364 /* Make sure success has a non-zero value by adding one. */
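/* Worked V4DF example: the selector (0 1 6 7) takes lane 0 of the first
   operand and lane 3 (the high lane of the second operand); each half's
   leading element divided by nelt2 gives the lane numbers, so
   mask = 0 | (3 << 4) = 0x30 and the return value is 0x31. */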
28369 /* Store OPERAND to memory after reload is completed. This means
28370 that we can't easily use assign_stack_local. */
28372 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28376 gcc_assert (reload_completed);
28377 if (ix86_using_red_zone ())
28379 result = gen_rtx_MEM (mode,
28380 gen_rtx_PLUS (Pmode,
28382 GEN_INT (-RED_ZONE_SIZE)));
28383 emit_move_insn (result, operand);
28385 else if (TARGET_64BIT)
28391 operand = gen_lowpart (DImode, operand);
28395 gen_rtx_SET (VOIDmode,
28396 gen_rtx_MEM (DImode,
28397 gen_rtx_PRE_DEC (DImode,
28398 stack_pointer_rtx)),
28402 gcc_unreachable ();
28404 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28413 split_double_mode (mode, &operand, 1, operands, operands + 1);
28415 gen_rtx_SET (VOIDmode,
28416 gen_rtx_MEM (SImode,
28417 gen_rtx_PRE_DEC (Pmode,
28418 stack_pointer_rtx)),
28421 gen_rtx_SET (VOIDmode,
28422 gen_rtx_MEM (SImode,
28423 gen_rtx_PRE_DEC (Pmode,
28424 stack_pointer_rtx)),
28429 /* Store HImodes as SImodes. */
28430 operand = gen_lowpart (SImode, operand);
28434 gen_rtx_SET (VOIDmode,
28435 gen_rtx_MEM (GET_MODE (operand),
28436 gen_rtx_PRE_DEC (SImode,
28437 stack_pointer_rtx)),
28441 gcc_unreachable ();
28443 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28448 /* Free the operand from memory. */
28450 ix86_free_from_memory (enum machine_mode mode)
28452 if (!ix86_using_red_zone ())
28456 if (mode == DImode || TARGET_64BIT)
28460 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28461 to a pop or add instruction if registers are available. */
28462 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28463 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28468 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28470 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28471 QImode must go into class Q_REGS.
28472 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28473 movdf to do mem-to-mem moves through integer regs. */
28476 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28478 enum machine_mode mode = GET_MODE (x);
28480 /* We're only allowed to return a subclass of CLASS. Many of the
28481 following checks fail for NO_REGS, so eliminate that early. */
28482 if (regclass == NO_REGS)
28485 /* All classes can load zeros. */
28486 if (x == CONST0_RTX (mode))
28489 /* Force constants into memory if we are loading a (nonzero) constant into
28490 an MMX or SSE register. This is because there are no MMX/SSE instructions
28491 to load from a constant. */
28493 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28496 /* Prefer SSE regs only, if we can use them for math. */
28497 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28498 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28500 /* Floating-point constants need more complex checks. */
28501 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28503 /* General regs can load everything. */
28504 if (reg_class_subset_p (regclass, GENERAL_REGS))
28507 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28508 zero above. We only want to wind up preferring 80387 registers if
28509 we plan on doing computation with them. */
28511 && standard_80387_constant_p (x))
28513 /* Limit class to non-sse. */
28514 if (regclass == FLOAT_SSE_REGS)
28516 if (regclass == FP_TOP_SSE_REGS)
28518 if (regclass == FP_SECOND_SSE_REGS)
28519 return FP_SECOND_REG;
28520 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28527 /* Generally when we see PLUS here, it's the function invariant
28528 (plus soft-fp const_int), which can only be computed into general regs. */
28530 if (GET_CODE (x) == PLUS)
28531 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28533 /* QImode constants are easy to load, but non-constant QImode data
28534 must go into Q_REGS. */
28535 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28537 if (reg_class_subset_p (regclass, Q_REGS))
28539 if (reg_class_subset_p (Q_REGS, regclass))
28547 /* Discourage putting floating-point values in SSE registers unless
28548 SSE math is being used, and likewise for the 387 registers. */
28550 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28552 enum machine_mode mode = GET_MODE (x);
28554 /* Restrict the output reload class to the register bank that we are doing
28555 math on. If we would like not to return a subset of CLASS, reject this
28556 alternative: if reload cannot do this, it will still use its choice. */
28558 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28559 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28561 if (X87_FLOAT_MODE_P (mode))
28563 if (regclass == FP_TOP_SSE_REGS)
28565 else if (regclass == FP_SECOND_SSE_REGS)
28566 return FP_SECOND_REG;
28568 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28575 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28576 enum machine_mode mode,
28577 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28579 /* QImode spills from non-QI registers require an
28580 intermediate register on 32-bit targets. */
28582 && !in_p && mode == QImode
28583 && (rclass == GENERAL_REGS
28584 || rclass == LEGACY_REGS
28585 || rclass == INDEX_REGS))
28594 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28595 regno = true_regnum (x);
28597 /* Return Q_REGS if the operand is in memory. */
28602 /* This condition handles the corner case where an expression involving
28603 pointers gets vectorized. We're trying to use the address of a
28604 stack slot as a vector initializer.
28606 (set (reg:V2DI 74 [ vect_cst_.2 ])
28607 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28609 Eventually frame gets turned into sp+offset like this:
28611 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28612 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28613 (const_int 392 [0x188]))))
28615 That later gets turned into:
28617 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28618 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28619 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28621 We'll have the following reload recorded:
28623 Reload 0: reload_in (DI) =
28624 (plus:DI (reg/f:DI 7 sp)
28625 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28626 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28627 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28628 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28629 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28630 reload_reg_rtx: (reg:V2DI 22 xmm1)
28632 Which isn't going to work since SSE instructions can't handle scalar
28633 additions. Returning GENERAL_REGS forces the addition into integer
28634 register and reload can handle subsequent reloads without problems. */
28636 if (in_p && GET_CODE (x) == PLUS
28637 && SSE_CLASS_P (rclass)
28638 && SCALAR_INT_MODE_P (mode))
28639 return GENERAL_REGS;
28644 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28647 ix86_class_likely_spilled_p (reg_class_t rclass)
28658 case SSE_FIRST_REG:
28660 case FP_SECOND_REG:
28670 /* If we are copying between general and FP registers, we need a memory
28671 location. The same is true for SSE and MMX registers.
28673 To optimize register_move_cost performance, allow inline variant.
28675 The macro can't work reliably when one of the CLASSES is a class containing
28676 registers from multiple units (SSE, MMX, integer). We avoid this by never
28677 combining those units in single alternative in the machine description.
28678 Ensure that this constraint holds to avoid unexpected surprises.
28680 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28681 enforce these sanity checks. */
28684 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28685 enum machine_mode mode, int strict)
28687 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28688 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28689 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28690 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28691 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28692 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28694 gcc_assert (!strict);
28698 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28701 /* ??? This is a lie. We do have moves between mmx/general, and for
28702 mmx/sse2. But by saying we need secondary memory we discourage the
28703 register allocator from using the mmx registers unless needed. */
28704 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28707 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28709 /* SSE1 doesn't have any direct moves from other classes. */
28713 /* If the target says that inter-unit moves are more expensive
28714 than moving through memory, then don't generate them. */
28715 if (!TARGET_INTER_UNIT_MOVES)
28718 /* Between SSE and general, we have moves no larger than word size. */
28719 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28727 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28728 enum machine_mode mode, int strict)
28730 return inline_secondary_memory_needed (class1, class2, mode, strict);
28733 /* Return true if the registers in CLASS cannot represent the change from
28734 modes FROM to TO. */
28737 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28738 enum reg_class regclass)
28743 /* x87 registers can't do subreg at all, as all values are reformatted
28744 to extended precision. */
28745 if (MAYBE_FLOAT_CLASS_P (regclass))
28748 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28750 /* Vector registers do not support QI or HImode loads. If we don't
28751 disallow a change to these modes, reload will assume it's ok to
28752 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28753 the vec_dupv4hi pattern. */
28754 if (GET_MODE_SIZE (from) < 4)
28757 /* Vector registers do not support subreg with nonzero offsets, which
28758 are otherwise valid for integer registers. Since we can't see
28759 whether we have a nonzero offset from here, prohibit all
28760 nonparadoxical subregs changing size. */
28761 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28768 /* Return the cost of moving data of mode M between a
28769 register and memory. A value of 2 is the default; this cost is
28770 relative to those in `REGISTER_MOVE_COST'.
28772 This function is used extensively by register_move_cost, which is used
28773 to build tables at startup. Make it inline in this case.
28774 When IN is 2, return the maximum of the in and out move cost.
28776 If moving between registers and memory is more expensive than
28777 between two registers, you should define this macro to express the relative cost.
28780 Also model the increased cost of moving QImode registers in non Q_REGS classes. */
28784 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28788 if (FLOAT_CLASS_P (regclass))
28806 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28807 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28809 if (SSE_CLASS_P (regclass))
28812 switch (GET_MODE_SIZE (mode))
28827 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28828 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28830 if (MMX_CLASS_P (regclass))
28833 switch (GET_MODE_SIZE (mode))
28845 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28846 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28848 switch (GET_MODE_SIZE (mode))
28851 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28854 return ix86_cost->int_store[0];
28855 if (TARGET_PARTIAL_REG_DEPENDENCY
28856 && optimize_function_for_speed_p (cfun))
28857 cost = ix86_cost->movzbl_load;
28859 cost = ix86_cost->int_load[0];
28861 return MAX (cost, ix86_cost->int_store[0]);
28867 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28869 return ix86_cost->movzbl_load;
28871 return ix86_cost->int_store[0] + 4;
28876 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28877 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28879 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28880 if (mode == TFmode)
28883 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28885 cost = ix86_cost->int_load[2];
28887 cost = ix86_cost->int_store[2];
28888 return (cost * (((int) GET_MODE_SIZE (mode)
28889 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28894 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28897 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28901 /* Return the cost of moving data from a register in class CLASS1 to
28902 one in class CLASS2.
28904 It is not required that the cost always equal 2 when FROM is the same as TO;
28905 on some machines it is expensive to move between registers if they are not
28906 general registers. */
28909 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28910 reg_class_t class2_i)
28912 enum reg_class class1 = (enum reg_class) class1_i;
28913 enum reg_class class2 = (enum reg_class) class2_i;
28915 /* In case we require secondary memory, compute cost of the store followed
28916 by load. In order to avoid bad register allocation choices, we need
28917 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28919 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28923 cost += inline_memory_move_cost (mode, class1, 2);
28924 cost += inline_memory_move_cost (mode, class2, 2);
28926 /* In case of copying from general_purpose_register we may emit multiple
28927 stores followed by single load causing memory size mismatch stall.
28928 Count this as arbitrarily high cost of 20. */
28929 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28932 /* In the case of FP/MMX moves, the registers actually overlap, and we
28933 have to switch modes in order to treat them differently. */
28934 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28935 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28941 /* Moves between SSE/MMX and integer unit are expensive. */
28942 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28943 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28945 /* ??? By keeping returned value relatively high, we limit the number
28946 of moves between integer and MMX/SSE registers for all targets.
28947 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
28948 where integer modes in MMX/SSE registers are not tieable
28949 because of missing QImode and HImode moves to, from or between
28950 MMX/SSE registers. */
28951 return MAX (8, ix86_cost->mmxsse_to_integer);
28953 if (MAYBE_FLOAT_CLASS_P (class1))
28954 return ix86_cost->fp_move;
28955 if (MAYBE_SSE_CLASS_P (class1))
28956 return ix86_cost->sse_move;
28957 if (MAYBE_MMX_CLASS_P (class1))
28958 return ix86_cost->mmx_move;
28962 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28965 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28967 /* The flags register, and only the flags register, can hold CCmode values. */
28968 if (CC_REGNO_P (regno))
28969 return GET_MODE_CLASS (mode) == MODE_CC;
28970 if (GET_MODE_CLASS (mode) == MODE_CC
28971 || GET_MODE_CLASS (mode) == MODE_RANDOM
28972 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28974 if (FP_REGNO_P (regno))
28975 return VALID_FP_MODE_P (mode);
28976 if (SSE_REGNO_P (regno))
28978 /* We implement the move patterns for all vector modes into and
28979 out of SSE registers, even when no operation instructions
28980 are available. OImode move is available only when AVX is enabled. */
28982 return ((TARGET_AVX && mode == OImode)
28983 || VALID_AVX256_REG_MODE (mode)
28984 || VALID_SSE_REG_MODE (mode)
28985 || VALID_SSE2_REG_MODE (mode)
28986 || VALID_MMX_REG_MODE (mode)
28987 || VALID_MMX_REG_MODE_3DNOW (mode));
28989 if (MMX_REGNO_P (regno))
28991 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28992 so if the register is available at all, then we can move data of
28993 the given mode into or out of it. */
28994 return (VALID_MMX_REG_MODE (mode)
28995 || VALID_MMX_REG_MODE_3DNOW (mode));
28998 if (mode == QImode)
29000 /* Take care for QImode values - they can be in non-QI regs,
29001 but then they do cause partial register stalls. */
29002 if (regno <= BX_REG || TARGET_64BIT)
29004 if (!TARGET_PARTIAL_REG_STALL)
29006 return reload_in_progress || reload_completed;
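/* Only %eax, %ebx, %ecx and %edx have addressable low-byte registers in
   32-bit mode, which is what the regno <= BX_REG test above captures;
   in 64-bit mode every general purpose register does. */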
29008 /* We handle both integers and floats in the general purpose registers. */
29009 else if (VALID_INT_MODE_P (mode))
29011 else if (VALID_FP_MODE_P (mode))
29013 else if (VALID_DFP_MODE_P (mode))
29015 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
29016 on to use that value in smaller contexts, this can easily force a
29017 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
29018 supporting DImode, allow it. */
29019 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
29025 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
29026 tieable integer mode. */
29029 ix86_tieable_integer_mode_p (enum machine_mode mode)
29038 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
29041 return TARGET_64BIT;
29048 /* Return true if MODE1 is accessible in a register that can hold MODE2
29049 without copying. That is, all register classes that can hold MODE2
29050 can also hold MODE1. */
29053 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
29055 if (mode1 == mode2)
29058 if (ix86_tieable_integer_mode_p (mode1)
29059 && ix86_tieable_integer_mode_p (mode2))
29062 /* MODE2 being XFmode implies fp stack or general regs, which means we
29063 can tie any smaller floating point modes to it. Note that we do not
29064 tie this with TFmode. */
29065 if (mode2 == XFmode)
29066 return mode1 == SFmode || mode1 == DFmode;
29068 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
29069 that we can tie it with SFmode. */
29070 if (mode2 == DFmode)
29071 return mode1 == SFmode;
29073 /* If MODE2 is only appropriate for an SSE register, then tie with
29074 any other mode acceptable to SSE registers. */
29075 if (GET_MODE_SIZE (mode2) == 16
29076 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
29077 return (GET_MODE_SIZE (mode1) == 16
29078 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
29080 /* If MODE2 is appropriate for an MMX register, then tie
29081 with any other mode acceptable to MMX registers. */
29082 if (GET_MODE_SIZE (mode2) == 8
29083 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
29084 return (GET_MODE_SIZE (mode1) == 8
29085 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
29090 /* Compute a (partial) cost for rtx X. Return true if the complete
29091 cost has been computed, and false if subexpressions should be
29092 scanned. In either case, *TOTAL contains the cost result. */
29095 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
29097 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29098 enum machine_mode mode = GET_MODE (x);
29099 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29107 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29109 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29111 else if (flag_pic && SYMBOLIC_CONST (x)
29113 || (GET_CODE (x) != LABEL_REF
29114 && (GET_CODE (x) != SYMBOL_REF
29115 || !SYMBOL_REF_LOCAL_P (x)))))
29122 if (mode == VOIDmode)
29125 switch (standard_80387_constant_p (x))
29130 default: /* Other constants */
29135 /* Start with (MEM (SYMBOL_REF)), since that's where
29136 it'll probably end up. Add a penalty for size. */
29137 *total = (COSTS_N_INSNS (1)
29138 + (flag_pic != 0 && !TARGET_64BIT)
29139 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29145 /* The zero extension is often completely free on x86_64, so make
29146 it as cheap as possible. */
29147 if (TARGET_64BIT && mode == DImode
29148 && GET_MODE (XEXP (x, 0)) == SImode)
29150 else if (TARGET_ZERO_EXTEND_WITH_AND)
29151 *total = cost->add;
29153 *total = cost->movzx;
29157 *total = cost->movsx;
29161 if (CONST_INT_P (XEXP (x, 1))
29162 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29164 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29167 *total = cost->add;
29170 if ((value == 2 || value == 3)
29171 && cost->lea <= cost->shift_const)
29173 *total = cost->lea;
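/* Shift counts of 2 and 3 fit the scaled-index form of LEA, e.g. x << 3
   can be emitted as an lea with a *8 scale, so the LEA cost is preferred
   when it is no worse than a constant shift. */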
29183 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29185 if (CONST_INT_P (XEXP (x, 1)))
29187 if (INTVAL (XEXP (x, 1)) > 32)
29188 *total = cost->shift_const + COSTS_N_INSNS (2);
29190 *total = cost->shift_const * 2;
29194 if (GET_CODE (XEXP (x, 1)) == AND)
29195 *total = cost->shift_var * 2;
29197 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29202 if (CONST_INT_P (XEXP (x, 1)))
29203 *total = cost->shift_const;
29205 *total = cost->shift_var;
29213 gcc_assert (FLOAT_MODE_P (mode));
29214 gcc_assert (TARGET_FMA || TARGET_FMA4);
29216 /* ??? SSE scalar/vector cost should be used here. */
29217 /* ??? Bald assumption that fma has the same cost as fmul. */
29218 *total = cost->fmul;
29219 *total += rtx_cost (XEXP (x, 1), FMA, speed);
29221 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
29223 if (GET_CODE (sub) == NEG)
29225 *total += rtx_cost (sub, FMA, speed);
29228 if (GET_CODE (sub) == NEG)
29230 *total += rtx_cost (sub, FMA, speed);
29235 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29237 /* ??? SSE scalar cost should be used here. */
29238 *total = cost->fmul;
29241 else if (X87_FLOAT_MODE_P (mode))
29243 *total = cost->fmul;
29246 else if (FLOAT_MODE_P (mode))
29248 /* ??? SSE vector cost should be used here. */
29249 *total = cost->fmul;
29254 rtx op0 = XEXP (x, 0);
29255 rtx op1 = XEXP (x, 1);
29257 if (CONST_INT_P (XEXP (x, 1)))
29259 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29260 for (nbits = 0; value != 0; value &= value - 1)
29264 /* This is arbitrary. */
29267 /* Compute costs correctly for widening multiplication. */
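/* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI))) is a
   single widening imul, so below MODE is narrowed to the inner SImode
   before the cost table is consulted. */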
29268 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29269 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29270 == GET_MODE_SIZE (mode))
29272 int is_mulwiden = 0;
29273 enum machine_mode inner_mode = GET_MODE (op0);
29275 if (GET_CODE (op0) == GET_CODE (op1))
29276 is_mulwiden = 1, op1 = XEXP (op1, 0);
29277 else if (CONST_INT_P (op1))
29279 if (GET_CODE (op0) == SIGN_EXTEND)
29280 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29283 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29287 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29290 *total = (cost->mult_init[MODE_INDEX (mode)]
29291 + nbits * cost->mult_bit
29292 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
29301 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29302 /* ??? SSE cost should be used here. */
29303 *total = cost->fdiv;
29304 else if (X87_FLOAT_MODE_P (mode))
29305 *total = cost->fdiv;
29306 else if (FLOAT_MODE_P (mode))
29307 /* ??? SSE vector cost should be used here. */
29308 *total = cost->fdiv;
29310 *total = cost->divide[MODE_INDEX (mode)];
29314 if (GET_MODE_CLASS (mode) == MODE_INT
29315 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29317 if (GET_CODE (XEXP (x, 0)) == PLUS
29318 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29319 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29320 && CONSTANT_P (XEXP (x, 1)))
29322 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29323 if (val == 2 || val == 4 || val == 8)
29325 *total = cost->lea;
29326 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29327 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29328 outer_code, speed);
29329 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29333 else if (GET_CODE (XEXP (x, 0)) == MULT
29334 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29336 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29337 if (val == 2 || val == 4 || val == 8)
29339 *total = cost->lea;
29340 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29341 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29345 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29347 *total = cost->lea;
29348 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29349 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29350 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29357 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29359 /* ??? SSE cost should be used here. */
29360 *total = cost->fadd;
29363 else if (X87_FLOAT_MODE_P (mode))
29365 *total = cost->fadd;
29368 else if (FLOAT_MODE_P (mode))
29370 /* ??? SSE vector cost should be used here. */
29371 *total = cost->fadd;
29379 if (!TARGET_64BIT && mode == DImode)
29381 *total = (cost->add * 2
29382 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29383 << (GET_MODE (XEXP (x, 0)) != DImode))
29384 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29385 << (GET_MODE (XEXP (x, 1)) != DImode)));
29391 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29393 /* ??? SSE cost should be used here. */
29394 *total = cost->fchs;
29397 else if (X87_FLOAT_MODE_P (mode))
29399 *total = cost->fchs;
29402 else if (FLOAT_MODE_P (mode))
29404 /* ??? SSE vector cost should be used here. */
29405 *total = cost->fchs;
29411 if (!TARGET_64BIT && mode == DImode)
29412 *total = cost->add * 2;
29414 *total = cost->add;
29418 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29419 && XEXP (XEXP (x, 0), 1) == const1_rtx
29420 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29421 && XEXP (x, 1) == const0_rtx)
29423 /* This kind of construct is implemented using test[bwl].
29424 Treat it as if we had an AND. */
29425 *total = (cost->add
29426 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29427 + rtx_cost (const1_rtx, outer_code, speed));
29433 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29438 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29439 /* ??? SSE cost should be used here. */
29440 *total = cost->fabs;
29441 else if (X87_FLOAT_MODE_P (mode))
29442 *total = cost->fabs;
29443 else if (FLOAT_MODE_P (mode))
29444 /* ??? SSE vector cost should be used here. */
29445 *total = cost->fabs;
29449 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29450 /* ??? SSE cost should be used here. */
29451 *total = cost->fsqrt;
29452 else if (X87_FLOAT_MODE_P (mode))
29453 *total = cost->fsqrt;
29454 else if (FLOAT_MODE_P (mode))
29455 /* ??? SSE vector cost should be used here. */
29456 *total = cost->fsqrt;
29460 if (XINT (x, 1) == UNSPEC_TP)
29467 case VEC_DUPLICATE:
29468 /* ??? Assume all of these vector manipulation patterns are
29469 recognizable, in which case they all pretty much have the same cost. */
29471 *total = COSTS_N_INSNS (1);
29481 static int current_machopic_label_num;
29483 /* Given a symbol name and its associated stub, write out the
29484 definition of the stub. */
29487 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29489 unsigned int length;
29490 char *binder_name, *symbol_name, lazy_ptr_name[32];
29491 int label = ++current_machopic_label_num;
29493 /* For 64-bit we shouldn't get here. */
29494 gcc_assert (!TARGET_64BIT);
29496 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29497 symb = targetm.strip_name_encoding (symb);
29499 length = strlen (stub);
29500 binder_name = XALLOCAVEC (char, length + 32);
29501 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29503 length = strlen (symb);
29504 symbol_name = XALLOCAVEC (char, length + 32);
29505 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29507 sprintf (lazy_ptr_name, "L%d$lz", label);
29509 if (MACHOPIC_ATT_STUB)
29510 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29511 else if (MACHOPIC_PURE)
29513 if (TARGET_DEEP_BRANCH_PREDICTION)
29514 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29516 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29519 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29521 fprintf (file, "%s:\n", stub);
29522 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29524 if (MACHOPIC_ATT_STUB)
29526 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29528 else if (MACHOPIC_PURE)
29531 if (TARGET_DEEP_BRANCH_PREDICTION)
29533 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29534 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29535 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29536 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29540 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29541 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29542 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29544 fprintf (file, "\tjmp\t*%%ecx\n");
29547 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29549 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29550 it needs no stub-binding-helper. */
29551 if (MACHOPIC_ATT_STUB)
29554 fprintf (file, "%s:\n", binder_name);
29558 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29559 fprintf (file, "\tpushl\t%%ecx\n");
29562 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29564 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29566 /* N.B. Keep the correspondence of these
29567 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29568 old-pic/new-pic/non-pic stubs; altering this will break
29569 compatibility with existing dylibs. */
29573 if (TARGET_DEEP_BRANCH_PREDICTION)
29574 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29575 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
/* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx".  */
29578 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29581 /* 16-byte -mdynamic-no-pic stub. */
29582 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29584 fprintf (file, "%s:\n", lazy_ptr_name);
29585 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29586 fprintf (file, ASM_LONG "%s\n", binder_name);
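/* Putting the pieces above together, the MACHOPIC_PURE stub with
   TARGET_DEEP_BRANCH_PREDICTION for a symbol _foo comes out roughly as
   follows (label and binder names are illustrative; the thunk name is
   whatever output_set_got emits):

	_foo$stub:
		.indirect_symbol _foo
		call	___<cpu>.get_pc_thunk.cx
	LPC$1:	movl	L1$lz-LPC$1(%ecx),%ecx
		jmp	*%ecx
	<binder>:
		lea	L1$lz-<binder>(%ecx),%ecx
		pushl	%ecx
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long	<binder>  */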
29588 #endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */
29593 x86_order_regs_for_local_alloc (void)
29598 /* First allocate the local general purpose registers. */
29599 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29600 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29601 reg_alloc_order [pos++] = i;
29603 /* Global general purpose registers. */
29604 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29605 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29606 reg_alloc_order [pos++] = i;
/* x87 registers come first in case we are doing FP math
   using them.  */
29610 if (!TARGET_SSE_MATH)
29611 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29612 reg_alloc_order [pos++] = i;
29614 /* SSE registers. */
29615 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29616 reg_alloc_order [pos++] = i;
29617 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29618 reg_alloc_order [pos++] = i;
29620 /* x87 registers. */
29621 if (TARGET_SSE_MATH)
29622 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29623 reg_alloc_order [pos++] = i;
29625 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29626 reg_alloc_order [pos++] = i;
/* Initialize the rest of the array as we do not allocate some registers
   at all.  */
29630 while (pos < FIRST_PSEUDO_REGISTER)
29631 reg_alloc_order [pos++] = 0;
29634 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
in struct attribute_spec.handler.  */
29637 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29639 int flags ATTRIBUTE_UNUSED,
29640 bool *no_add_attrs)
29642 if (TREE_CODE (*node) != FUNCTION_TYPE
29643 && TREE_CODE (*node) != METHOD_TYPE
29644 && TREE_CODE (*node) != FIELD_DECL
29645 && TREE_CODE (*node) != TYPE_DECL)
29647 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29649 *no_add_attrs = true;
29654 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29656 *no_add_attrs = true;
29659 if (is_attribute_p ("callee_pop_aggregate_return", name))
29663 cst = TREE_VALUE (args);
29664 if (TREE_CODE (cst) != INTEGER_CST)
29666 warning (OPT_Wattributes,
29667 "%qE attribute requires an integer constant argument",
29669 *no_add_attrs = true;
29671 else if (compare_tree_int (cst, 0) != 0
29672 && compare_tree_int (cst, 1) != 0)
29674 warning (OPT_Wattributes,
29675 "argument to %qE attribute is neither zero, nor one",
29677 *no_add_attrs = true;
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29687 struct attribute_spec.handler. */
29689 ix86_handle_abi_attribute (tree *node, tree name,
29690 tree args ATTRIBUTE_UNUSED,
29691 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29693 if (TREE_CODE (*node) != FUNCTION_TYPE
29694 && TREE_CODE (*node) != METHOD_TYPE
29695 && TREE_CODE (*node) != FIELD_DECL
29696 && TREE_CODE (*node) != TYPE_DECL)
29698 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29700 *no_add_attrs = true;
29705 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29707 *no_add_attrs = true;
29711 /* Can combine regparm with all attributes but fastcall. */
29712 if (is_attribute_p ("ms_abi", name))
29714 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29716 error ("ms_abi and sysv_abi attributes are not compatible");
29721 else if (is_attribute_p ("sysv_abi", name))
29723 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29725 error ("ms_abi and sysv_abi attributes are not compatible");
29734 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29735 struct attribute_spec.handler. */
29737 ix86_handle_struct_attribute (tree *node, tree name,
29738 tree args ATTRIBUTE_UNUSED,
29739 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29742 if (DECL_P (*node))
29744 if (TREE_CODE (*node) == TYPE_DECL)
29745 type = &TREE_TYPE (*node);
29750 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29751 || TREE_CODE (*type) == UNION_TYPE)))
29753 warning (OPT_Wattributes, "%qE attribute ignored",
29755 *no_add_attrs = true;
29758 else if ((is_attribute_p ("ms_struct", name)
29759 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29760 || ((is_attribute_p ("gcc_struct", name)
29761 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29763 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29765 *no_add_attrs = true;
29772 ix86_handle_fndecl_attribute (tree *node, tree name,
29773 tree args ATTRIBUTE_UNUSED,
29774 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29776 if (TREE_CODE (*node) != FUNCTION_DECL)
29778 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29780 *no_add_attrs = true;
29786 ix86_ms_bitfield_layout_p (const_tree record_type)
29788 return ((TARGET_MS_BITFIELD_LAYOUT
29789 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29790 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29793 /* Returns an expression indicating where the this parameter is
29794 located on entry to the FUNCTION. */
29797 x86_this_parameter (tree function)
29799 tree type = TREE_TYPE (function);
29800 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29805 const int *parm_regs;
29807 if (ix86_function_type_abi (type) == MS_ABI)
29808 parm_regs = x86_64_ms_abi_int_parameter_registers;
29810 parm_regs = x86_64_int_parameter_registers;
29811 return gen_rtx_REG (DImode, parm_regs[aggr]);
29814 nregs = ix86_function_regparm (type, function);
29816 if (nregs > 0 && !stdarg_p (type))
29819 unsigned int ccvt = ix86_get_callcvt (type);
29821 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29822 regno = aggr ? DX_REG : CX_REG;
29823 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29827 return gen_rtx_MEM (SImode,
29828 plus_constant (stack_pointer_rtx, 4));
29837 return gen_rtx_MEM (SImode,
29838 plus_constant (stack_pointer_rtx, 4));
29841 return gen_rtx_REG (SImode, regno);
29844 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
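/* To summarize the cases above (a sketch; some branches are elided
   here): in 64-bit mode THIS is in the first integer parameter
   register, shifted over by one when a hidden aggregate-return pointer
   is present; with fastcall it is in %ecx, or %edx if AGGR; with
   thiscall it is in %ecx; with regparm it takes a free argument
   register; otherwise it lives on the stack at 4(%esp), or at 8(%esp)
   behind the aggregate-return pointer.  */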
29847 /* Determine whether x86_output_mi_thunk can succeed. */
29850 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29851 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29852 HOST_WIDE_INT vcall_offset, const_tree function)
29854 /* 64-bit can handle anything. */
29858 /* For 32-bit, everything's fine if we have one free register. */
29859 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29862 /* Need a free register for vcall_offset. */
29866 /* Need a free register for GOT references. */
29867 if (flag_pic && !targetm.binds_local_p (function))
29870 /* Otherwise ok. */
29874 /* Output the assembler code for a thunk function. THUNK_DECL is the
29875 declaration for the thunk function itself, FUNCTION is the decl for
29876 the target function. DELTA is an immediate constant offset to be
29877 added to THIS. If VCALL_OFFSET is nonzero, the word at
29878 *(*this + vcall_offset) should be added to THIS. */
29881 x86_output_mi_thunk (FILE *file,
29882 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29883 HOST_WIDE_INT vcall_offset, tree function)
29886 rtx this_param = x86_this_parameter (function);
29889 /* Make sure unwind info is emitted for the thunk if needed. */
29890 final_start_function (emit_barrier (), file, 1);
29892 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29893 pull it in now and let DELTA benefit. */
29894 if (REG_P (this_param))
29895 this_reg = this_param;
29896 else if (vcall_offset)
29898 /* Put the this parameter into %eax. */
29899 xops[0] = this_param;
29900 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29901 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29904 this_reg = NULL_RTX;
29906 /* Adjust the this parameter by a fixed constant. */
29909 xops[0] = GEN_INT (delta);
29910 xops[1] = this_reg ? this_reg : this_param;
29913 if (!x86_64_general_operand (xops[0], DImode))
29915 tmp = gen_rtx_REG (DImode, R10_REG);
29917 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29919 xops[1] = this_param;
29921 if (x86_maybe_negate_const_int (&xops[0], DImode))
29922 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29924 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29926 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29927 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29929 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29932 /* Adjust the this parameter by a value stored in the vtable. */
29936 tmp = gen_rtx_REG (DImode, R10_REG);
29939 int tmp_regno = CX_REG;
29940 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
29941 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
29942 tmp_regno = AX_REG;
29943 tmp = gen_rtx_REG (SImode, tmp_regno);
29946 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29948 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29950 /* Adjust the this parameter. */
29951 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29952 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29954 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29955 xops[0] = GEN_INT (vcall_offset);
29957 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29958 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29960 xops[1] = this_reg;
29961 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29964 /* If necessary, drop THIS back to its stack slot. */
29965 if (this_reg && this_reg != this_param)
29967 xops[0] = this_reg;
29968 xops[1] = this_param;
29969 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29972 xops[0] = XEXP (DECL_RTL (function), 0);
29975 if (!flag_pic || targetm.binds_local_p (function)
29976 || DEFAULT_ABI == MS_ABI)
29977 output_asm_insn ("jmp\t%P0", xops);
29978 /* All thunks should be in the same object as their target,
29979 and thus binds_local_p should be true. */
29980 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29981 gcc_unreachable ();
29984 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29985 tmp = gen_rtx_CONST (Pmode, tmp);
29986 tmp = gen_rtx_MEM (QImode, tmp);
29988 output_asm_insn ("jmp\t%A0", xops);
29993 if (!flag_pic || targetm.binds_local_p (function))
29994 output_asm_insn ("jmp\t%P0", xops);
29999 rtx sym_ref = XEXP (DECL_RTL (function), 0);
30000 if (TARGET_MACHO_BRANCH_ISLANDS)
30001 sym_ref = (gen_rtx_SYMBOL_REF
30003 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
30004 tmp = gen_rtx_MEM (QImode, sym_ref);
30006 output_asm_insn ("jmp\t%0", xops);
30009 #endif /* TARGET_MACHO */
30011 tmp = gen_rtx_REG (SImode, CX_REG);
30012 output_set_got (tmp, NULL_RTX);
30015 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
30016 output_asm_insn ("jmp\t{*}%1", xops);
30019 final_end_function ();
30023 x86_file_start (void)
30025 default_file_start ();
30027 darwin_file_start ();
30029 if (X86_FILE_START_VERSION_DIRECTIVE)
30030 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
30031 if (X86_FILE_START_FLTUSED)
30032 fputs ("\t.global\t__fltused\n", asm_out_file);
30033 if (ix86_asm_dialect == ASM_INTEL)
30034 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
30038 x86_field_alignment (tree field, int computed)
30040 enum machine_mode mode;
30041 tree type = TREE_TYPE (field);
30043 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
30045 mode = TYPE_MODE (strip_array_types (type));
30046 if (mode == DFmode || mode == DCmode
30047 || GET_MODE_CLASS (mode) == MODE_INT
30048 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
30049 return MIN (32, computed);
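/* For instance, without TARGET_64BIT or -malign-double this caps the
   alignment of a double field at 32 bits, so in
   struct { char c; double d; } the member D sits at offset 4 instead
   of its natural 8-byte boundary, matching the traditional ia32 ABI
   layout.  */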
30053 /* Output assembler code to FILE to increment profiler label # LABELNO
30054 for profiling a function entry. */
30056 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
30058 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
30063 #ifndef NO_PROFILE_COUNTERS
30064 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
30067 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
30068 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
30070 fprintf (file, "\tcall\t%s\n", mcount_name);
30074 #ifndef NO_PROFILE_COUNTERS
30075 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
30078 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
30082 #ifndef NO_PROFILE_COUNTERS
30083 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
30086 fprintf (file, "\tcall\t%s\n", mcount_name);
30090 /* We don't have exact information about the insn sizes, but we may assume
30091 quite safely that we are informed about all 1 byte insns and memory
address sizes.  This is enough to eliminate unnecessary padding in
99% of cases.  */
30096 min_insn_size (rtx insn)
30100 if (!INSN_P (insn) || !active_insn_p (insn))
/* Discard alignments we've emitted and jump table data.  */
30104 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30105 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30107 if (JUMP_TABLE_DATA_P (insn))
30110 /* Important case - calls are always 5 bytes.
It is common to have many calls in a row.  */
30113 && symbolic_reference_mentioned_p (PATTERN (insn))
30114 && !SIBLING_CALL_P (insn))
30116 len = get_attr_length (insn);
30120 /* For normal instructions we rely on get_attr_length being exact,
30121 with a few exceptions. */
30122 if (!JUMP_P (insn))
30124 enum attr_type type = get_attr_type (insn);
30129 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30130 || asm_noperands (PATTERN (insn)) >= 0)
30137 /* Otherwise trust get_attr_length. */
30141 l = get_attr_length_address (insn);
30142 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30151 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16-byte window.  */
30157 ix86_avoid_jump_mispredicts (void)
30159 rtx insn, start = get_insns ();
30160 int nbytes = 0, njumps = 0;
30163 /* Look for all minimal intervals of instructions containing 4 jumps.
30164 The intervals are bounded by START and INSN. NBYTES is the total
30165 size of instructions in the interval including INSN and not including
START.  When NBYTES is smaller than 16 bytes, it is possible
that the ends of START and INSN land in the same 16-byte page.

The smallest offset in the page at which INSN can start is the case
where START ends at offset 0.  The offset of INSN is then
NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
maxskip 15 - NBYTES + sizeof (INSN).  */
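/* As a worked example (numbers illustrative): with NBYTES == 10 and a
   2-byte INSN, INSN can begin as early as offset NBYTES - 2 == 8 in a
   window, and padding with maxskip 15 - 10 + 2 == 7 guarantees the
   four jumps cannot all land in a single 16-byte window.  */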
30173 for (insn = start; insn; insn = NEXT_INSN (insn))
30177 if (LABEL_P (insn))
30179 int align = label_to_alignment (insn);
30180 int max_skip = label_to_max_skip (insn);
30184 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30185 already in the current 16 byte page, because otherwise
30186 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30187 bytes to reach 16 byte boundary. */
30189 || (align <= 3 && max_skip != (1 << align) - 1))
30192 fprintf (dump_file, "Label %i with max_skip %i\n",
30193 INSN_UID (insn), max_skip);
30196 while (nbytes + max_skip >= 16)
30198 start = NEXT_INSN (start);
30199 if ((JUMP_P (start)
30200 && GET_CODE (PATTERN (start)) != ADDR_VEC
30201 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30203 njumps--, isjump = 1;
30206 nbytes -= min_insn_size (start);
30212 min_size = min_insn_size (insn);
30213 nbytes += min_size;
30215 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30216 INSN_UID (insn), min_size);
30218 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30219 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30227 start = NEXT_INSN (start);
30228 if ((JUMP_P (start)
30229 && GET_CODE (PATTERN (start)) != ADDR_VEC
30230 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30232 njumps--, isjump = 1;
30235 nbytes -= min_insn_size (start);
30237 gcc_assert (njumps >= 0);
30239 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30240 INSN_UID (start), INSN_UID (insn), nbytes);
30242 if (njumps == 3 && isjump && nbytes < 16)
30244 int padsize = 15 - nbytes + min_insn_size (insn);
30247 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30248 INSN_UID (insn), padsize);
30249 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
/* AMD Athlon works faster when RET is not the destination of a
   conditional jump or directly preceded by another jump instruction.
   We avoid the penalty by inserting a NOP just before the RET
   instructions in such cases.  */
30260 ix86_pad_returns (void)
30265 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30267 basic_block bb = e->src;
30268 rtx ret = BB_END (bb);
30270 bool replace = false;
30272 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30273 || optimize_bb_for_size_p (bb))
30275 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30276 if (active_insn_p (prev) || LABEL_P (prev))
30278 if (prev && LABEL_P (prev))
30283 FOR_EACH_EDGE (e, ei, bb->preds)
30284 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30285 && !(e->flags & EDGE_FALLTHRU))
30290 prev = prev_active_insn (ret);
30292 && ((JUMP_P (prev) && any_condjump_p (prev))
/* Empty functions get a branch mispredict even when
   the jump destination is not visible to us.  */
30297 if (!prev && !optimize_function_for_size_p (cfun))
30302 emit_jump_insn_before (gen_return_internal_long (), ret);
30308 /* Count the minimum number of instructions in BB. Return 4 if the
30309 number of instructions >= 4. */
30312 ix86_count_insn_bb (basic_block bb)
30315 int insn_count = 0;
30317 /* Count number of instructions in this block. Return 4 if the number
30318 of instructions >= 4. */
30319 FOR_BB_INSNS (bb, insn)
/* This only happens in exit blocks.  */
30323 && GET_CODE (PATTERN (insn)) == RETURN)
30326 if (NONDEBUG_INSN_P (insn)
30327 && GET_CODE (PATTERN (insn)) != USE
30328 && GET_CODE (PATTERN (insn)) != CLOBBER)
30331 if (insn_count >= 4)
/* Count the minimum number of instructions in a code path ending in BB.
   Return 4 if the number of instructions >= 4.  */
30344 ix86_count_insn (basic_block bb)
30348 int min_prev_count;
30350 /* Only bother counting instructions along paths with no
30351 more than 2 basic blocks between entry and exit. Given
30352 that BB has an edge to exit, determine if a predecessor
30353 of BB has an edge from entry. If so, compute the number
30354 of instructions in the predecessor block. If there
30355 happen to be multiple such blocks, compute the minimum. */
30356 min_prev_count = 4;
30357 FOR_EACH_EDGE (e, ei, bb->preds)
30360 edge_iterator prev_ei;
30362 if (e->src == ENTRY_BLOCK_PTR)
30364 min_prev_count = 0;
30367 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30369 if (prev_e->src == ENTRY_BLOCK_PTR)
30371 int count = ix86_count_insn_bb (e->src);
30372 if (count < min_prev_count)
30373 min_prev_count = count;
30379 if (min_prev_count < 4)
30380 min_prev_count += ix86_count_insn_bb (bb);
30382 return min_prev_count;
/* Pad short function to 4 instructions.  */
30388 ix86_pad_short_function (void)
30393 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30395 rtx ret = BB_END (e->src);
30396 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30398 int insn_count = ix86_count_insn (e->src);
30400 /* Pad short function. */
30401 if (insn_count < 4)
30405 /* Find epilogue. */
30408 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30409 insn = PREV_INSN (insn);
30414 /* Two NOPs count as one instruction. */
30415 insn_count = 2 * (4 - insn_count);
30416 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
/* Implement machine specific optimizations.  We implement padding of
   returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte
   window.  */
30427 /* We are freeing block_for_insn in the toplev to keep compatibility
30428 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30429 compute_bb_for_insn ();
30431 /* Run the vzeroupper optimization if needed. */
30432 if (TARGET_VZEROUPPER)
30433 move_or_delete_vzeroupper ();
30435 if (optimize && optimize_function_for_speed_p (cfun))
30437 if (TARGET_PAD_SHORT_FUNCTION)
30438 ix86_pad_short_function ();
30439 else if (TARGET_PAD_RETURNS)
30440 ix86_pad_returns ();
30441 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30442 if (TARGET_FOUR_JUMP_LIMIT)
30443 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
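/* In 64-bit mode the byte registers %spl, %bpl, %sil and %dil, i.e.
   regnos above BX_REG, as well as %r8b-%r15b, can only be encoded with
   a REX prefix; that is what the regno check below detects.  */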
30451 x86_extended_QIreg_mentioned_p (rtx insn)
30454 extract_insn_cached (insn);
30455 for (i = 0; i < recog_data.n_operands; i++)
30456 if (REG_P (recog_data.operand[i])
30457 && REGNO (recog_data.operand[i]) > BX_REG)
30462 /* Return nonzero when P points to register encoded via REX prefix.
30463 Called via for_each_rtx. */
30465 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30467 unsigned int regno;
30470 regno = REGNO (*p);
30471 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30474 /* Return true when INSN mentions register that must be encoded using REX
30477 x86_extended_reg_mentioned_p (rtx insn)
30479 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30480 extended_reg_mentioned_1, NULL);
30483 /* If profitable, negate (without causing overflow) integer constant
30484 of mode MODE at location LOC. Return true in this case. */
30486 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30490 if (!CONST_INT_P (*loc))
30496 /* DImode x86_64 constants must fit in 32 bits. */
30497 gcc_assert (x86_64_immediate_operand (*loc, mode));
30508 gcc_unreachable ();
30511 /* Avoid overflows. */
30512 if (mode_signbit_p (mode, *loc))
30515 val = INTVAL (*loc);
30517 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30518 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
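/* E.g. addl $-4, %eax becomes subl $4, %eax, but addl $-128, %eax is
   kept as-is: subl $128 would need a 32-bit immediate, since only
   -128 fits in a sign-extended 8-bit immediate.  */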
30519 if ((val < 0 && val != -128)
30522 *loc = GEN_INT (-val);
30529 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30530 optabs would emit if we didn't have TFmode patterns. */
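/* The expansion is the standard unsigned-to-float idiom: a
   non-negative input gets a plain signed conversion; an input with the
   sign bit set is halved with its low bit folded back in,
   i0 = (in >> 1) | (in & 1), converted, and then doubled, which keeps
   the final rounding correct.  */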
30533 x86_emit_floatuns (rtx operands[2])
30535 rtx neglab, donelab, i0, i1, f0, in, out;
30536 enum machine_mode mode, inmode;
30538 inmode = GET_MODE (operands[1]);
30539 gcc_assert (inmode == SImode || inmode == DImode);
30542 in = force_reg (inmode, operands[1]);
30543 mode = GET_MODE (out);
30544 neglab = gen_label_rtx ();
30545 donelab = gen_label_rtx ();
30546 f0 = gen_reg_rtx (mode);
30548 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30550 expand_float (out, in, 0);
30552 emit_jump_insn (gen_jump (donelab));
30555 emit_label (neglab);
30557 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30559 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30561 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30563 expand_float (f0, i0, 0);
30565 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30567 emit_label (donelab);
30570 /* AVX does not support 32-byte integer vector operations,
30571 thus the longest vector we are faced with is V16QImode. */
30572 #define MAX_VECT_LEN 16
30574 struct expand_vec_perm_d
30576 rtx target, op0, op1;
30577 unsigned char perm[MAX_VECT_LEN];
30578 enum machine_mode vmode;
30579 unsigned char nelt;
30583 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30584 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30586 /* Get a vector mode of the same size as the original but with elements
30587 twice as wide. This is only guaranteed to apply to integral vectors. */
30589 static inline enum machine_mode
30590 get_mode_wider_vector (enum machine_mode o)
30592 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30593 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30594 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30595 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30599 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30600 with all elements equal to VAR. Return true if successful. */
30603 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30604 rtx target, rtx val)
30627 /* First attempt to recognize VAL as-is. */
30628 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30629 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30630 if (recog_memoized (insn) < 0)
30633 /* If that fails, force VAL into a register. */
30636 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30637 seq = get_insns ();
30640 emit_insn_before (seq, insn);
30642 ok = recog_memoized (insn) >= 0;
30651 if (TARGET_SSE || TARGET_3DNOW_A)
30655 val = gen_lowpart (SImode, val);
30656 x = gen_rtx_TRUNCATE (HImode, val);
30657 x = gen_rtx_VEC_DUPLICATE (mode, x);
30658 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30671 struct expand_vec_perm_d dperm;
30675 memset (&dperm, 0, sizeof (dperm));
30676 dperm.target = target;
30677 dperm.vmode = mode;
30678 dperm.nelt = GET_MODE_NUNITS (mode);
30679 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30681 /* Extend to SImode using a paradoxical SUBREG. */
30682 tmp1 = gen_reg_rtx (SImode);
30683 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30685 /* Insert the SImode value as low element of a V4SImode vector. */
30686 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30687 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30689 ok = (expand_vec_perm_1 (&dperm)
30690 || expand_vec_perm_broadcast_1 (&dperm));
30702 /* Replicate the value once into the next wider mode and recurse. */
30704 enum machine_mode smode, wsmode, wvmode;
30707 smode = GET_MODE_INNER (mode);
30708 wvmode = get_mode_wider_vector (mode);
30709 wsmode = GET_MODE_INNER (wvmode);
30711 val = convert_modes (wsmode, smode, val, true);
30712 x = expand_simple_binop (wsmode, ASHIFT, val,
30713 GEN_INT (GET_MODE_BITSIZE (smode)),
30714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30715 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30717 x = gen_lowpart (wvmode, target);
30718 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30726 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30727 rtx x = gen_reg_rtx (hvmode);
30729 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30732 x = gen_rtx_VEC_CONCAT (mode, x, x);
30733 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30742 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
whose ONE_VAR element is VAR, and other elements are zero.  Return true
if successful.  */
30747 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30748 rtx target, rtx var, int one_var)
30750 enum machine_mode vsimode;
30753 bool use_vector_set = false;
30758 /* For SSE4.1, we normally use vector set. But if the second
element is zero and inter-unit moves are OK, we use movq instead.  */
30761 use_vector_set = (TARGET_64BIT
30763 && !(TARGET_INTER_UNIT_MOVES
30769 use_vector_set = TARGET_SSE4_1;
30772 use_vector_set = TARGET_SSE2;
30775 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30782 use_vector_set = TARGET_AVX;
30785 /* Use ix86_expand_vector_set in 64bit mode only. */
30786 use_vector_set = TARGET_AVX && TARGET_64BIT;
30792 if (use_vector_set)
30794 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30795 var = force_reg (GET_MODE_INNER (mode), var);
30796 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30812 var = force_reg (GET_MODE_INNER (mode), var);
30813 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30814 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30819 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30820 new_target = gen_reg_rtx (mode);
30822 new_target = target;
30823 var = force_reg (GET_MODE_INNER (mode), var);
30824 x = gen_rtx_VEC_DUPLICATE (mode, var);
30825 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30826 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30829 /* We need to shuffle the value to the correct position, so
30830 create a new pseudo to store the intermediate result. */
30832 /* With SSE2, we can use the integer shuffle insns. */
30833 if (mode != V4SFmode && TARGET_SSE2)
30835 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30837 GEN_INT (one_var == 1 ? 0 : 1),
30838 GEN_INT (one_var == 2 ? 0 : 1),
30839 GEN_INT (one_var == 3 ? 0 : 1)));
30840 if (target != new_target)
30841 emit_move_insn (target, new_target);
30845 /* Otherwise convert the intermediate result to V4SFmode and
30846 use the SSE1 shuffle instructions. */
30847 if (mode != V4SFmode)
30849 tmp = gen_reg_rtx (V4SFmode);
30850 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30855 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30857 GEN_INT (one_var == 1 ? 0 : 1),
30858 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30859 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30861 if (mode != V4SFmode)
30862 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30863 else if (tmp != target)
30864 emit_move_insn (target, tmp);
30866 else if (target != new_target)
30867 emit_move_insn (target, new_target);
30872 vsimode = V4SImode;
30878 vsimode = V2SImode;
30884 /* Zero extend the variable element to SImode and recurse. */
30885 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30887 x = gen_reg_rtx (vsimode);
30888 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30890 gcc_unreachable ();
30892 emit_move_insn (target, gen_lowpart (mode, x));
30900 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30901 consisting of the values in VALS. It is known that all elements
30902 except ONE_VAR are constants. Return true if successful. */
30905 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30906 rtx target, rtx vals, int one_var)
30908 rtx var = XVECEXP (vals, 0, one_var);
30909 enum machine_mode wmode;
30912 const_vec = copy_rtx (vals);
30913 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30914 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30922 /* For the two element vectors, it's just as easy to use
30923 the general case. */
30927 /* Use ix86_expand_vector_set in 64bit mode only. */
30950 /* There's no way to set one QImode entry easily. Combine
30951 the variable value with its adjacent constant value, and
30952 promote to an HImode set. */
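/* E.g. to set element 5 of a V16QImode vector, elements 4 and 5 are
   combined into one HImode value, which is then stored at element
   one_var >> 1 == 2 of the V8HImode view of the vector.  */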
30953 x = XVECEXP (vals, 0, one_var ^ 1);
30956 var = convert_modes (HImode, QImode, var, true);
30957 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30958 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30959 x = GEN_INT (INTVAL (x) & 0xff);
30963 var = convert_modes (HImode, QImode, var, true);
30964 x = gen_int_mode (INTVAL (x) << 8, HImode);
30966 if (x != const0_rtx)
30967 var = expand_simple_binop (HImode, IOR, var, x, var,
30968 1, OPTAB_LIB_WIDEN);
30970 x = gen_reg_rtx (wmode);
30971 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30972 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30974 emit_move_insn (target, gen_lowpart (mode, x));
30981 emit_move_insn (target, const_vec);
30982 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30986 /* A subroutine of ix86_expand_vector_init_general. Use vector
30987 concatenate to handle the most general case: all values variable,
30988 and none identical. */
30991 ix86_expand_vector_init_concat (enum machine_mode mode,
30992 rtx target, rtx *ops, int n)
30994 enum machine_mode cmode, hmode = VOIDmode;
30995 rtx first[8], second[4];
31035 gcc_unreachable ();
31038 if (!register_operand (ops[1], cmode))
31039 ops[1] = force_reg (cmode, ops[1]);
31040 if (!register_operand (ops[0], cmode))
31041 ops[0] = force_reg (cmode, ops[0]);
31042 emit_insn (gen_rtx_SET (VOIDmode, target,
31043 gen_rtx_VEC_CONCAT (mode, ops[0],
31063 gcc_unreachable ();
31079 gcc_unreachable ();
31084 /* FIXME: We process inputs backward to help RA. PR 36222. */
31087 for (; i > 0; i -= 2, j--)
31089 first[j] = gen_reg_rtx (cmode);
31090 v = gen_rtvec (2, ops[i - 1], ops[i]);
31091 ix86_expand_vector_init (false, first[j],
31092 gen_rtx_PARALLEL (cmode, v));
31098 gcc_assert (hmode != VOIDmode);
31099 for (i = j = 0; i < n; i += 2, j++)
31101 second[j] = gen_reg_rtx (hmode);
31102 ix86_expand_vector_init_concat (hmode, second [j],
31106 ix86_expand_vector_init_concat (mode, target, second, n);
31109 ix86_expand_vector_init_concat (mode, target, first, n);
31113 gcc_unreachable ();
31117 /* A subroutine of ix86_expand_vector_init_general. Use vector
31118 interleave to handle the most general case: all values variable,
31119 and none identical. */
31122 ix86_expand_vector_init_interleave (enum machine_mode mode,
31123 rtx target, rtx *ops, int n)
31125 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31128 rtx (*gen_load_even) (rtx, rtx, rtx);
31129 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31130 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31135 gen_load_even = gen_vec_setv8hi;
31136 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31137 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31138 inner_mode = HImode;
31139 first_imode = V4SImode;
31140 second_imode = V2DImode;
31141 third_imode = VOIDmode;
31144 gen_load_even = gen_vec_setv16qi;
31145 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31146 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31147 inner_mode = QImode;
31148 first_imode = V8HImode;
31149 second_imode = V4SImode;
31150 third_imode = V2DImode;
31153 gcc_unreachable ();
31156 for (i = 0; i < n; i++)
/* Extend the odd element to SImode using a paradoxical SUBREG.  */
31159 op0 = gen_reg_rtx (SImode);
31160 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31162 /* Insert the SImode value as low element of V4SImode vector. */
31163 op1 = gen_reg_rtx (V4SImode);
31164 op0 = gen_rtx_VEC_MERGE (V4SImode,
31165 gen_rtx_VEC_DUPLICATE (V4SImode,
31167 CONST0_RTX (V4SImode),
31169 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
/* Cast the V4SImode vector back to a vector in the original mode.  */
31172 op0 = gen_reg_rtx (mode);
31173 emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load even elements into the second position.  */
31176 emit_insn (gen_load_even (op0,
31177 force_reg (inner_mode,
31181 /* Cast vector to FIRST_IMODE vector. */
31182 ops[i] = gen_reg_rtx (first_imode);
31183 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31186 /* Interleave low FIRST_IMODE vectors. */
31187 for (i = j = 0; i < n; i += 2, j++)
31189 op0 = gen_reg_rtx (first_imode);
31190 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31192 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31193 ops[j] = gen_reg_rtx (second_imode);
31194 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31197 /* Interleave low SECOND_IMODE vectors. */
31198 switch (second_imode)
31201 for (i = j = 0; i < n / 2; i += 2, j++)
31203 op0 = gen_reg_rtx (second_imode);
31204 emit_insn (gen_interleave_second_low (op0, ops[i],
/* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
31209 ops[j] = gen_reg_rtx (third_imode);
31210 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31212 second_imode = V2DImode;
31213 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31217 op0 = gen_reg_rtx (second_imode);
31218 emit_insn (gen_interleave_second_low (op0, ops[0],
/* Cast the SECOND_IMODE vector back to a vector in the original
   mode.  */
31223 emit_insn (gen_rtx_SET (VOIDmode, target,
31224 gen_lowpart (mode, op0)));
31228 gcc_unreachable ();
31232 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31233 all values variable, and none identical. */
31236 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31237 rtx target, rtx vals)
31239 rtx ops[32], op0, op1;
31240 enum machine_mode half_mode = VOIDmode;
31247 if (!mmx_ok && !TARGET_SSE)
31259 n = GET_MODE_NUNITS (mode);
31260 for (i = 0; i < n; i++)
31261 ops[i] = XVECEXP (vals, 0, i);
31262 ix86_expand_vector_init_concat (mode, target, ops, n);
31266 half_mode = V16QImode;
31270 half_mode = V8HImode;
31274 n = GET_MODE_NUNITS (mode);
31275 for (i = 0; i < n; i++)
31276 ops[i] = XVECEXP (vals, 0, i);
31277 op0 = gen_reg_rtx (half_mode);
31278 op1 = gen_reg_rtx (half_mode);
31279 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31281 ix86_expand_vector_init_interleave (half_mode, op1,
31282 &ops [n >> 1], n >> 2);
31283 emit_insn (gen_rtx_SET (VOIDmode, target,
31284 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31288 if (!TARGET_SSE4_1)
31296 /* Don't use ix86_expand_vector_init_interleave if we can't
31297 move from GPR to SSE register directly. */
31298 if (!TARGET_INTER_UNIT_MOVES)
31301 n = GET_MODE_NUNITS (mode);
31302 for (i = 0; i < n; i++)
31303 ops[i] = XVECEXP (vals, 0, i);
31304 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31312 gcc_unreachable ();
31316 int i, j, n_elts, n_words, n_elt_per_word;
31317 enum machine_mode inner_mode;
31318 rtx words[4], shift;
31320 inner_mode = GET_MODE_INNER (mode);
31321 n_elts = GET_MODE_NUNITS (mode);
31322 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31323 n_elt_per_word = n_elts / n_words;
31324 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
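/* Each word is assembled from its elements high to low; e.g. for
   V8HImode on a 32-bit target, n_words is 4, n_elt_per_word is 2, and
   each SImode word becomes (hi_elt << 16) | lo_elt before the words
   are combined into the vector below.  */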
31326 for (i = 0; i < n_words; ++i)
31328 rtx word = NULL_RTX;
31330 for (j = 0; j < n_elt_per_word; ++j)
31332 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31333 elt = convert_modes (word_mode, inner_mode, elt, true);
31339 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31340 word, 1, OPTAB_LIB_WIDEN);
31341 word = expand_simple_binop (word_mode, IOR, word, elt,
31342 word, 1, OPTAB_LIB_WIDEN);
31350 emit_move_insn (target, gen_lowpart (mode, words[0]));
31351 else if (n_words == 2)
31353 rtx tmp = gen_reg_rtx (mode);
31354 emit_clobber (tmp);
31355 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31356 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31357 emit_move_insn (target, tmp);
31359 else if (n_words == 4)
31361 rtx tmp = gen_reg_rtx (V4SImode);
31362 gcc_assert (word_mode == SImode);
31363 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31364 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31365 emit_move_insn (target, gen_lowpart (mode, tmp));
31368 gcc_unreachable ();
31372 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31373 instructions unless MMX_OK is true. */
31376 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31378 enum machine_mode mode = GET_MODE (target);
31379 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31380 int n_elts = GET_MODE_NUNITS (mode);
31381 int n_var = 0, one_var = -1;
31382 bool all_same = true, all_const_zero = true;
31386 for (i = 0; i < n_elts; ++i)
31388 x = XVECEXP (vals, 0, i);
31389 if (!(CONST_INT_P (x)
31390 || GET_CODE (x) == CONST_DOUBLE
31391 || GET_CODE (x) == CONST_FIXED))
31392 n_var++, one_var = i;
31393 else if (x != CONST0_RTX (inner_mode))
31394 all_const_zero = false;
31395 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31399 /* Constants are best loaded from the constant pool. */
31402 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31406 /* If all values are identical, broadcast the value. */
31408 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31409 XVECEXP (vals, 0, 0)))
31412 /* Values where only one field is non-constant are best loaded from
31413 the pool and overwritten via move later. */
31417 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31418 XVECEXP (vals, 0, one_var),
31422 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31426 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31430 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31432 enum machine_mode mode = GET_MODE (target);
31433 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31434 enum machine_mode half_mode;
31435 bool use_vec_merge = false;
31437 static rtx (*gen_extract[6][2]) (rtx, rtx)
31439 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31440 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31441 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31442 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31443 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31444 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31446 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31448 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31449 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31450 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31451 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31452 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31453 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31463 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31464 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31466 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31468 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31469 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31475 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
31479 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31480 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
31482 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31484 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31485 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31492 /* For the two element vectors, we implement a VEC_CONCAT with
31493 the extraction of the other element. */
31495 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31496 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31499 op0 = val, op1 = tmp;
31501 op0 = tmp, op1 = val;
31503 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31504 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31509 use_vec_merge = TARGET_SSE4_1;
31516 use_vec_merge = true;
31520 /* tmp = target = A B C D */
31521 tmp = copy_to_reg (target);
31522 /* target = A A B B */
31523 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31524 /* target = X A B B */
31525 ix86_expand_vector_set (false, target, val, 0);
31526 /* target = A X C D */
31527 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31528 const1_rtx, const0_rtx,
31529 GEN_INT (2+4), GEN_INT (3+4)));
31533 /* tmp = target = A B C D */
31534 tmp = copy_to_reg (target);
31535 /* tmp = X B C D */
31536 ix86_expand_vector_set (false, tmp, val, 0);
31537 /* target = A B X D */
31538 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31539 const0_rtx, const1_rtx,
31540 GEN_INT (0+4), GEN_INT (3+4)));
31544 /* tmp = target = A B C D */
31545 tmp = copy_to_reg (target);
31546 /* tmp = X B C D */
31547 ix86_expand_vector_set (false, tmp, val, 0);
31548 /* target = A B X D */
31549 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31550 const0_rtx, const1_rtx,
31551 GEN_INT (2+4), GEN_INT (0+4)));
31555 gcc_unreachable ();
31560 use_vec_merge = TARGET_SSE4_1;
31564 /* Element 0 handled by vec_merge below. */
31567 use_vec_merge = true;
31573 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31574 store into element 0, then shuffle them back. */
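/* E.g. for ELT == 2 the order array becomes { 2, 1, 0, 3 }: one
   pshufd swaps elements 0 and 2, the new value is inserted into
   element 0, and the same pshufd swaps them back into place.  */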
31578 order[0] = GEN_INT (elt);
31579 order[1] = const1_rtx;
31580 order[2] = const2_rtx;
31581 order[3] = GEN_INT (3);
31582 order[elt] = const0_rtx;
31584 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31585 order[1], order[2], order[3]));
31587 ix86_expand_vector_set (false, target, val, 0);
31589 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31590 order[1], order[2], order[3]));
31594 /* For SSE1, we have to reuse the V4SF code. */
31595 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31596 gen_lowpart (SFmode, val), elt);
31601 use_vec_merge = TARGET_SSE2;
31604 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31608 use_vec_merge = TARGET_SSE4_1;
31615 half_mode = V16QImode;
31621 half_mode = V8HImode;
31627 half_mode = V4SImode;
31633 half_mode = V2DImode;
31639 half_mode = V4SFmode;
31645 half_mode = V2DFmode;
31651 /* Compute offset. */
31655 gcc_assert (i <= 1);
31657 /* Extract the half. */
31658 tmp = gen_reg_rtx (half_mode);
31659 emit_insn (gen_extract[j][i] (tmp, target));
31661 /* Put val in tmp at elt. */
31662 ix86_expand_vector_set (false, tmp, val, elt);
31665 emit_insn (gen_insert[j][i] (target, target, tmp));
31674 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31675 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31676 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31680 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31682 emit_move_insn (mem, target);
31684 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31685 emit_move_insn (tmp, val);
31687 emit_move_insn (target, mem);
31692 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31694 enum machine_mode mode = GET_MODE (vec);
31695 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31696 bool use_vec_extr = false;
31709 use_vec_extr = true;
31713 use_vec_extr = TARGET_SSE4_1;
31725 tmp = gen_reg_rtx (mode);
31726 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31727 GEN_INT (elt), GEN_INT (elt),
31728 GEN_INT (elt+4), GEN_INT (elt+4)));
31732 tmp = gen_reg_rtx (mode);
31733 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31737 gcc_unreachable ();
31740 use_vec_extr = true;
31745 use_vec_extr = TARGET_SSE4_1;
31759 tmp = gen_reg_rtx (mode);
31760 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31761 GEN_INT (elt), GEN_INT (elt),
31762 GEN_INT (elt), GEN_INT (elt)));
31766 tmp = gen_reg_rtx (mode);
31767 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31771 gcc_unreachable ();
31774 use_vec_extr = true;
31779 /* For SSE1, we have to reuse the V4SF code. */
31780 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31781 gen_lowpart (V4SFmode, vec), elt);
31787 use_vec_extr = TARGET_SSE2;
31790 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31794 use_vec_extr = TARGET_SSE4_1;
31798 /* ??? Could extract the appropriate HImode element and shift. */
31805 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31806 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31808 /* Let the rtl optimizers know about the zero extension performed. */
31809 if (inner_mode == QImode || inner_mode == HImode)
31811 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31812 target = gen_lowpart (SImode, target);
31815 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31819 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31821 emit_move_insn (mem, vec);
31823 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31824 emit_move_insn (target, tmp);
31828 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31829 pattern to reduce; DEST is the destination; IN is the input vector. */
31832 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31834 rtx tmp1, tmp2, tmp3;
31836 tmp1 = gen_reg_rtx (V4SFmode);
31837 tmp2 = gen_reg_rtx (V4SFmode);
31838 tmp3 = gen_reg_rtx (V4SFmode);
31840 emit_insn (gen_sse_movhlps (tmp1, in, in));
31841 emit_insn (fn (tmp2, tmp1, in));
31843 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31844 const1_rtx, const1_rtx,
31845 GEN_INT (1+4), GEN_INT (1+4)));
31846 emit_insn (fn (dest, tmp2, tmp3));
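/* Dataflow of the reduction above, roughly: movhlps makes
   tmp1[0..1] = in[2..3], so tmp2 = fn (tmp1, in) combines lanes
   {0,2} and {1,3}; the shufps then broadcasts element 1 of tmp2, and
   the final fn leaves the complete reduction in element 0 of DEST.  */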
31849 /* Target hook for scalar_mode_supported_p. */
31851 ix86_scalar_mode_supported_p (enum machine_mode mode)
31853 if (DECIMAL_FLOAT_MODE_P (mode))
31854 return default_decimal_float_supported_p ();
31855 else if (mode == TFmode)
31858 return default_scalar_mode_supported_p (mode);
31861 /* Implements target hook vector_mode_supported_p. */
31863 ix86_vector_mode_supported_p (enum machine_mode mode)
31865 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31867 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31869 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31871 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31873 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31878 /* Target hook for c_mode_for_suffix. */
31879 static enum machine_mode
31880 ix86_c_mode_for_suffix (char suffix)
31890 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31892 We do this in the new i386 backend to maintain source compatibility
31893 with the old cc0-based compiler. */
31896 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31897 tree inputs ATTRIBUTE_UNUSED,
31900 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31902 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31907 /* Implements target vector targetm.asm.encode_section_info. This
31908 is not used by netware. */
31910 static void ATTRIBUTE_UNUSED
31911 ix86_encode_section_info (tree decl, rtx rtl, int first)
31913 default_encode_section_info (decl, rtl, first);
31915 if (TREE_CODE (decl) == VAR_DECL
31916 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31917 && ix86_in_large_data_p (decl))
31918 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31921 /* Worker function for REVERSE_CONDITION. */
31924 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31926 return (mode != CCFPmode && mode != CCFPUmode
31927 ? reverse_condition (code)
31928 : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */
31935 output_387_reg_move (rtx insn, rtx *operands)
31937 if (REG_P (operands[0]))
31939 if (REG_P (operands[1])
31940 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31942 if (REGNO (operands[0]) == FIRST_STACK_REG)
31943 return output_387_ffreep (operands, 0);
31944 return "fstp\t%y0";
31946 if (STACK_TOP_P (operands[0]))
31947 return "fld%Z1\t%y1";
31950 else if (MEM_P (operands[0]))
31952 gcc_assert (REG_P (operands[1]));
31953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31954 return "fstp%Z0\t%y0";
31957 /* There is no non-popping store to memory for XFmode.
31958 So if we need one, follow the store with a load. */
31959 if (GET_MODE (operands[0]) == XFmode)
31960 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31962 return "fst%Z0\t%y0";
31969 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31970 FP status register is set. */
31973 ix86_emit_fp_unordered_jump (rtx label)
31975 rtx reg = gen_reg_rtx (HImode);
31978 emit_insn (gen_x86_fnstsw_1 (reg));
31980 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31982 emit_insn (gen_x86_sahf_1 (reg));
31984 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31985 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31989 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31991 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31992 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31995 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31996 gen_rtx_LABEL_REF (VOIDmode, label),
31998 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
32000 emit_jump_insn (temp);
32001 predict_jump (REG_BR_PROB_BASE * 10 / 100);
32004 /* Output code to perform a log1p XFmode calculation. */
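/* The 0.29289... constant below is 1 - sqrt(2)/2: the x87 fyl2xp1
   instruction is only specified for |x| smaller than that bound, so
   larger inputs take the branch that computes log2 (1 + x) directly
   via fyl2x.  */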
32006 void ix86_emit_i387_log1p (rtx op0, rtx op1)
32008 rtx label1 = gen_label_rtx ();
32009 rtx label2 = gen_label_rtx ();
32011 rtx tmp = gen_reg_rtx (XFmode);
32012 rtx tmp2 = gen_reg_rtx (XFmode);
32015 emit_insn (gen_absxf2 (tmp, op1));
32016 test = gen_rtx_GE (VOIDmode, tmp,
32017 CONST_DOUBLE_FROM_REAL_VALUE (
32018 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
32020 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
32022 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32023 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
32024 emit_jump (label2);
32026 emit_label (label1);
32027 emit_move_insn (tmp, CONST1_RTX (XFmode));
32028 emit_insn (gen_addxf3 (tmp, op1, tmp));
32029 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
32030 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
32032 emit_label (label2);
/* Output code to perform a Newton-Raphson approximation of a single precision
32036 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
32038 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
32040 rtx x0, x1, e0, e1;
32042 x0 = gen_reg_rtx (mode);
32043 e0 = gen_reg_rtx (mode);
32044 e1 = gen_reg_rtx (mode);
32045 x1 = gen_reg_rtx (mode);
32047 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
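/* This is one Newton-Raphson step for 1/b: with x0 = rcp(b),
   x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), roughly
   doubling the ~12 bits of accuracy of the rcpss/rcpps estimate.  */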
32049 /* x0 = rcp(b) estimate */
32050 emit_insn (gen_rtx_SET (VOIDmode, x0,
32051 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
32054 emit_insn (gen_rtx_SET (VOIDmode, e0,
32055 gen_rtx_MULT (mode, x0, b)));
32058 emit_insn (gen_rtx_SET (VOIDmode, e0,
32059 gen_rtx_MULT (mode, x0, e0)));
32062 emit_insn (gen_rtx_SET (VOIDmode, e1,
32063 gen_rtx_PLUS (mode, x0, x0)));
32066 emit_insn (gen_rtx_SET (VOIDmode, x1,
32067 gen_rtx_MINUS (mode, e1, e0)));
32070 emit_insn (gen_rtx_SET (VOIDmode, res,
32071 gen_rtx_MULT (mode, a, x1)));
/* Output code to perform a Newton-Raphson approximation of a
32075 single precision floating point [reciprocal] square root. */
32077 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
32080 rtx x0, e0, e1, e2, e3, mthree, mhalf;
32083 x0 = gen_reg_rtx (mode);
32084 e0 = gen_reg_rtx (mode);
32085 e1 = gen_reg_rtx (mode);
32086 e2 = gen_reg_rtx (mode);
32087 e3 = gen_reg_rtx (mode);
32089 real_from_integer (&r, VOIDmode, -3, -1, 0);
32090 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32092 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
32093 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
32095 if (VECTOR_MODE_P (mode))
32097 mthree = ix86_build_const_vector (mode, true, mthree);
32098 mhalf = ix86_build_const_vector (mode, true, mhalf);
32101 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32102 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
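/* Both forms are one Newton-Raphson step for 1/sqrt(a),
   x1 = 0.5 * x0 * (3 - a * x0 * x0), written with negated constants so
   that only multiplies and an add are needed beyond the
   rsqrtss/rsqrtps estimate.  */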
32104 /* x0 = rsqrt(a) estimate */
32105 emit_insn (gen_rtx_SET (VOIDmode, x0,
32106 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
/* If a == 0.0, filter out infinity to prevent NaN for sqrt (0.0).  */
32114 zero = gen_reg_rtx (mode);
32115 mask = gen_reg_rtx (mode);
32117 zero = force_reg (mode, CONST0_RTX(mode));
32118 emit_insn (gen_rtx_SET (VOIDmode, mask,
32119 gen_rtx_NE (mode, zero, a)));
32121 emit_insn (gen_rtx_SET (VOIDmode, x0,
32122 gen_rtx_AND (mode, x0, mask)));
32126 emit_insn (gen_rtx_SET (VOIDmode, e0,
32127 gen_rtx_MULT (mode, x0, a)));
32129 emit_insn (gen_rtx_SET (VOIDmode, e1,
32130 gen_rtx_MULT (mode, e0, x0)));
32133 mthree = force_reg (mode, mthree);
32134 emit_insn (gen_rtx_SET (VOIDmode, e2,
32135 gen_rtx_PLUS (mode, e1, mthree)));
32137 mhalf = force_reg (mode, mhalf);
32139 /* e3 = -.5 * x0 */
32140 emit_insn (gen_rtx_SET (VOIDmode, e3,
32141 gen_rtx_MULT (mode, x0, mhalf)));
32143 /* e3 = -.5 * e0 */
32144 emit_insn (gen_rtx_SET (VOIDmode, e3,
32145 gen_rtx_MULT (mode, e0, mhalf)));
32146 /* ret = e2 * e3 */
32147 emit_insn (gen_rtx_SET (VOIDmode, res,
32148 gen_rtx_MULT (mode, e2, e3)));
32151 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32153 static void ATTRIBUTE_UNUSED
32154 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32157 /* With Binutils 2.15, the "@unwind" marker must be specified on
every occurrence of the ".eh_frame" section, not just the first one.  */
32161 && strcmp (name, ".eh_frame") == 0)
32163 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32164 flags & SECTION_WRITE ? "aw" : "a");
32167 default_elf_asm_named_section (name, flags, decl);
32170 /* Return the mangling of TYPE if it is an extended fundamental type. */
32172 static const char *
32173 ix86_mangle_type (const_tree type)
32175 type = TYPE_MAIN_VARIANT (type);
32177 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32178 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32181 switch (TYPE_MODE (type))
32184 /* __float128 is "g". */
32187 /* "long double" or __float80 is "e". */
32194 /* For 32-bit code we can save PIC register setup by using
32195 __stack_chk_fail_local hidden function instead of calling
32196 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
32197 register, so it is better to call __stack_chk_fail directly. */
32200 ix86_stack_protect_fail (void)
32202 return TARGET_64BIT
32203 ? default_external_stack_protect_fail ()
32204 : default_hidden_stack_protect_fail ();
32207 /* Select a format to encode pointers in exception handling data. CODE
32208 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32209 true if the symbol may be affected by dynamic relocations.
32211 ??? All x86 object file formats are capable of representing this.
32212 After all, the relocation needed is the same as for the call insn.
32213 Whether or not a particular assembler allows us to enter such, I
32214 guess we'll have to see. */
32216 asm_preferred_eh_data_format (int code, int global)
if (flag_pic)
  {
    int type = DW_EH_PE_sdata8;
    if (!TARGET_64BIT
        || ix86_cmodel == CM_SMALL_PIC
        || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
      type = DW_EH_PE_sdata4;
    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
  }
32227 if (ix86_cmodel == CM_SMALL
32228 || (ix86_cmodel == CM_MEDIUM && code))
32229 return DW_EH_PE_udata4;
32230 return DW_EH_PE_absptr;
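/* Worked out from the cases above: 64-bit small-model PIC code gets
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (plus DW_EH_PE_indirect for global
   symbols), non-PIC code gets DW_EH_PE_udata4 when the small (or, for
   code labels, medium) model guarantees a 32-bit range, and
   DW_EH_PE_absptr otherwise.  */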
32233 /* Expand copysign from SIGN to the positive value ABS_VALUE
32234 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
32237 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32239 enum machine_mode mode = GET_MODE (sign);
32240 rtx sgn = gen_reg_rtx (mode);
32241 if (mask == NULL_RTX)
32243 enum machine_mode vmode;
if (mode == SFmode)
  vmode = V4SFmode;
else if (mode == DFmode)
  vmode = V2DFmode;
32252 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32253 if (!VECTOR_MODE_P (mode))
32255 /* We need to generate a scalar mode mask in this case. */
32256 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32257 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32258 mask = gen_reg_rtx (mode);
32259 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32263 mask = gen_rtx_NOT (mode, mask);
32264 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32265 gen_rtx_AND (mode, mask, sign)));
32266 emit_insn (gen_rtx_SET (VOIDmode, result,
32267 gen_rtx_IOR (mode, abs_value, sgn)));
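/* In scalar terms the two emitted operations are the usual IEEE bit
   trick, roughly (a sketch; bits/from_bits stand for type punning and
   SIGNBIT for the mask built above, none of which are names used in
   this file):

     uint64_t sgn = bits (sign) & SIGNBIT;       /* isolate the sign  */
     return from_bits (bits (abs_value) | sgn);  /* abs | sign        */
*/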
32270 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32271 mask for masking out the sign-bit is stored in *SMASK, if that is
32274 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32276 enum machine_mode vmode, mode = GET_MODE (op0);
32279 xa = gen_reg_rtx (mode);
if (mode == SFmode)
  vmode = V4SFmode;
else if (mode == DFmode)
  vmode = V2DFmode;
32286 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32287 if (!VECTOR_MODE_P (mode))
32289 /* We need to generate a scalar mode mask in this case. */
32290 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32291 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32292 mask = gen_reg_rtx (mode);
32293 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32295 emit_insn (gen_rtx_SET (VOIDmode, xa,
32296 gen_rtx_AND (mode, op0, mask)));
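/* This is fabs as a bit operation, i.e. x & ~SIGNBIT in scalar terms;
   the same inverted sign mask is handed back through *SMASK so a later
   copysign can reuse it.  */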
32304 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32305 swapping the operands if SWAP_OPERANDS is true. The expanded
32306 code is a forward jump to a newly created label in case the
32307 comparison is true. The generated label rtx is returned. */
32309 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32310 bool swap_operands)
32321 label = gen_label_rtx ();
32322 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32323 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32324 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32325 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32326 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32327 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32328 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32329 JUMP_LABEL (tmp) = label;
32334 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32335 using comparison code CODE. Operands are swapped for the comparison if
32336 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32338 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32339 bool swap_operands)
32341 rtx (*insn)(rtx, rtx, rtx, rtx);
32342 enum machine_mode mode = GET_MODE (op0);
32343 rtx mask = gen_reg_rtx (mode);
32352 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
32354 emit_insn (insn (mask, op0, op1,
32355 gen_rtx_fmt_ee (code, mode, op0, op1)));
32359 /* Generate and return a rtx of mode MODE for 2**n where n is the number
32360 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
32362 ix86_gen_TWO52 (enum machine_mode mode)
32364 REAL_VALUE_TYPE TWO52r;
32367 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32368 TWO52 = const_double_from_real_value (TWO52r, mode);
32369 TWO52 = force_reg (mode, TWO52);
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
32377 ix86_expand_lround (rtx op0, rtx op1)
/* C code for the stuff we're doing below:
      tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
      return (long)tmp;
 */
32383 enum machine_mode mode = GET_MODE (op1);
32384 const struct real_format *fmt;
32385 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32388 /* load nextafter (0.5, 0.0) */
32389 fmt = REAL_MODE_FORMAT (mode);
32390 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32391 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
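/* pred_half is nextafter (0.5, 0.0), i.e. 0.5 - 2**-54 for doubles,
   the largest value strictly below 0.5.  Adding exactly 0.5 would
   round x = nextafter (0.5, 0.0) up to 1.0 (x + 0.5 lands halfway
   between two doubles and ties to even), whereas x + pred_half is
   exactly 1 - 2**-53 and still truncates to 0.  */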
32393 /* adj = copysign (0.5, op1) */
32394 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32395 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32397 /* adj = op1 + adj */
32398 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32400 /* op0 = (imode)adj */
32401 expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
   into OPERAND0.  */
32407 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
/* C code for the stuff we're doing below (for do_floor):
      xi = (long)op1;
      xi -= (double)xi > op1 ? 1 : 0;
      return xi;
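   and for do_ceil the compensation flips (a mirrored sketch):
      xi = (long)op1;
      xi += (double)xi < op1 ? 1 : 0;
      return xi;
 */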
32414 enum machine_mode fmode = GET_MODE (op1);
32415 enum machine_mode imode = GET_MODE (op0);
32416 rtx ireg, freg, label, tmp;
32418 /* reg = (long)op1 */
32419 ireg = gen_reg_rtx (imode);
32420 expand_fix (ireg, op1, 0);
32422 /* freg = (double)reg */
32423 freg = gen_reg_rtx (fmode);
32424 expand_float (freg, ireg, 0);
32426 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32427 label = ix86_expand_sse_compare_and_jump (UNLE,
32428 freg, op1, !do_floor);
32429 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32430 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32431 emit_move_insn (ireg, tmp);
32433 emit_label (label);
32434 LABEL_NUSES (label) = 1;
32436 emit_move_insn (op0, ireg);
32439 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32440 result in OPERAND0. */
32442 ix86_expand_rint (rtx operand0, rtx operand1)
/* C code for the stuff we're doing below:
      xa = fabs (operand1);
      if (!isless (xa, 2**52))
        return operand1;
      xa = xa + 2**52 - 2**52;
      return copysign (xa, operand1);
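   For |operand1| >= 2**52 every representable value is already an
   integer, and a NaN fails the isless test as well, so both take the
   early exit unchanged; the add/sub of 2**52 relies on the prevailing
   round-to-nearest mode.
 */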
32451 enum machine_mode mode = GET_MODE (operand0);
32452 rtx res, xa, label, TWO52, mask;
32454 res = gen_reg_rtx (mode);
32455 emit_move_insn (res, operand1);
32457 /* xa = abs (operand1) */
32458 xa = ix86_expand_sse_fabs (res, &mask);
32460 /* if (!isless (xa, TWO52)) goto label; */
32461 TWO52 = ix86_gen_TWO52 (mode);
32462 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32464 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32465 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32467 ix86_sse_copysign_to_positive (res, xa, res, mask);
32469 emit_label (label);
32470 LABEL_NUSES (label) = 1;
32472 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
32478 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
/* C code for the stuff we expand below.
      double xa = fabs (x), x2;
      if (!isless (xa, TWO52))
        return x;
      xa = xa + TWO52 - TWO52;
      x2 = copysign (xa, x);
   Compensate.  Floor:
      if (x2 > x)
        x2 -= 1;
   Compensate.  Ceil:
      if (x2 < x)
        x2 += 1;
      return x2;
 */
32494 enum machine_mode mode = GET_MODE (operand0);
32495 rtx xa, TWO52, tmp, label, one, res, mask;
32497 TWO52 = ix86_gen_TWO52 (mode);
32499 /* Temporary for holding the result, initialized to the input
32500 operand to ease control flow. */
32501 res = gen_reg_rtx (mode);
32502 emit_move_insn (res, operand1);
32504 /* xa = abs (operand1) */
32505 xa = ix86_expand_sse_fabs (res, &mask);
32507 /* if (!isless (xa, TWO52)) goto label; */
32508 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32510 /* xa = xa + TWO52 - TWO52; */
32511 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32512 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32514 /* xa = copysign (xa, operand1) */
32515 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32517 /* generate 1.0 or -1.0 */
32518 one = force_reg (mode,
32519 const_double_from_real_value (do_floor
32520 ? dconst1 : dconstm1, mode));
32522 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32523 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32524 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32525 gen_rtx_AND (mode, one, tmp)));
32526 /* We always need to subtract here to preserve signed zero. */
32527 tmp = expand_simple_binop (mode, MINUS,
32528 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32529 emit_move_insn (res, tmp);
32531 emit_label (label);
32532 LABEL_NUSES (label) = 1;
32534 emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
32540 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
/* C code for the stuff we expand below.
      double xa = fabs (x), x2;
      if (!isless (xa, TWO52))
        return x;
      x2 = (double)(long)x;
   Compensate.  Floor:
      if (x2 > x)
        x2 -= 1;
   Compensate.  Ceil:
      if (x2 < x)
        x2 += 1;
      if (HONOR_SIGNED_ZEROS (mode))
        return copysign (x2, x);
      return x2;
 */
32557 enum machine_mode mode = GET_MODE (operand0);
32558 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32560 TWO52 = ix86_gen_TWO52 (mode);
32562 /* Temporary for holding the result, initialized to the input
32563 operand to ease control flow. */
32564 res = gen_reg_rtx (mode);
32565 emit_move_insn (res, operand1);
32567 /* xa = abs (operand1) */
32568 xa = ix86_expand_sse_fabs (res, &mask);
32570 /* if (!isless (xa, TWO52)) goto label; */
32571 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32573 /* xa = (double)(long)x */
32574 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32575 expand_fix (xi, res, 0);
32576 expand_float (xa, xi, 0);
32579 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32581 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32582 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32583 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32584 gen_rtx_AND (mode, one, tmp)));
32585 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32586 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32587 emit_move_insn (res, tmp);
32589 if (HONOR_SIGNED_ZEROS (mode))
32590 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32592 emit_label (label);
32593 LABEL_NUSES (label) = 1;
32595 emit_move_insn (operand0, res);
32598 /* Expand SSE sequence for computing round from OPERAND1 storing
32599 into OPERAND0. Sequence that works without relying on DImode truncation
32600 via cvttsd2siq that is only available on 64bit targets. */
32602 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
/* C code for the stuff we expand below.
      double xa = fabs (x), xa2, x2;
      if (!isless (xa, TWO52))
        return x;
   Using the absolute value and copying back sign makes
   -0.0 -> -0.0 correct.
      xa2 = xa + TWO52 - TWO52;
      dxa = xa2 - xa;
      if (dxa <= -0.5)
        xa2 += 1;
      else if (dxa > 0.5)
        xa2 -= 1;
      x2 = copysign (xa2, x);
      return x2;
 */
32620 enum machine_mode mode = GET_MODE (operand0);
32621 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32623 TWO52 = ix86_gen_TWO52 (mode);
32625 /* Temporary for holding the result, initialized to the input
32626 operand to ease control flow. */
32627 res = gen_reg_rtx (mode);
32628 emit_move_insn (res, operand1);
32630 /* xa = abs (operand1) */
32631 xa = ix86_expand_sse_fabs (res, &mask);
32633 /* if (!isless (xa, TWO52)) goto label; */
32634 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32636 /* xa2 = xa + TWO52 - TWO52; */
32637 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32638 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32640 /* dxa = xa2 - xa; */
32641 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32643 /* generate 0.5, 1.0 and -0.5 */
32644 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32645 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32646 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32650 tmp = gen_reg_rtx (mode);
32651 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32652 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32653 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32654 gen_rtx_AND (mode, one, tmp)));
32655 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32656 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32657 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32658 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32659 gen_rtx_AND (mode, one, tmp)));
32660 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32662 /* res = copysign (xa2, operand1) */
32663 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32665 emit_label (label);
32666 LABEL_NUSES (label) = 1;
32668 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
32674 ix86_expand_trunc (rtx operand0, rtx operand1)
/* C code for SSE variant we expand below.
      double xa = fabs (x), x2;
      if (!isless (xa, TWO52))
        return x;
      x2 = (double)(long)x;
      if (HONOR_SIGNED_ZEROS (mode))
        return copysign (x2, x);
      return x2;
 */
32685 enum machine_mode mode = GET_MODE (operand0);
32686 rtx xa, xi, TWO52, label, res, mask;
32688 TWO52 = ix86_gen_TWO52 (mode);
32690 /* Temporary for holding the result, initialized to the input
32691 operand to ease control flow. */
32692 res = gen_reg_rtx (mode);
32693 emit_move_insn (res, operand1);
32695 /* xa = abs (operand1) */
32696 xa = ix86_expand_sse_fabs (res, &mask);
32698 /* if (!isless (xa, TWO52)) goto label; */
32699 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32701 /* x = (double)(long)x */
32702 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32703 expand_fix (xi, res, 0);
32704 expand_float (res, xi, 0);
32706 if (HONOR_SIGNED_ZEROS (mode))
32707 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32709 emit_label (label);
32710 LABEL_NUSES (label) = 1;
32712 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
32718 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32720 enum machine_mode mode = GET_MODE (operand0);
32721 rtx xa, mask, TWO52, label, one, res, smask, tmp;
/* C code for SSE variant we expand below.
      double xa = fabs (x), xa2, x2;
      if (!isless (xa, TWO52))
        return x;
      xa2 = xa + TWO52 - TWO52;
   Compensate:
      if (xa2 > xa)
        xa2 -= 1.0;
      x2 = copysign (xa2, x);
      return x2;
 */
32735 TWO52 = ix86_gen_TWO52 (mode);
32737 /* Temporary for holding the result, initialized to the input
32738 operand to ease control flow. */
32739 res = gen_reg_rtx (mode);
32740 emit_move_insn (res, operand1);
32742 /* xa = abs (operand1) */
32743 xa = ix86_expand_sse_fabs (res, &smask);
32745 /* if (!isless (xa, TWO52)) goto label; */
32746 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32748 /* res = xa + TWO52 - TWO52; */
32749 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32750 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32751 emit_move_insn (res, tmp);
32754 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32756 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32757 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32758 emit_insn (gen_rtx_SET (VOIDmode, mask,
32759 gen_rtx_AND (mode, mask, one)));
32760 tmp = expand_simple_binop (mode, MINUS,
32761 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32762 emit_move_insn (res, tmp);
32764 /* res = copysign (res, operand1) */
32765 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32767 emit_label (label);
32768 LABEL_NUSES (label) = 1;
32770 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
32776 ix86_expand_round (rtx operand0, rtx operand1)
/* C code for the stuff we're doing below:
      double xa = fabs (x);
      if (!isless (xa, TWO52))
        return x;
      xa = (double)(long)(xa + nextafter (0.5, 0.0));
      return copysign (xa, x);
 */
32785 enum machine_mode mode = GET_MODE (operand0);
32786 rtx res, TWO52, xa, label, xi, half, mask;
32787 const struct real_format *fmt;
32788 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32790 /* Temporary for holding the result, initialized to the input
32791 operand to ease control flow. */
32792 res = gen_reg_rtx (mode);
32793 emit_move_insn (res, operand1);
32795 TWO52 = ix86_gen_TWO52 (mode);
32796 xa = ix86_expand_sse_fabs (res, &mask);
32797 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32799 /* load nextafter (0.5, 0.0) */
32800 fmt = REAL_MODE_FORMAT (mode);
32801 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32802 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32804 /* xa = xa + 0.5 */
32805 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32806 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32808 /* xa = (double)(int64_t)xa */
32809 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32810 expand_fix (xi, xa, 0);
32811 expand_float (xa, xi, 0);
32813 /* res = copysign (xa, operand1) */
32814 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32816 emit_label (label);
32817 LABEL_NUSES (label) = 1;
32819 emit_move_insn (operand0, res);
32823 /* Table of valid machine attributes. */
32824 static const struct attribute_spec ix86_attribute_table[] =
32826 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
32827 affects_type_identity } */
32828 /* Stdcall attribute says callee is responsible for popping arguments
32829 if they are not variable. */
32830 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32832 /* Fastcall attribute says callee is responsible for popping arguments
32833 if they are not variable. */
32834 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32836 /* Thiscall attribute says callee is responsible for popping arguments
32837 if they are not variable. */
32838 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32840 /* Cdecl attribute says the callee is a normal C declaration */
32841 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32843 /* Regparm attribute specifies how many integer arguments are to be
32844 passed in registers. */
32845 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
32847 /* Sseregparm attribute says we are using x86_64 calling conventions
32848 for FP arguments. */
32849 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
32851 /* force_align_arg_pointer says this function realigns the stack at entry. */
32852 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32853 false, true, true, ix86_handle_cconv_attribute, false },
32854 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32855 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
32856 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
32857 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
32860 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32862 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
32864 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32865 SUBTARGET_ATTRIBUTE_TABLE,
32867 /* ms_abi and sysv_abi calling convention function attributes. */
32868 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32869 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
32870 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
32872 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32873 ix86_handle_callee_pop_aggregate_return, true },
32875 { NULL, 0, 0, false, false, false, NULL, false }
32878 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32880 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32881 tree vectype ATTRIBUTE_UNUSED,
32882 int misalign ATTRIBUTE_UNUSED)
32884 switch (type_of_cost)
case scalar_stmt:
  return ix86_cost->scalar_stmt_cost;

case scalar_load:
  return ix86_cost->scalar_load_cost;

case scalar_store:
  return ix86_cost->scalar_store_cost;

case vector_stmt:
  return ix86_cost->vec_stmt_cost;

case vector_load:
  return ix86_cost->vec_align_load_cost;

case vector_store:
  return ix86_cost->vec_store_cost;
32904 case vec_to_scalar:
32905 return ix86_cost->vec_to_scalar_cost;
32907 case scalar_to_vec:
32908 return ix86_cost->scalar_to_vec_cost;
32910 case unaligned_load:
32911 case unaligned_store:
32912 return ix86_cost->vec_unalign_load_cost;
32914 case cond_branch_taken:
32915 return ix86_cost->cond_taken_branch_cost;
32917 case cond_branch_not_taken:
32918 return ix86_cost->cond_not_taken_branch_cost;
default:
  gcc_unreachable ();
32929 /* Implement targetm.vectorize.builtin_vec_perm. */
32932 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32934 tree itype = TREE_TYPE (vec_type);
32935 bool u = TYPE_UNSIGNED (itype);
32936 enum machine_mode vmode = TYPE_MODE (vec_type);
32937 enum ix86_builtins fcode;
32938 bool ok = TARGET_SSE2;
32944 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32947 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32949 itype = ix86_get_builtin_type (IX86_BT_DI);
32954 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32958 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32960 itype = ix86_get_builtin_type (IX86_BT_SI);
32964 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32967 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32970 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32973 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32983 *mask_type = itype;
32984 return ix86_builtins[(int) fcode];
32987 /* Return a vector mode with twice as many elements as VMODE. */
32988 /* ??? Consider moving this to a table generated by genmodes.c. */
32990 static enum machine_mode
32991 doublesize_vector_mode (enum machine_mode vmode)
32995 case V2SFmode: return V4SFmode;
32996 case V1DImode: return V2DImode;
32997 case V2SImode: return V4SImode;
32998 case V4HImode: return V8HImode;
32999 case V8QImode: return V16QImode;
33001 case V2DFmode: return V4DFmode;
33002 case V4SFmode: return V8SFmode;
33003 case V2DImode: return V4DImode;
33004 case V4SImode: return V8SImode;
33005 case V8HImode: return V16HImode;
33006 case V16QImode: return V32QImode;
33008 case V4DFmode: return V8DFmode;
33009 case V8SFmode: return V16SFmode;
33010 case V4DImode: return V8DImode;
33011 case V8SImode: return V16SImode;
33012 case V16HImode: return V32HImode;
33013 case V32QImode: return V64QImode;
33016 gcc_unreachable ();
33020 /* Construct (set target (vec_select op0 (parallel perm))) and
33021 return true if that's a valid instruction in the active ISA. */
33024 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
33026 rtx rperm[MAX_VECT_LEN], x;
33029 for (i = 0; i < nelt; ++i)
33030 rperm[i] = GEN_INT (perm[i]);
33032 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
33033 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
33034 x = gen_rtx_SET (VOIDmode, target, x);
x = emit_insn (x);
if (recog_memoized (x) < 0)
  {
    remove_insn (x);
    return false;
  }
return true;
33045 /* Similar, but generate a vec_concat from op0 and op1 as well. */
33048 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
33049 const unsigned char *perm, unsigned nelt)
33051 enum machine_mode v2mode;
33054 v2mode = doublesize_vector_mode (GET_MODE (op0));
33055 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
33056 return expand_vselect (target, x, perm, nelt);
33059 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33060 in terms of blendp[sd] / pblendw / pblendvb. */
33063 expand_vec_perm_blend (struct expand_vec_perm_d *d)
33065 enum machine_mode vmode = d->vmode;
33066 unsigned i, mask, nelt = d->nelt;
33067 rtx target, op0, op1, x;
33069 if (!TARGET_SSE4_1 || d->op0 == d->op1)
33071 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
33074 /* This is a blend, not a permute. Elements must stay in their
33075 respective lanes. */
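/* E.g. for V4SF, perm {0,5,2,7} keeps each element in its lane and is
   a blend, while perm {1,4,2,7} moves element 1 into lane 0 and is
   rejected here.  */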
33076 for (i = 0; i < nelt; ++i)
33078 unsigned e = d->perm[i];
33079 if (!(e == i || e == i + nelt))
33086 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
33087 decision should be extracted elsewhere, so that we only try that
33088 sequence once all budget==3 options have been tried. */
33090 /* For bytes, see if bytes move in pairs so we can use pblendw with
33091 an immediate argument, rather than pblendvb with a vector argument. */
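/* E.g. the V16QI blend {0,1, 18,19, 4,5, 22,23, ...} moves whole
   halfwords, so it can be encoded as a pblendw immediate instead of
   materializing a V16QI constant for pblendvb.  */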
33092 if (vmode == V16QImode)
33094 bool pblendw_ok = true;
33095 for (i = 0; i < 16 && pblendw_ok; i += 2)
33096 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
33100 rtx rperm[16], vperm;
33102 for (i = 0; i < nelt; ++i)
33103 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
33105 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33106 vperm = force_reg (V16QImode, vperm);
33108 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33113 target = d->target;
33125 for (i = 0; i < nelt; ++i)
33126 mask |= (d->perm[i] >= nelt) << i;
33130 for (i = 0; i < 2; ++i)
33131 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33135 for (i = 0; i < 4; ++i)
33136 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33140 for (i = 0; i < 8; ++i)
33141 mask |= (d->perm[i * 2] >= 16) << i;
33145 target = gen_lowpart (vmode, target);
33146 op0 = gen_lowpart (vmode, op0);
33147 op1 = gen_lowpart (vmode, op1);
33151 gcc_unreachable ();
33154 /* This matches five different patterns with the different modes. */
33155 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33156 x = gen_rtx_SET (VOIDmode, target, x);
33162 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33163 in terms of the variable form of vpermilps.
33165 Note that we will have already failed the immediate input vpermilps,
33166 which requires that the high and low part shuffle be identical; the
33167 variable form doesn't require that. */
33170 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33172 rtx rperm[8], vperm;
33175 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33178 /* We can only permute within the 128-bit lane. */
33179 for (i = 0; i < 8; ++i)
33181 unsigned e = d->perm[i];
33182 if (i < 4 ? e >= 4 : e < 4)
33189 for (i = 0; i < 8; ++i)
33191 unsigned e = d->perm[i];
33193 /* Within each 128-bit lane, the elements of op0 are numbered
33194 from 0 and the elements of op1 are numbered from 4. */
33200 rperm[i] = GEN_INT (e);
33203 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33204 vperm = force_reg (V8SImode, vperm);
33205 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33210 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33211 in terms of pshufb or vpperm. */
33214 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33216 unsigned i, nelt, eltsz;
33217 rtx rperm[16], vperm, target, op0, op1;
33219 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33221 if (GET_MODE_SIZE (d->vmode) != 16)
33228 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
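/* Each element index is widened below into eltsz consecutive byte
   selectors; e.g. for V4SI (eltsz == 4), selecting element e emits
   the control bytes 4e, 4e+1, 4e+2, 4e+3.  */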
33230 for (i = 0; i < nelt; ++i)
33232 unsigned j, e = d->perm[i];
33233 for (j = 0; j < eltsz; ++j)
33234 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33237 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33238 vperm = force_reg (V16QImode, vperm);
33240 target = gen_lowpart (V16QImode, d->target);
33241 op0 = gen_lowpart (V16QImode, d->op0);
33242 if (d->op0 == d->op1)
33243 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33246 op1 = gen_lowpart (V16QImode, d->op1);
33247 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
33253 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33254 in a single instruction. */
33257 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33259 unsigned i, nelt = d->nelt;
33260 unsigned char perm2[MAX_VECT_LEN];
33262 /* Check plain VEC_SELECT first, because AVX has instructions that could
33263 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33264 input where SEL+CONCAT may not. */
33265 if (d->op0 == d->op1)
33267 int mask = nelt - 1;
33269 for (i = 0; i < nelt; i++)
33270 perm2[i] = d->perm[i] & mask;
33272 if (expand_vselect (d->target, d->op0, perm2, nelt))
33275 /* There are plenty of patterns in sse.md that are written for
33276 SEL+CONCAT and are not replicated for a single op. Perhaps
33277 that should be changed, to avoid the nastiness here. */
33279 /* Recognize interleave style patterns, which means incrementing
33280 every other permutation operand. */
33281 for (i = 0; i < nelt; i += 2)
33283 perm2[i] = d->perm[i] & mask;
33284 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33286 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33289 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33292 for (i = 0; i < nelt; i += 4)
33294 perm2[i + 0] = d->perm[i + 0] & mask;
33295 perm2[i + 1] = d->perm[i + 1] & mask;
33296 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33297 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33300 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33305 /* Finally, try the fully general two operand permute. */
33306 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33309 /* Recognize interleave style patterns with reversed operands. */
33310 if (d->op0 != d->op1)
33312 for (i = 0; i < nelt; ++i)
33314 unsigned e = d->perm[i];
33322 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33326 /* Try the SSE4.1 blend variable merge instructions. */
33327 if (expand_vec_perm_blend (d))
33330 /* Try one of the AVX vpermil variable permutations. */
33331 if (expand_vec_perm_vpermil (d))
33334 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33335 if (expand_vec_perm_pshufb (d))
33341 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33342 in terms of a pair of pshuflw + pshufhw instructions. */
33345 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33347 unsigned char perm2[MAX_VECT_LEN];
33351 if (d->vmode != V8HImode || d->op0 != d->op1)
33354 /* The two permutations only operate in 64-bit lanes. */
33355 for (i = 0; i < 4; ++i)
33356 if (d->perm[i] >= 4)
33358 for (i = 4; i < 8; ++i)
33359 if (d->perm[i] < 4)
33365 /* Emit the pshuflw. */
33366 memcpy (perm2, d->perm, 4);
33367 for (i = 4; i < 8; ++i)
33369 ok = expand_vselect (d->target, d->op0, perm2, 8);
33372 /* Emit the pshufhw. */
33373 memcpy (perm2 + 4, d->perm + 4, 4);
33374 for (i = 0; i < 4; ++i)
33376 ok = expand_vselect (d->target, d->target, perm2, 8);
33382 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33383 the permutation using the SSSE3 palignr instruction. This succeeds
33384 when all of the elements in PERM fit within one vector and we merely
33385 need to shift them down so that a single vector permutation has a
33386 chance to succeed. */
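/* For instance, the two-operand V8HI permutation {3 4 5 6 7 8 9 10}
   has min == 3 and max == 10, so a palignr by 3 elements (6 bytes)
   shifts the window down and leaves the single-operand permutation
   {0 1 2 3 4 5 6 7}, which the degenerate-case test below recognizes
   as already in order.  */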
33389 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33391 unsigned i, nelt = d->nelt;
33396 /* Even with AVX, palignr only operates on 128-bit vectors. */
33397 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33400 min = nelt, max = 0;
33401 for (i = 0; i < nelt; ++i)
33403 unsigned e = d->perm[i];
33409 if (min == 0 || max - min >= nelt)
33412 /* Given that we have SSSE3, we know we'll be able to implement the
33413 single operand permutation after the palignr with pshufb. */
33417 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33418 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33419 gen_lowpart (TImode, d->op1),
33420 gen_lowpart (TImode, d->op0), shift));
33422 d->op0 = d->op1 = d->target;
33425 for (i = 0; i < nelt; ++i)
33427 unsigned e = d->perm[i] - min;
33433 /* Test for the degenerate case where the alignment by itself
33434 produces the desired permutation. */
33438 ok = expand_vec_perm_1 (d);
33444 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33445 a two vector permutation into a single vector permutation by using
33446 an interleave operation to merge the vectors. */
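/* As a concrete example, the V4SI permutation {0 4 1 5} draws only on
   the two low halves (contents == h1 | h3), so one interleave-low of
   op0/op1 produces the required ordering and the final remap collapses
   to the identity.  */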
33449 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33451 struct expand_vec_perm_d dremap, dfinal;
33452 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33453 unsigned contents, h1, h2, h3, h4;
33454 unsigned char remap[2 * MAX_VECT_LEN];
33458 if (d->op0 == d->op1)
33461 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33462 lanes. We can use similar techniques with the vperm2f128 instruction,
33463 but it requires slightly different logic. */
33464 if (GET_MODE_SIZE (d->vmode) != 16)
33467 /* Examine from whence the elements come. */
33469 for (i = 0; i < nelt; ++i)
33470 contents |= 1u << d->perm[i];
33472 /* Split the two input vectors into 4 halves. */
h1 = (1u << nelt2) - 1;
h2 = h1 << nelt2;
h3 = h2 << nelt2;
h4 = h3 << nelt2;
33478 memset (remap, 0xff, sizeof (remap));
/* If the elements are all from the two low halves, use interleave low;
   similarly for interleave high.  If the elements are from mismatched
   halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
33484 if ((contents & (h1 | h3)) == contents)
33486 for (i = 0; i < nelt2; ++i)
33489 remap[i + nelt] = i * 2 + 1;
33490 dremap.perm[i * 2] = i;
33491 dremap.perm[i * 2 + 1] = i + nelt;
33494 else if ((contents & (h2 | h4)) == contents)
33496 for (i = 0; i < nelt2; ++i)
33498 remap[i + nelt2] = i * 2;
33499 remap[i + nelt + nelt2] = i * 2 + 1;
33500 dremap.perm[i * 2] = i + nelt2;
33501 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33504 else if ((contents & (h1 | h4)) == contents)
33506 for (i = 0; i < nelt2; ++i)
33509 remap[i + nelt + nelt2] = i + nelt2;
33510 dremap.perm[i] = i;
33511 dremap.perm[i + nelt2] = i + nelt + nelt2;
33515 dremap.vmode = V2DImode;
33517 dremap.perm[0] = 0;
33518 dremap.perm[1] = 3;
33521 else if ((contents & (h2 | h3)) == contents)
33523 for (i = 0; i < nelt2; ++i)
33525 remap[i + nelt2] = i;
33526 remap[i + nelt] = i + nelt2;
33527 dremap.perm[i] = i + nelt2;
33528 dremap.perm[i + nelt2] = i + nelt;
33532 dremap.vmode = V2DImode;
33534 dremap.perm[0] = 1;
33535 dremap.perm[1] = 2;
33541 /* Use the remapping array set up above to move the elements from their
33542 swizzled locations into their final destinations. */
33544 for (i = 0; i < nelt; ++i)
33546 unsigned e = remap[d->perm[i]];
33547 gcc_assert (e < nelt);
33548 dfinal.perm[i] = e;
33550 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33551 dfinal.op1 = dfinal.op0;
33552 dremap.target = dfinal.op0;
33554 /* Test if the final remap can be done with a single insn. For V4SFmode or
33555 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33557 ok = expand_vec_perm_1 (&dfinal);
33558 seq = get_insns ();
33564 if (dremap.vmode != dfinal.vmode)
33566 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33567 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33568 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33571 ok = expand_vec_perm_1 (&dremap);
33578 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33579 permutation with two pshufb insns and an ior. We should have already
33580 failed all two instruction sequences. */
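/* The effect per output byte is roughly (a sketch; m0/m1 are the two
   generated masks):

     out[i] = (m0[i] & 0x80 ? 0 : op0[m0[i] & 15])
            | (m1[i] & 0x80 ? 0 : op1[m1[i] & 15]);

   exactly one of the pair contributes, the other is zeroed via bit 7.  */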
33583 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33585 rtx rperm[2][16], vperm, l, h, op, m128;
33586 unsigned int i, nelt, eltsz;
33588 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33590 gcc_assert (d->op0 != d->op1);
33593 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33595 /* Generate two permutation masks. If the required element is within
33596 the given vector it is shuffled into the proper lane. If the required
33597 element is in the other vector, force a zero into the lane by setting
33598 bit 7 in the permutation mask. */
33599 m128 = GEN_INT (-128);
33600 for (i = 0; i < nelt; ++i)
33602 unsigned j, e = d->perm[i];
33603 unsigned which = (e >= nelt);
33607 for (j = 0; j < eltsz; ++j)
33609 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33610 rperm[1-which][i*eltsz + j] = m128;
33614 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33615 vperm = force_reg (V16QImode, vperm);
33617 l = gen_reg_rtx (V16QImode);
33618 op = gen_lowpart (V16QImode, d->op0);
33619 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33621 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33622 vperm = force_reg (V16QImode, vperm);
33624 h = gen_reg_rtx (V16QImode);
33625 op = gen_lowpart (V16QImode, d->op1);
33626 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33628 op = gen_lowpart (V16QImode, d->target);
33629 emit_insn (gen_iorv16qi3 (op, l, h));
33634 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33635 and extract-odd permutations. */
33638 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33645 t1 = gen_reg_rtx (V4DFmode);
33646 t2 = gen_reg_rtx (V4DFmode);
33648 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33649 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33650 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33652 /* Now an unpck[lh]pd will produce the result required. */
33654 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33656 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33662 int mask = odd ? 0xdd : 0x88;
33664 t1 = gen_reg_rtx (V8SFmode);
33665 t2 = gen_reg_rtx (V8SFmode);
33666 t3 = gen_reg_rtx (V8SFmode);
33668 /* Shuffle within the 128-bit lanes to produce:
33669 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33670 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33673 /* Shuffle the lanes around to produce:
33674 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33675 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33678 /* Shuffle within the 128-bit lanes to produce:
33679 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33680 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33682 /* Shuffle within the 128-bit lanes to produce:
33683 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33684 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33686 /* Shuffle the lanes around to produce:
33687 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33688 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33697 /* These are always directly implementable by expand_vec_perm_1. */
33698 gcc_unreachable ();
33702 return expand_vec_perm_pshufb2 (d);
33705 /* We need 2*log2(N)-1 operations to achieve odd/even
33706 with interleave. */
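/* E.g. extracting the even elements {0 2 4 6 8 a c e} of a V8HI pair
   takes 2*log2(8)-1 = 5 interleaves: each low/high round regroups the
   elements by one more low-order index bit, and the final interleave
   selects the even (or odd) set.  */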
33707 t1 = gen_reg_rtx (V8HImode);
33708 t2 = gen_reg_rtx (V8HImode);
33709 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33710 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33711 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33712 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33714 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33716 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33723 return expand_vec_perm_pshufb2 (d);
33726 t1 = gen_reg_rtx (V16QImode);
33727 t2 = gen_reg_rtx (V16QImode);
33728 t3 = gen_reg_rtx (V16QImode);
33729 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33730 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33731 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33732 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33733 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33734 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33736 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33738 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33744 gcc_unreachable ();
33750 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33751 extract-even and extract-odd permutations. */
33754 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33756 unsigned i, odd, nelt = d->nelt;
odd = d->perm[0];
if (odd != 0 && odd != 1)
  return false;
33762 for (i = 1; i < nelt; ++i)
33763 if (d->perm[i] != 2 * i + odd)
33766 return expand_vec_perm_even_odd_1 (d, odd);
33769 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33770 permutations. We assume that expand_vec_perm_1 has already failed. */
33773 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33775 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33776 enum machine_mode vmode = d->vmode;
33777 unsigned char perm2[4];
33785 /* These are special-cased in sse.md so that we can optionally
33786 use the vbroadcast instruction. They expand to two insns
33787 if the input happens to be in a register. */
33788 gcc_unreachable ();
33794 /* These are always implementable using standard shuffle patterns. */
33795 gcc_unreachable ();
33799 /* These can be implemented via interleave. We save one insn by
33800 stopping once we have promoted to V4SImode and then use pshufd. */
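/* E.g. splatting byte 5 of a V16QI: interleaving the vector with
   itself duplicates the byte within a halfword, a second interleave
   reaches V4SImode, and one pshufd finishes the broadcast.  */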
33803 optab otab = vec_interleave_low_optab;
33807 otab = vec_interleave_high_optab;
33812 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33813 vmode = get_mode_wider_vector (vmode);
33814 op0 = gen_lowpart (vmode, op0);
33816 while (vmode != V4SImode);
33818 memset (perm2, elt, 4);
33819 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33824 gcc_unreachable ();
33828 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33829 broadcast permutations. */
33832 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33834 unsigned i, elt, nelt = d->nelt;
if (d->op0 != d->op1)
  return false;
elt = d->perm[0];
for (i = 1; i < nelt; ++i)
  if (d->perm[i] != elt)
    return false;
33844 return expand_vec_perm_broadcast_1 (d);
33847 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33848 With all of the interface bits taken care of, perform the expansion
33849 in D and return true on success. */
33852 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33854 /* Try a single instruction expansion. */
33855 if (expand_vec_perm_1 (d))
33858 /* Try sequences of two instructions. */
33860 if (expand_vec_perm_pshuflw_pshufhw (d))
33863 if (expand_vec_perm_palignr (d))
33866 if (expand_vec_perm_interleave2 (d))
33869 if (expand_vec_perm_broadcast (d))
33872 /* Try sequences of three instructions. */
33874 if (expand_vec_perm_pshufb2 (d))
33877 /* ??? Look for narrow permutations whose element orderings would
33878 allow the promotion to a wider mode. */
33880 /* ??? Look for sequences of interleave or a wider permute that place
33881 the data into the correct lanes for a half-vector shuffle like
33882 pshuf[lh]w or vpermilps. */
33884 /* ??? Look for sequences of interleave that produce the desired results.
33885 The combinatorics of punpck[lh] get pretty ugly... */
33887 if (expand_vec_perm_even_odd (d))
33893 /* Extract the values from the vector CST into the permutation array in D.
33894 Return 0 on error, 1 if all values from the permutation come from the
33895 first vector, 2 if all values from the second vector, and 3 otherwise. */
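/* For a V4SI mask, e.g., {0,1,2,3} yields 1, {4,5,6,7} yields 2 (and
   is folded down to {0,1,2,3} below), and {0,5,2,7} yields 3.  */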
33898 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33900 tree list = TREE_VECTOR_CST_ELTS (cst);
33901 unsigned i, nelt = d->nelt;
33904 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33906 unsigned HOST_WIDE_INT e;
33908 if (!host_integerp (TREE_VALUE (list), 1))
33910 e = tree_low_cst (TREE_VALUE (list), 1);
33914 ret |= (e < nelt ? 1 : 2);
33917 gcc_assert (list == NULL);
/* For all elements from second vector, fold the elements to first.  */
if (ret == 2)
  for (i = 0; i < nelt; ++i)
    d->perm[i] -= nelt;
33928 ix86_expand_vec_perm_builtin (tree exp)
33930 struct expand_vec_perm_d d;
33931 tree arg0, arg1, arg2;
33933 arg0 = CALL_EXPR_ARG (exp, 0);
33934 arg1 = CALL_EXPR_ARG (exp, 1);
33935 arg2 = CALL_EXPR_ARG (exp, 2);
33937 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33938 d.nelt = GET_MODE_NUNITS (d.vmode);
33939 d.testing_p = false;
33940 gcc_assert (VECTOR_MODE_P (d.vmode));
33942 if (TREE_CODE (arg2) != VECTOR_CST)
33944 error_at (EXPR_LOCATION (exp),
33945 "vector permutation requires vector constant");
33949 switch (extract_vec_perm_cst (&d, arg2))
33955 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33959 if (!operand_equal_p (arg0, arg1, 0))
33961 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33962 d.op0 = force_reg (d.vmode, d.op0);
33963 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33964 d.op1 = force_reg (d.vmode, d.op1);
/* The elements of PERM do not suggest that only the first operand
   is used, but both operands are identical.  Allow easier matching
   of the permutation by folding the permutation into the single
   input vector.  */
33973 unsigned i, nelt = d.nelt;
33974 for (i = 0; i < nelt; ++i)
33975 if (d.perm[i] >= nelt)
33981 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33982 d.op0 = force_reg (d.vmode, d.op0);
33987 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33988 d.op0 = force_reg (d.vmode, d.op0);
33993 d.target = gen_reg_rtx (d.vmode);
33994 if (ix86_expand_vec_perm_builtin_1 (&d))
/* For compiler generated permutations, we should never get here, because
   the compiler should also be checking the ok hook.  But since this is a
   builtin the user has access to, don't abort.  */
34003 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
34006 sorry ("vector permutation (%d %d %d %d)",
34007 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
34010 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
34011 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34012 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
34015 sorry ("vector permutation "
34016 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
34017 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
34018 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
34019 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
34020 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
34023 gcc_unreachable ();
34026 return CONST0_RTX (d.vmode);
34029 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
34032 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
34034 struct expand_vec_perm_d d;
34038 d.vmode = TYPE_MODE (vec_type);
34039 d.nelt = GET_MODE_NUNITS (d.vmode);
34040 d.testing_p = true;
34042 /* Given sufficient ISA support we can just return true here
34043 for selected vector modes. */
34044 if (GET_MODE_SIZE (d.vmode) == 16)
34046 /* All implementable with a single vpperm insn. */
34049 /* All implementable with 2 pshufb + 1 ior. */
34052 /* All implementable with shufpd or unpck[lh]pd. */
34057 vec_mask = extract_vec_perm_cst (&d, mask);
/* This hook cannot be called in response to something that the
   user does (unlike the builtin expander), so we shouldn't ever see
   an error generated from the extract.  */
34062 gcc_assert (vec_mask > 0 && vec_mask <= 3);
34063 one_vec = (vec_mask != 3);
34065 /* Implementable with shufps or pshufd. */
34066 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
34069 /* Otherwise we have to go through the motions and see if we can
34070 figure out how to generate the requested permutation. */
34071 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
34072 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
34074 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
34077 ret = ix86_expand_vec_perm_builtin_1 (&d);
34084 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
34086 struct expand_vec_perm_d d;
34092 d.vmode = GET_MODE (targ);
34093 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
34094 d.testing_p = false;
34096 for (i = 0; i < nelt; ++i)
34097 d.perm[i] = i * 2 + odd;
34099 /* We'll either be able to implement the permutation directly... */
34100 if (expand_vec_perm_1 (&d))
34103 /* ... or we use the special-case patterns. */
34104 expand_vec_perm_even_odd_1 (&d, odd);
34107 /* Expand an insert into a vector register through pinsr insn.
34108 Return true if successful. */
34111 ix86_expand_pinsr (rtx *operands)
34113 rtx dst = operands[0];
34114 rtx src = operands[3];
34116 unsigned int size = INTVAL (operands[1]);
34117 unsigned int pos = INTVAL (operands[2]);
34119 if (GET_CODE (dst) == SUBREG)
34121 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
34122 dst = SUBREG_REG (dst);
34125 if (GET_CODE (src) == SUBREG)
34126 src = SUBREG_REG (src);
34128 switch (GET_MODE (dst))
34135 enum machine_mode srcmode, dstmode;
34136 rtx (*pinsr)(rtx, rtx, rtx, rtx);
34138 srcmode = mode_for_size (size, MODE_INT, 0);
34143 if (!TARGET_SSE4_1)
34145 dstmode = V16QImode;
34146 pinsr = gen_sse4_1_pinsrb;
34152 dstmode = V8HImode;
34153 pinsr = gen_sse2_pinsrw;
34157 if (!TARGET_SSE4_1)
34159 dstmode = V4SImode;
34160 pinsr = gen_sse4_1_pinsrd;
34164 gcc_assert (TARGET_64BIT);
34165 if (!TARGET_SSE4_1)
34167 dstmode = V2DImode;
34168 pinsr = gen_sse4_1_pinsrq;
34175 dst = gen_lowpart (dstmode, dst);
34176 src = gen_lowpart (srcmode, src);
pos /= size;

emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
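/* For example, inserting a 16-bit value at bit position 16 selects
   V8HImode and gen_sse2_pinsrw with the lane immediate (1 << 1).  */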
/* This function returns the calling-ABI-specific va_list type node
   for FNDECL.  */
34193 ix86_fn_abi_va_list (tree fndecl)
if (!TARGET_64BIT)
  return va_list_type_node;
34197 gcc_assert (fndecl != NULL_TREE);
34199 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34200 return ms_va_list_type_node;
34202 return sysv_va_list_type_node;
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */
34209 ix86_canonical_va_list_type (tree type)
34213 /* Resolve references and pointers to va_list type. */
34214 if (TREE_CODE (type) == MEM_REF)
34215 type = TREE_TYPE (type);
34216 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34217 type = TREE_TYPE (type);
34218 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34219 type = TREE_TYPE (type);
34221 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34223 wtype = va_list_type_node;
34224 gcc_assert (wtype != NULL_TREE);
34226 if (TREE_CODE (wtype) == ARRAY_TYPE)
34228 /* If va_list is an array type, the argument may have decayed
34229 to a pointer type, e.g. by being passed to another function.
34230 In that case, unwrap both types so that we can compare the
34231 underlying records. */
34232 if (TREE_CODE (htype) == ARRAY_TYPE
34233 || POINTER_TYPE_P (htype))
34235 wtype = TREE_TYPE (wtype);
34236 htype = TREE_TYPE (htype);
34239 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34240 return va_list_type_node;
34241 wtype = sysv_va_list_type_node;
34242 gcc_assert (wtype != NULL_TREE);
34244 if (TREE_CODE (wtype) == ARRAY_TYPE)
34246 /* If va_list is an array type, the argument may have decayed
34247 to a pointer type, e.g. by being passed to another function.
34248 In that case, unwrap both types so that we can compare the
34249 underlying records. */
34250 if (TREE_CODE (htype) == ARRAY_TYPE
34251 || POINTER_TYPE_P (htype))
34253 wtype = TREE_TYPE (wtype);
34254 htype = TREE_TYPE (htype);
34257 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34258 return sysv_va_list_type_node;
34259 wtype = ms_va_list_type_node;
34260 gcc_assert (wtype != NULL_TREE);
34262 if (TREE_CODE (wtype) == ARRAY_TYPE)
34264 /* If va_list is an array type, the argument may have decayed
34265 to a pointer type, e.g. by being passed to another function.
34266 In that case, unwrap both types so that we can compare the
34267 underlying records. */
34268 if (TREE_CODE (htype) == ARRAY_TYPE
34269 || POINTER_TYPE_P (htype))
34271 wtype = TREE_TYPE (wtype);
34272 htype = TREE_TYPE (htype);
34275 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34276 return ms_va_list_type_node;
34279 return std_canonical_va_list_type (type);
34282 /* Iterate through the target-specific builtin types for va_list.
34283 IDX denotes the iterator, *PTREE is set to the result type of
34284 the va_list builtin, and *PNAME to its internal type.
34285 Returns zero if there is no element for this index, otherwise
34286 IDX should be increased upon the next call.
34287 Note, do not iterate a base builtin's name like __builtin_va_list.
34288 Used from c_common_nodes_and_builtins. */
34291 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34301 *ptree = ms_va_list_type_node;
34302 *pname = "__builtin_ms_va_list";
34306 *ptree = sysv_va_list_type_node;
34307 *pname = "__builtin_sysv_va_list";
34315 #undef TARGET_SCHED_DISPATCH
34316 #define TARGET_SCHED_DISPATCH has_dispatch
34317 #undef TARGET_SCHED_DISPATCH_DO
34318 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34320 /* The size of the dispatch window is the total number of bytes of
34321 object code allowed in a window. */
34322 #define DISPATCH_WINDOW_SIZE 16
34324 /* Number of dispatch windows considered for scheduling. */
34325 #define MAX_DISPATCH_WINDOWS 3
34327 /* Maximum number of instructions in a window. */
34330 /* Maximum number of immediate operands in a window. */
34333 /* Maximum number of immediate bits allowed in a window. */
34334 #define MAX_IMM_SIZE 128
34336 /* Maximum number of 32 bit immediates allowed in a window. */
34337 #define MAX_IMM_32 4
34339 /* Maximum number of 64 bit immediates allowed in a window. */
34340 #define MAX_IMM_64 2
34342 /* Maximum total of loads or prefetches allowed in a window. */
34345 /* Maximum total of stores allowed in a window. */
34346 #define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
34353 enum dispatch_group {
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by the dispatch_group enum.  100 is used as a big number
   because the number of this kind of operation does not have any
   effect in the dispatch window, but we need them for other reasons
   in the table.  */
#define BIG 100

static unsigned int num_allowable_groups[disp_last] = {
34374 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34377 char group_name[disp_last + 1][16] = {
34378 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34379 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34380 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34383 /* Instruction path. */
34386 path_single, /* Single micro op. */
34387 path_double, /* Double micro op. */
  path_multi, /* Instructions with more than 2 micro ops.  */
34392 /* sched_insn_info defines a window to the instructions scheduled in
34393 the basic block. It contains a pointer to the insn_info table and
34394 the instruction scheduled.
   Windows are allocated for each basic block and are linked with
   each other.  */
34398 typedef struct sched_insn_info_s {
34400 enum dispatch_group group;
34401 enum insn_path path;
34406 /* Linked list of dispatch windows. This is a two way list of
34407 dispatch windows of a basic block. It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
34411 typedef struct dispatch_windows_s {
  int num_insn; /* Number of insns in the window.  */
34413 int num_uops; /* Number of uops in the window. */
34414 int window_size; /* Number of bytes in the window. */
  int window_num; /* Window number, 0 or 1.  */
34416 int num_imm; /* Number of immediates in an insn. */
34417 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34418 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34419 int imm_size; /* Total immediates in the window. */
34420 int num_loads; /* Total memory loads in the window. */
34421 int num_stores; /* Total memory stores in the window. */
34422 int violation; /* Violation exists in window. */
34423 sched_insn_info *window; /* Pointer to the window. */
34424 struct dispatch_windows_s *next;
34425 struct dispatch_windows_s *prev;
34426 } dispatch_windows;
/* Immediate values used in an insn.  */
34429 typedef struct imm_info_s
34436 static dispatch_windows *dispatch_window_list;
34437 static dispatch_windows *dispatch_window_list1;
34439 /* Get dispatch group of insn. */
34441 static enum dispatch_group
34442 get_mem_group (rtx insn)
34444 enum attr_memory memory;
34446 if (INSN_CODE (insn) < 0)
34447 return disp_no_group;
34448 memory = get_attr_memory (insn);
if (memory == MEMORY_STORE)
  return disp_store;

if (memory == MEMORY_LOAD)
  return disp_load;

if (memory == MEMORY_BOTH)
  return disp_load_store;
34458 return disp_no_group;
34461 /* Return true if insn is a compare instruction. */
34466 enum attr_type type;
34468 type = get_attr_type (insn);
34469 return (type == TYPE_TEST
34470 || type == TYPE_ICMP
34471 || type == TYPE_FCMP
34472 || GET_CODE (PATTERN (insn)) == COMPARE);
34475 /* Return true if a dispatch violation was encountered. */
34477 static bool
34478 dispatch_violation (void)
34479 {
34480 if (dispatch_window_list->next)
34481 return dispatch_window_list->next->violation;
34482 return dispatch_window_list->violation;
34483 }
34485 /* Return true if insn is a branch instruction. */
34487 static bool
34488 is_branch (rtx insn)
34489 {
34490 return (CALL_P (insn) || JUMP_P (insn));
34491 }
34493 /* Return true if insn is a prefetch instruction. */
34495 static bool
34496 is_prefetch (rtx insn)
34497 {
34498 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34499 }
34501 /* This function initializes a dispatch window and the list container holding a
34502 pointer to the window. */
34504 static void
34505 init_window (int window_num)
34506 {
34507 int i;
34508 dispatch_windows *new_list;
34510 if (window_num == 0)
34511 new_list = dispatch_window_list;
34512 else
34513 new_list = dispatch_window_list1;
34515 new_list->num_insn = 0;
34516 new_list->num_uops = 0;
34517 new_list->window_size = 0;
34518 new_list->next = NULL;
34519 new_list->prev = NULL;
34520 new_list->window_num = window_num;
34521 new_list->num_imm = 0;
34522 new_list->num_imm_32 = 0;
34523 new_list->num_imm_64 = 0;
34524 new_list->imm_size = 0;
34525 new_list->num_loads = 0;
34526 new_list->num_stores = 0;
34527 new_list->violation = false;
34529 for (i = 0; i < MAX_INSN; i++)
34530 {
34531 new_list->window[i].insn = NULL;
34532 new_list->window[i].group = disp_no_group;
34533 new_list->window[i].path = no_path;
34534 new_list->window[i].byte_len = 0;
34535 new_list->window[i].imm_bytes = 0;
34536 }
34537 }
34540 /* This function allocates and initializes a dispatch window and the
34541 list container holding a pointer to the window. */
34543 static dispatch_windows *
34544 allocate_window (void)
34545 {
34546 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34547 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34549 return new_list;
34550 }
34552 /* This routine initializes the dispatch scheduling information. It
34553 initiates building dispatch scheduler tables and constructs the
34554 first dispatch window. */
34556 static void
34557 init_dispatch_sched (void)
34558 {
34559 /* Allocate a dispatch list and a window. */
34560 dispatch_window_list = allocate_window ();
34561 dispatch_window_list1 = allocate_window ();
34563 init_window (0);
34564 init_window (1);
34565 }
34566 /* This function returns true if a branch is detected. End of a basic block
34567 does not have to be a branch, but here we assume only branches end a
34568 basic block. */
34570 static bool
34571 is_end_basic_block (enum dispatch_group group)
34572 {
34573 return group == disp_branch;
34574 }
34576 /* This function is called when the end of window processing is reached. */
34578 static void
34579 process_end_window (void)
34580 {
34581 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34582 if (dispatch_window_list->next)
34583 {
34584 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34585 gcc_assert (dispatch_window_list->window_size
34586 + dispatch_window_list1->window_size <= 48);
34587 init_window (1);
34588 }
34589 init_window (0);
34590 }
34592 /* Allocates a new dispatch window and adds it to the dispatch window
34593 list. WINDOW_NUM is either 0 or 1. A maximum of two windows are
34594 generated for 48 bytes of instructions. Note that these windows are
34595 not dispatch windows whose sizes are DISPATCH_WINDOW_SIZE. */
34597 static dispatch_windows *
34598 allocate_next_window (int window_num)
34599 {
34600 if (window_num == 0)
34601 {
34602 if (dispatch_window_list->next)
34603 init_window (1);
34604 init_window (0);
34605 return dispatch_window_list;
34606 }
34608 dispatch_window_list->next = dispatch_window_list1;
34609 dispatch_window_list1->prev = dispatch_window_list;
34611 return dispatch_window_list1;
34612 }
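/* Editorial sketch (hypothetical helper): window numbers simply alternate
   between 0 and 1; add_to_dispatch_window below computes the toggle with
   ~window_num & 1, which this restates.  */
#if 0
static int
next_window_num_example (int window_num)
{
  return ~window_num & 1; /* 0 -> 1, 1 -> 0.  */
}
#endif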
34614 /* Increment the number of immediate operands of an instruction. */
34616 static void
34617 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34618 {
34619 if (*in_rtx == 0)
34620 return;
34622 switch (GET_CODE (*in_rtx))
34623 {
34624 case CONST:
34625 case SYMBOL_REF:
34626 case CONST_INT:
34627 (imm_values->imm)++;
34628 if (x86_64_immediate_operand (*in_rtx, SImode))
34629 (imm_values->imm32)++;
34630 else
34631 (imm_values->imm64)++;
34632 break;
34634 case CONST_DOUBLE:
34635 (imm_values->imm)++;
34636 (imm_values->imm64)++;
34637 break;
34639 case CODE_LABEL:
34640 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34641 {
34642 (imm_values->imm)++;
34643 (imm_values->imm32)++;
34644 }
34645 break;
34647 default:
34648 break;
34649 }
34650 }
34654 /* Compute the number of immediate operands of an instruction. */
34656 static void
34657 find_constant (rtx in_rtx, imm_info *imm_values)
34658 {
34659 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34660 (rtx_function) find_constant_1, (void *) imm_values);
34661 }
34663 /* Return the total size of the immediate operands of an instruction along
34664 with the number of corresponding immediate operands. It initializes its
34665 parameters to zero before calling FIND_CONSTANT.
34666 INSN is the input instruction. IMM is the total number of immediates.
34667 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34668 bit immediates. */
34670 static int
34671 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34672 {
34673 imm_info imm_values = {0, 0, 0};
34675 find_constant (insn, &imm_values);
34676 *imm = imm_values.imm;
34677 *imm32 = imm_values.imm32;
34678 *imm64 = imm_values.imm64;
34679 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
34680 }
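/* Editorial sketch (hypothetical check, not in the original file): the byte
   count returned above is 4 bytes per 32-bit immediate plus 8 bytes per
   64-bit immediate, and find_constant_1 classifies every counted immediate
   as either 32-bit or 64-bit, which these assertions restate.  */
#if 0
static void
immediate_accounting_example (rtx insn)
{
  int imm, imm32, imm64;
  int bytes = get_num_immediates (insn, &imm, &imm32, &imm64);

  gcc_assert (bytes == imm32 * 4 + imm64 * 8);
  gcc_assert (imm == imm32 + imm64);
}
#endif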
34682 /* This function indicates whether an operand of an instruction is an
34683 immediate. */
34685 static int
34686 has_immediate (rtx insn)
34687 {
34688 int num_imm_operand;
34689 int num_imm32_operand;
34690 int num_imm64_operand;
34693 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34694 &num_imm64_operand);
34695 }
34698 /* Return single or double path for instructions. */
34700 static enum insn_path
34701 get_insn_path (rtx insn)
34702 {
34703 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34705 if ((int) path == 0)
34706 return path_single;
34708 if ((int) path == 1)
34709 return path_double;
34711 return path_multi;
34712 }
34714 /* Return the dispatch group of an insn. */
34716 static enum dispatch_group
34717 get_insn_group (rtx insn)
34718 {
34719 enum dispatch_group group = get_mem_group (insn);
34720 if (group)
34721 return group;
34723 if (is_branch (insn))
34724 return disp_branch;
34726 if (is_cmp (insn))
34727 return disp_cmp;
34729 if (has_immediate (insn))
34730 return disp_imm;
34732 if (is_prefetch (insn))
34733 return disp_prefetch;
34735 return disp_no_group;
34736 }
34738 /* Count the number of GROUP-restricted instructions in a dispatch
34739 window WINDOW_LIST. */
34741 static int
34742 count_num_restricted (rtx insn, dispatch_windows *window_list)
34743 {
34744 enum dispatch_group group = get_insn_group (insn);
34745 int imm_size;
34746 int num_imm_operand;
34747 int num_imm32_operand;
34748 int num_imm64_operand;
34750 if (group == disp_no_group)
34751 return 0;
34753 if (group == disp_imm)
34754 {
34755 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34756 &num_imm64_operand);
34757 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34758 || num_imm_operand + window_list->num_imm > MAX_IMM
34759 || (num_imm32_operand > 0
34760 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34761 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34762 || (num_imm64_operand > 0
34763 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34764 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34765 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34766 && num_imm64_operand > 0
34767 && ((window_list->num_imm_64 > 0
34768 && window_list->num_insn >= 2)
34769 || window_list->num_insn >= 3)))
34770 return BIG;
34772 return 1;
34773 }
34775 if ((group == disp_load_store
34776 && (window_list->num_loads >= MAX_LOAD
34777 || window_list->num_stores >= MAX_STORE))
34778 || ((group == disp_load
34779 || group == disp_prefetch)
34780 && window_list->num_loads >= MAX_LOAD)
34781 || (group == disp_store
34782 && window_list->num_stores >= MAX_STORE))
34783 return BIG;
34785 return 1;
34786 }
34788 /* This function returns true if insn satisfies dispatch rules on the
34789 last window scheduled. */
34791 static bool
34792 fits_dispatch_window (rtx insn)
34793 {
34794 dispatch_windows *window_list = dispatch_window_list;
34795 dispatch_windows *window_list_next = dispatch_window_list->next;
34796 unsigned int num_restrict;
34797 enum dispatch_group group = get_insn_group (insn);
34798 enum insn_path path = get_insn_path (insn);
34799 int sum;
34801 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34802 instructions should be given the lowest priority in the
34803 scheduling process in the Haifa scheduler to make sure they will be
34804 scheduled in the same dispatch window as the reference to them. */
34805 if (group == disp_jcc || group == disp_cmp)
34806 return false;
34808 /* Check nonrestricted. */
34809 if (group == disp_no_group || group == disp_branch)
34810 return true;
34812 /* Get the last dispatch window. */
34813 if (window_list_next)
34814 window_list = window_list_next;
34816 if (window_list->window_num == 1)
34817 {
34818 sum = window_list->prev->window_size + window_list->window_size;
34820 if (sum == 32
34821 || (min_insn_size (insn) + sum) >= 48)
34822 /* Window 1 is full. Go for next window. */
34823 return true;
34824 }
34826 num_restrict = count_num_restricted (insn, window_list);
34828 if (num_restrict > num_allowable_groups[group])
34829 return false;
34831 /* See if it fits in the first window. */
34832 if (window_list->window_num == 0)
34833 {
34834 /* The first window should have only single- and double-path
34835 uops. */
34836 if (path == path_double
34837 && (window_list->num_uops + 2) > MAX_INSN)
34838 return false;
34839 else if (path != path_single)
34840 return false;
34841 }
34842 return true;
34843 }
34845 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34846 dispatch window WINDOW_LIST. */
34848 static void
34849 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34850 {
34851 int byte_len = min_insn_size (insn);
34852 int num_insn = window_list->num_insn;
34853 int imm_size;
34854 sched_insn_info *window = window_list->window;
34855 enum dispatch_group group = get_insn_group (insn);
34856 enum insn_path path = get_insn_path (insn);
34857 int num_imm_operand;
34858 int num_imm32_operand;
34859 int num_imm64_operand;
34861 if (!window_list->violation && group != disp_cmp
34862 && !fits_dispatch_window (insn))
34863 window_list->violation = true;
34865 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34866 &num_imm64_operand);
34868 /* Initialize window with new instruction. */
34869 window[num_insn].insn = insn;
34870 window[num_insn].byte_len = byte_len;
34871 window[num_insn].group = group;
34872 window[num_insn].path = path;
34873 window[num_insn].imm_bytes = imm_size;
34875 window_list->window_size += byte_len;
34876 window_list->num_insn = num_insn + 1;
34877 window_list->num_uops = window_list->num_uops + num_uops;
34878 window_list->imm_size += imm_size;
34879 window_list->num_imm += num_imm_operand;
34880 window_list->num_imm_32 += num_imm32_operand;
34881 window_list->num_imm_64 += num_imm64_operand;
34883 if (group == disp_store)
34884 window_list->num_stores += 1;
34885 else if (group == disp_load
34886 || group == disp_prefetch)
34887 window_list->num_loads += 1;
34888 else if (group == disp_load_store)
34889 {
34890 window_list->num_stores += 1;
34891 window_list->num_loads += 1;
34892 }
34893 }
34895 /* Add a scheduled instruction, INSN, to the current dispatch window.
34896 If the total bytes of instructions or the number of instructions in
34897 the window exceeds what is allowable, allocate a new window. */
34899 static void
34900 add_to_dispatch_window (rtx insn)
34901 {
34902 int byte_len;
34903 dispatch_windows *window_list;
34904 dispatch_windows *next_list;
34905 dispatch_windows *window0_list;
34906 enum insn_path path;
34907 enum dispatch_group insn_group;
34908 bool insn_fits;
34909 int num_insn;
34910 int num_uops;
34911 int window_num;
34912 int insn_num_uops;
34913 int sum;
34915 if (INSN_CODE (insn) < 0)
34916 return;
34918 byte_len = min_insn_size (insn);
34919 window_list = dispatch_window_list;
34920 next_list = window_list->next;
34921 path = get_insn_path (insn);
34922 insn_group = get_insn_group (insn);
34924 /* Get the last dispatch window. */
34925 if (next_list)
34926 window_list = dispatch_window_list->next;
34928 if (path == path_single)
34929 insn_num_uops = 1;
34930 else if (path == path_double)
34931 insn_num_uops = 2;
34932 else
34933 insn_num_uops = (int) path;
34935 /* If the current window is full, get a new window.
34936 Window number zero is full if MAX_INSN uops are scheduled in it.
34937 Window number one is full if window zero's bytes plus window
34938 one's bytes equal 32, if adding the new instruction's bytes
34939 makes the total greater than 48, or if it already has MAX_INSN
34940 instructions in it. */
34941 num_insn = window_list->num_insn;
34942 num_uops = window_list->num_uops;
34943 window_num = window_list->window_num;
34944 insn_fits = fits_dispatch_window (insn);
34946 if (num_insn >= MAX_INSN
34947 || num_uops + insn_num_uops > MAX_INSN
34948 || !insn_fits)
34949 {
34950 window_num = ~window_num & 1;
34951 window_list = allocate_next_window (window_num);
34952 }
34954 if (window_num == 0)
34955 {
34956 add_insn_window (insn, window_list, insn_num_uops);
34957 if (window_list->num_insn >= MAX_INSN
34958 && insn_group == disp_branch)
34959 {
34960 process_end_window ();
34961 return;
34962 }
34963 }
34964 else if (window_num == 1)
34965 {
34966 window0_list = window_list->prev;
34967 sum = window0_list->window_size + window_list->window_size;
34968 if (sum == 32
34969 || (byte_len + sum) >= 48)
34970 {
34971 process_end_window ();
34972 window_list = dispatch_window_list;
34973 }
34975 add_insn_window (insn, window_list, insn_num_uops);
34976 }
34977 else
34978 gcc_unreachable ();
34980 if (is_end_basic_block (insn_group))
34981 {
34982 /* End of basic block is reached; do end-basic-block process. */
34983 process_end_window ();
34984 }
34985 }
34988 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34990 DEBUG_FUNCTION static void
34991 debug_dispatch_window_file (FILE *file, int window_num)
34992 {
34993 dispatch_windows *list;
34994 int i;
34996 if (window_num == 0)
34997 list = dispatch_window_list;
34998 else
34999 list = dispatch_window_list1;
35001 fprintf (file, "Window #%d:\n", list->window_num);
35002 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
35003 list->num_insn, list->num_uops, list->window_size);
35004 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35005 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
35007 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
35008 list->num_stores);
35009 fprintf (file, " insn info:\n");
35011 for (i = 0; i < MAX_INSN; i++)
35012 {
35013 if (!list->window[i].insn)
35014 break;
35015 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
35016 i, group_name[list->window[i].group],
35017 i, (void *)list->window[i].insn,
35018 i, list->window[i].path,
35019 i, list->window[i].byte_len,
35020 i, list->window[i].imm_bytes);
35021 }
35022 }
35024 /* Print a dispatch window to stdout. */
35026 DEBUG_FUNCTION void
35027 debug_dispatch_window (int window_num)
35028 {
35029 debug_dispatch_window_file (stdout, window_num);
35030 }
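/* Editorial sketch (hypothetical wrapper): dump both dispatch windows at
   once, e.g. when invoked by hand from a debugger session.  */
#if 0
DEBUG_FUNCTION static void
debug_both_dispatch_windows_example (void)
{
  debug_dispatch_window (0);
  debug_dispatch_window (1);
}
#endif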
35032 /* Print INSN dispatch information to FILE. */
35034 DEBUG_FUNCTION static void
35035 debug_insn_dispatch_info_file (FILE *file, rtx insn)
35036 {
35037 int byte_len;
35038 enum insn_path path;
35039 enum dispatch_group group;
35040 int imm_size;
35041 int num_imm_operand;
35042 int num_imm32_operand;
35043 int num_imm64_operand;
35045 if (INSN_CODE (insn) < 0)
35046 return;
35048 byte_len = min_insn_size (insn);
35049 path = get_insn_path (insn);
35050 group = get_insn_group (insn);
35051 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
35052 &num_imm64_operand);
35054 fprintf (file, " insn info:\n");
35055 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
35056 group_name[group], path, byte_len);
35057 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
35058 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
35059 }
35061 /* Print to STDOUT the status of the ready list with respect to
35062 dispatch windows. */
35064 DEBUG_FUNCTION void
35065 debug_ready_dispatch (void)
35066 {
35067 int i;
35068 int no_ready = number_in_ready ();
35070 fprintf (stdout, "Number of ready: %d\n", no_ready);
35072 for (i = 0; i < no_ready; i++)
35073 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
35074 }
35076 /* This routine is the driver of the dispatch scheduler. */
35078 static void
35079 do_dispatch (rtx insn, int mode)
35080 {
35081 if (mode == DISPATCH_INIT)
35082 init_dispatch_sched ();
35083 else if (mode == ADD_TO_DISPATCH_WINDOW)
35084 add_to_dispatch_window (insn);
35085 }
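/* Editorial sketch (hypothetical caller, not in the original file): the
   Haifa scheduler is expected to drive do_dispatch with one DISPATCH_INIT
   and then one ADD_TO_DISPATCH_WINDOW per scheduled insn; INSNS and N stand
   in for the scheduler's own stream of insns.  */
#if 0
static void
dispatch_driver_example (rtx *insns, int n)
{
  int i;

  do_dispatch (NULL_RTX, DISPATCH_INIT);
  for (i = 0; i < n; i++)
    do_dispatch (insns[i], ADD_TO_DISPATCH_WINDOW);
}
#endif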
35087 /* Return TRUE if dispatch scheduling is supported. */
35089 static bool
35090 has_dispatch (rtx insn, int action)
35091 {
35092 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
35093 switch (action)
35094 {
35095 default:
35096 return false;
35098 case IS_DISPATCH_ON:
35099 return true;
35102 case IS_CMP:
35103 return is_cmp (insn);
35105 case DISPATCH_VIOLATION:
35106 return dispatch_violation ();
35108 case FITS_DISPATCH_WINDOW:
35109 return fits_dispatch_window (insn);
35110 }
35112 return false;
35113 }
35115 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
35116 place emms and femms instructions. */
35118 static enum machine_mode
35119 ix86_preferred_simd_mode (enum machine_mode mode)
35120 {
35121 /* Disable the double-precision vectorizer if needed. */
35122 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
35123 return word_mode;
35125 if (!TARGET_AVX && !TARGET_SSE)
35126 return word_mode;
35128 switch (mode)
35129 {
35130 case SFmode:
35131 return (TARGET_AVX && !flag_prefer_avx128) ? V8SFmode : V4SFmode;
35132 case DFmode:
35133 return (TARGET_AVX && !flag_prefer_avx128) ? V4DFmode : V2DFmode;
35135 case QImode:
35136 return V16QImode;
35137 case HImode:
35138 return V8HImode;
35139 case SImode:
35140 return V4SImode;
35141 case DImode:
35142 return V2DImode;
35144 default:
35145 return word_mode;
35146 }
35147 }
35149 /* If AVX is enabled, then try vectorizing with both 256bit and 128bit
35150 vectors. */
35152 static unsigned int
35153 ix86_autovectorize_vector_sizes (void)
35154 {
35155 return TARGET_AVX ? 32 | 16 : 0;
35156 }
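/* Editorial sketch (hypothetical decoder): the value returned above is a
   bitmask of vector sizes in bytes, so 32 | 16 asks the vectorizer to try
   256-bit vectors first and fall back to 128-bit ones; zero means use only
   the preferred SIMD mode.  */
#if 0
static bool
vector_size_enabled_example (unsigned int size_in_bytes)
{
  return (ix86_autovectorize_vector_sizes () & size_in_bytes) != 0;
}
#endif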
35158 /* Initialize the GCC target structure. */
35159 #undef TARGET_RETURN_IN_MEMORY
35160 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
35162 #undef TARGET_LEGITIMIZE_ADDRESS
35163 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
35165 #undef TARGET_ATTRIBUTE_TABLE
35166 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
35167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35168 # undef TARGET_MERGE_DECL_ATTRIBUTES
35169 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
35170 #endif
35172 #undef TARGET_COMP_TYPE_ATTRIBUTES
35173 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
35175 #undef TARGET_INIT_BUILTINS
35176 #define TARGET_INIT_BUILTINS ix86_init_builtins
35177 #undef TARGET_BUILTIN_DECL
35178 #define TARGET_BUILTIN_DECL ix86_builtin_decl
35179 #undef TARGET_EXPAND_BUILTIN
35180 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
35182 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
35183 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
35184 ix86_builtin_vectorized_function
35186 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
35187 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
35189 #undef TARGET_BUILTIN_RECIPROCAL
35190 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
35192 #undef TARGET_ASM_FUNCTION_EPILOGUE
35193 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
35195 #undef TARGET_ENCODE_SECTION_INFO
35196 #ifndef SUBTARGET_ENCODE_SECTION_INFO
35197 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
35198 #else
35199 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
35200 #endif
35202 #undef TARGET_ASM_OPEN_PAREN
35203 #define TARGET_ASM_OPEN_PAREN ""
35204 #undef TARGET_ASM_CLOSE_PAREN
35205 #define TARGET_ASM_CLOSE_PAREN ""
35207 #undef TARGET_ASM_BYTE_OP
35208 #define TARGET_ASM_BYTE_OP ASM_BYTE
35210 #undef TARGET_ASM_ALIGNED_HI_OP
35211 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
35212 #undef TARGET_ASM_ALIGNED_SI_OP
35213 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
35214 #ifdef ASM_QUAD
35215 #undef TARGET_ASM_ALIGNED_DI_OP
35216 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
35217 #endif
35219 #undef TARGET_PROFILE_BEFORE_PROLOGUE
35220 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
35222 #undef TARGET_ASM_UNALIGNED_HI_OP
35223 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
35224 #undef TARGET_ASM_UNALIGNED_SI_OP
35225 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
35226 #undef TARGET_ASM_UNALIGNED_DI_OP
35227 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
35229 #undef TARGET_PRINT_OPERAND
35230 #define TARGET_PRINT_OPERAND ix86_print_operand
35231 #undef TARGET_PRINT_OPERAND_ADDRESS
35232 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
35233 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
35234 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
35235 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
35236 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
35238 #undef TARGET_SCHED_INIT_GLOBAL
35239 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
35240 #undef TARGET_SCHED_ADJUST_COST
35241 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
35242 #undef TARGET_SCHED_ISSUE_RATE
35243 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
35244 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
35245 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
35246 ia32_multipass_dfa_lookahead
35248 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
35249 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
35252 #undef TARGET_HAVE_TLS
35253 #define TARGET_HAVE_TLS true
35255 #undef TARGET_CANNOT_FORCE_CONST_MEM
35256 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
35257 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
35258 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
35260 #undef TARGET_DELEGITIMIZE_ADDRESS
35261 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
35263 #undef TARGET_MS_BITFIELD_LAYOUT_P
35264 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
35266 #if TARGET_MACHO
35267 #undef TARGET_BINDS_LOCAL_P
35268 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
35269 #endif
35270 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35271 #undef TARGET_BINDS_LOCAL_P
35272 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
35273 #endif
35275 #undef TARGET_ASM_OUTPUT_MI_THUNK
35276 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
35277 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
35278 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
35280 #undef TARGET_ASM_FILE_START
35281 #define TARGET_ASM_FILE_START x86_file_start
35283 #undef TARGET_DEFAULT_TARGET_FLAGS
35284 #define TARGET_DEFAULT_TARGET_FLAGS \
35285 (TARGET_DEFAULT \
35286 | TARGET_SUBTARGET_DEFAULT \
35287 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
35289 #undef TARGET_HANDLE_OPTION
35290 #define TARGET_HANDLE_OPTION ix86_handle_option
35292 #undef TARGET_OPTION_OVERRIDE
35293 #define TARGET_OPTION_OVERRIDE ix86_option_override
35294 #undef TARGET_OPTION_OPTIMIZATION_TABLE
35295 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
35296 #undef TARGET_OPTION_INIT_STRUCT
35297 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
35299 #undef TARGET_REGISTER_MOVE_COST
35300 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
35301 #undef TARGET_MEMORY_MOVE_COST
35302 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
35303 #undef TARGET_RTX_COSTS
35304 #define TARGET_RTX_COSTS ix86_rtx_costs
35305 #undef TARGET_ADDRESS_COST
35306 #define TARGET_ADDRESS_COST ix86_address_cost
35308 #undef TARGET_FIXED_CONDITION_CODE_REGS
35309 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
35310 #undef TARGET_CC_MODES_COMPATIBLE
35311 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
35313 #undef TARGET_MACHINE_DEPENDENT_REORG
35314 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
35316 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
35317 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
35319 #undef TARGET_BUILD_BUILTIN_VA_LIST
35320 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
35322 #undef TARGET_ENUM_VA_LIST_P
35323 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
35325 #undef TARGET_FN_ABI_VA_LIST
35326 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
35328 #undef TARGET_CANONICAL_VA_LIST_TYPE
35329 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
35331 #undef TARGET_EXPAND_BUILTIN_VA_START
35332 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
35334 #undef TARGET_MD_ASM_CLOBBERS
35335 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
35337 #undef TARGET_PROMOTE_PROTOTYPES
35338 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
35339 #undef TARGET_STRUCT_VALUE_RTX
35340 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
35341 #undef TARGET_SETUP_INCOMING_VARARGS
35342 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
35343 #undef TARGET_MUST_PASS_IN_STACK
35344 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
35345 #undef TARGET_FUNCTION_ARG_ADVANCE
35346 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
35347 #undef TARGET_FUNCTION_ARG
35348 #define TARGET_FUNCTION_ARG ix86_function_arg
35349 #undef TARGET_FUNCTION_ARG_BOUNDARY
35350 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
35351 #undef TARGET_PASS_BY_REFERENCE
35352 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
35353 #undef TARGET_INTERNAL_ARG_POINTER
35354 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
35355 #undef TARGET_UPDATE_STACK_BOUNDARY
35356 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
35357 #undef TARGET_GET_DRAP_RTX
35358 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
35359 #undef TARGET_STRICT_ARGUMENT_NAMING
35360 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
35361 #undef TARGET_STATIC_CHAIN
35362 #define TARGET_STATIC_CHAIN ix86_static_chain
35363 #undef TARGET_TRAMPOLINE_INIT
35364 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
35365 #undef TARGET_RETURN_POPS_ARGS
35366 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
35368 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
35369 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
35371 #undef TARGET_SCALAR_MODE_SUPPORTED_P
35372 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
35374 #undef TARGET_VECTOR_MODE_SUPPORTED_P
35375 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
35377 #undef TARGET_C_MODE_FOR_SUFFIX
35378 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
35380 #ifdef HAVE_AS_TLS
35381 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
35382 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
35383 #endif
35385 #ifdef SUBTARGET_INSERT_ATTRIBUTES
35386 #undef TARGET_INSERT_ATTRIBUTES
35387 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
35388 #endif
35390 #undef TARGET_MANGLE_TYPE
35391 #define TARGET_MANGLE_TYPE ix86_mangle_type
35393 #undef TARGET_STACK_PROTECT_FAIL
35394 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
35396 #undef TARGET_SUPPORTS_SPLIT_STACK
35397 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
35399 #undef TARGET_FUNCTION_VALUE
35400 #define TARGET_FUNCTION_VALUE ix86_function_value
35402 #undef TARGET_FUNCTION_VALUE_REGNO_P
35403 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
35405 #undef TARGET_SECONDARY_RELOAD
35406 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
35408 #undef TARGET_PREFERRED_RELOAD_CLASS
35409 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
35410 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
35411 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
35412 #undef TARGET_CLASS_LIKELY_SPILLED_P
35413 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
35415 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
35416 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
35417 ix86_builtin_vectorization_cost
35418 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
35419 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
35420 ix86_vectorize_builtin_vec_perm
35421 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
35422 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
35423 ix86_vectorize_builtin_vec_perm_ok
35424 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
35425 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
35426 ix86_preferred_simd_mode
35427 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
35428 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
35429 ix86_autovectorize_vector_sizes
35431 #undef TARGET_SET_CURRENT_FUNCTION
35432 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35434 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35435 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35437 #undef TARGET_OPTION_SAVE
35438 #define TARGET_OPTION_SAVE ix86_function_specific_save
35440 #undef TARGET_OPTION_RESTORE
35441 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35443 #undef TARGET_OPTION_PRINT
35444 #define TARGET_OPTION_PRINT ix86_function_specific_print
35446 #undef TARGET_CAN_INLINE_P
35447 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35449 #undef TARGET_EXPAND_TO_RTL_HOOK
35450 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35452 #undef TARGET_LEGITIMATE_ADDRESS_P
35453 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35455 #undef TARGET_LEGITIMATE_CONSTANT_P
35456 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
35458 #undef TARGET_FRAME_POINTER_REQUIRED
35459 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35461 #undef TARGET_CAN_ELIMINATE
35462 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35464 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35465 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35467 #undef TARGET_ASM_CODE_END
35468 #define TARGET_ASM_CODE_END ix86_code_end
35470 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35471 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35473 #if TARGET_MACHO
35474 #undef TARGET_INIT_LIBFUNCS
35475 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
35476 #endif
35478 struct gcc_target targetm = TARGET_INITIALIZER;
35480 #include "gt-i386.h"