/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
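
/* Editorial sketch, not part of the original source: the callback above
   is driven through note_stores, which walks every SET and CLOBBER in an
   insn pattern and hands each store destination to the callback.  The
   helper below (its name is invented for illustration) shows that
   calling convention on a single insn.  */

static inline bool
insn_writes_avx256_p (rtx insn)
{
  enum upper_128bits_state state = unused;
  note_stores (PATTERN (insn), check_avx256_stores, &state);
  return state == used;
}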
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
		 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}
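
/* Editorial note, not in the original source: the scan above keys off
   the RTL shape of the two intrinsics.  A vzeroupper insn is a bare
     (unspec_volatile [(const_int N)] UNSPECV_VZEROUPPER)
   where N is one of the call_avx256_state values stored by the
   expander, which is why INTVAL (XVECEXP (pat, 0, 0)) recovers it.  A
   vzeroall insn is a PARALLEL whose first element is an UNSPEC_VOLATILE
   tagged UNSPECV_VZEROALL, hence the separate PARALLEL test.  */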
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
	cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
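
/* Editorial note, not in the original source: the driver below runs a
   classic iterative data-flow over the CFG.  Blocks live in two
   fibonacci heaps keyed by reverse completion order -- WORKLIST for the
   current round, PENDING for the next -- so predecessors tend to be
   visited before their successors and the per-block state computed by
   move_or_delete_vzeroupper_1 converges in few passes; a final pass with
   UNKNOWN_IS_UNUSED resolves any blocks still in the UNKNOWN state.  */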
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
	move_or_delete_vzeroupper_1 (bb, false);
	fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
	{
	  bb = (basic_block) fibheap_extract_min (worklist);
	  RESET_BIT (in_worklist, bb->index);
	  gcc_assert (!TEST_BIT (visited, bb->index));
	  if (!TEST_BIT (visited, bb->index))
	    {
	      edge_iterator ei;

	      SET_BIT (visited, bb->index);

	      if (move_or_delete_vzeroupper_1 (bb, false))
		FOR_EACH_EDGE (e, ei, bb->succs)
		  {
		    if (e->dest == EXIT_BLOCK_PTR
			|| BLOCK_INFO (e->dest)->processed)
		      continue;

		    if (TEST_BIT (visited, e->dest->index))
		      {
			if (!TEST_BIT (in_pending, e->dest->index))
			  {
			    /* Send E->DEST to next round.  */
			    SET_BIT (in_pending, e->dest->index);
			    fibheap_insert (pending,
					    bb_order[e->dest->index],
					    e->dest);
			  }
		      }
		    else if (!TEST_BIT (in_worklist, e->dest->index))
		      {
			/* Add E->DEST to current round.  */
			SET_BIT (in_worklist, e->dest->index);
			fibheap_insert (worklist, bb_order[e->dest->index],
					e->dest);
		      }
		  }
	    }
	}

      if (!cfun->machine->rescan_vzeroupper_p)
	break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
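
/* Editorial sketch, not part of the original excerpt: this pass is a
   machine-dependent reorg step.  In this era of the port it was invoked
   from the target's reorg hook roughly as below; TARGET_VZEROUPPER
   guards it, and the real ix86_reorg (defined elsewhere in this file)
   performs further work afterwards.

     static void
     ix86_reorg (void)
     {
       if (TARGET_VZEROUPPER)
	 move_or_delete_vzeroupper ();
       ...
     }
*/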
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
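
/* Editorial sketch, not part of the original source: MODE_INDEX
   subscripts the per-mode arrays in struct processor_costs.  The
   rtx-cost code charges a multiply in MODE roughly as below; ix86_cost
   points at the active tuning table, and mult_init/mult_bit are the
   fields filled in by the "cost of starting multiply" entries in the
   tables that follow.  The helper name is invented for illustration.  */

static inline int
example_mult_cost (enum machine_mode mode, int nbits)
{
  return ix86_cost->mult_init[MODE_INDEX (mode)]
	 + nbits * ix86_cost->mult_bit;
}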
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
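
/* Editorial note, not in the original source: each memcpy/memset entry
   in the cost tables below is a struct stringop_algs initializer,
     {unknown_size_alg, {{max1, alg1}, ..., {-1, algN}}},
   read left to right: blocks of at most MAX1 bytes use ALG1, and the -1
   entry catches every larger size.  The leading member is the algorithm
   used when the block size is not known at compile time.  For example,
   {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}} means "rep movsl
   up to 256 bytes, a library call beyond that".  Entries come in
   {32bit, 64bit} pairs selected by TARGET_64BIT; DUMMY_STRINGOP_ALGS
   merely fills the slot a tuning never uses.  */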
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /* HI */
   COSTS_N_BYTES (3),           /* SI */
   COSTS_N_BYTES (3),           /* DI */
   COSTS_N_BYTES (5)},          /* other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  0,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /* HI */
   COSTS_N_INSNS (6),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (23),          /* SI */
   COSTS_N_INSNS (23),          /* DI */
   COSTS_N_INSNS (23)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /* HI */
   COSTS_N_INSNS (12),          /* SI */
   COSTS_N_INSNS (12),          /* DI */
   COSTS_N_INSNS (12)},         /* other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /* HI */
   COSTS_N_INSNS (40),          /* SI */
   COSTS_N_INSNS (40),          /* DI */
   COSTS_N_INSNS (40)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /* HI */
   COSTS_N_INSNS (11),          /* SI */
   COSTS_N_INSNS (11),          /* DI */
   COSTS_N_INSNS (11)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /* HI */
   COSTS_N_INSNS (25),          /* SI */
   COSTS_N_INSNS (25),          /* DI */
   COSTS_N_INSNS (25)},         /* other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (4)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /* HI */
   COSTS_N_INSNS (17),          /* SI */
   COSTS_N_INSNS (17),          /* DI */
   COSTS_N_INSNS (17)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (7),           /* SI */
   COSTS_N_INSNS (7),           /* DI */
   COSTS_N_INSNS (7)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /* HI */
   COSTS_N_INSNS (39),          /* SI */
   COSTS_N_INSNS (39),          /* DI */
   COSTS_N_INSNS (39)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (3),           /* DI */
   COSTS_N_INSNS (3)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /* HI */
   COSTS_N_INSNS (18),          /* SI */
   COSTS_N_INSNS (18),          /* DI */
   COSTS_N_INSNS (18)},         /* other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /* HI */
   COSTS_N_INSNS (5),           /* SI */
   COSTS_N_INSNS (5),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  5,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (4),           /* SI */
   COSTS_N_INSNS (6),           /* DI */
   COSTS_N_INSNS (6)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {5, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {5, 5, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 4},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 4},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  2,                            /* MMX or SSE register to integer */
                                /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  16,                           /* size of l1 cache.  */
  2048,                         /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),           /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                            /* scalar_stmt_cost.  */
  4,                            /* scalar load_cost.  */
  4,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  4,                            /* vec_align_load_cost.  */
  4,                            /* vec_unalign_load_cost.  */
  4,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (5)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /* HI */
   COSTS_N_INSNS (51),          /* SI */
   COSTS_N_INSNS (83),          /* DI */
   COSTS_N_INSNS (83)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
                                /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                   On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                                       1/1  1/1
                                   MOVD reg32, xmmreg Double FADD 3
                                                       1/1  1/1 */
  32,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  100,                          /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /* HI */
   COSTS_N_INSNS (15),          /* SI */
   COSTS_N_INSNS (15),          /* DI */
   COSTS_N_INSNS (15)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /* HI */
   COSTS_N_INSNS (56),          /* SI */
   COSTS_N_INSNS (56),          /* DI */
   COSTS_N_INSNS (56)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  2,                            /* Branch cost */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (10),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),          /* HI */
   COSTS_N_INSNS (10),          /* SI */
   COSTS_N_INSNS (10),          /* DI */
   COSTS_N_INSNS (10)},         /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),          /* HI */
   COSTS_N_INSNS (66),          /* SI */
   COSTS_N_INSNS (66),          /* DI */
   COSTS_N_INSNS (66)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  17,                           /* MOVE_RATIO */
  4,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  3,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  6,                            /* cost of moving MMX register */
  {12, 12},                     /* cost of loading MMX registers
                                   in SImode and DImode */
  {12, 12},                     /* cost of storing MMX registers
                                   in SImode and DImode */
  6,                            /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {12, 12, 12},                 /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  8,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  1024,                         /* size of l2 cache.  */
  128,                          /* size of prefetch block */
  8,                            /* number of parallel prefetches */
  1,                            /* Branch cost */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /* HI */
   COSTS_N_INSNS (3),           /* SI */
   COSTS_N_INSNS (4),           /* DI */
   COSTS_N_INSNS (2)},          /* other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /* HI */
   COSTS_N_INSNS (42),          /* SI */
   COSTS_N_INSNS (74),          /* DI */
   COSTS_N_INSNS (74)},         /* other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  17,                           /* MOVE_RATIO */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {4, 4, 4},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {12, 12, 12},                 /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {8, 8, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {8, 8, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  3,                            /* Branch cost */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
};
1638 /* Generic64 should produce code tuned for Nocona and K8. */
1640 struct processor_costs generic64_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 /* On all chips taken into consideration lea is 2 cycles and more. With
1643 this cost however our current implementation of synth_mult results in
1644 use of unnecessary temporary registers causing regression on several
1645 SPECfp benchmarks. */
1646 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1647 COSTS_N_INSNS (1), /* variable shift costs */
1648 COSTS_N_INSNS (1), /* constant shift costs */
1649 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1650 COSTS_N_INSNS (4), /* HI */
1651 COSTS_N_INSNS (3), /* SI */
1652 COSTS_N_INSNS (4), /* DI */
1653 COSTS_N_INSNS (2)}, /* other */
1654 0, /* cost of multiply per each bit set */
1655 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1656 COSTS_N_INSNS (26), /* HI */
1657 COSTS_N_INSNS (42), /* SI */
1658 COSTS_N_INSNS (74), /* DI */
1659 COSTS_N_INSNS (74)}, /* other */
1660 COSTS_N_INSNS (1), /* cost of movsx */
1661 COSTS_N_INSNS (1), /* cost of movzx */
1662 8, /* "large" insn */
1663 17, /* MOVE_RATIO */
1664 4, /* cost for loading QImode using movzbl */
1665 {4, 4, 4}, /* cost of loading integer registers
1666 in QImode, HImode and SImode.
1667 Relative to reg-reg move (2). */
1668 {4, 4, 4}, /* cost of storing integer registers */
1669 4, /* cost of reg,reg fld/fst */
1670 {12, 12, 12}, /* cost of loading fp registers
1671 in SFmode, DFmode and XFmode */
1672 {6, 6, 8}, /* cost of storing fp registers
1673 in SFmode, DFmode and XFmode */
1674 2, /* cost of moving MMX register */
1675 {8, 8}, /* cost of loading MMX registers
1676 in SImode and DImode */
1677 {8, 8}, /* cost of storing MMX registers
1678 in SImode and DImode */
1679 2, /* cost of moving SSE register */
1680 {8, 8, 8}, /* cost of loading SSE registers
1681 in SImode, DImode and TImode */
1682 {8, 8, 8}, /* cost of storing SSE registers
1683 in SImode, DImode and TImode */
1684 5, /* MMX or SSE register to integer */
1685 32, /* size of l1 cache. */
1686 512, /* size of l2 cache. */
1687 64, /* size of prefetch block */
1688 6, /* number of parallel prefetches */
1689 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1690 value is increased to the perhaps more appropriate value of 5. */
1691 3, /* Branch cost */
1692 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1693 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1694 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1695 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1696 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1697 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1698 {DUMMY_STRINGOP_ALGS,
1699 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1700 {DUMMY_STRINGOP_ALGS,
1701 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1702 1, /* scalar_stmt_cost. */
1703 1, /* scalar load_cost. */
1704 1, /* scalar_store_cost. */
1705 1, /* vec_stmt_cost. */
1706 1, /* vec_to_scalar_cost. */
1707 1, /* scalar_to_vec_cost. */
1708 1, /* vec_align_load_cost. */
1709 2, /* vec_unalign_load_cost. */
1710 1, /* vec_store_cost. */
1711 3, /* cond_taken_branch_cost. */
1712 1, /* cond_not_taken_branch_cost. */
1715 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1716 Athlon, and K8. */
1717 static const
1718 struct processor_costs generic32_cost = {
1719 COSTS_N_INSNS (1), /* cost of an add instruction */
1720 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1721 COSTS_N_INSNS (1), /* variable shift costs */
1722 COSTS_N_INSNS (1), /* constant shift costs */
1723 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1724 COSTS_N_INSNS (4), /* HI */
1725 COSTS_N_INSNS (3), /* SI */
1726 COSTS_N_INSNS (4), /* DI */
1727 COSTS_N_INSNS (2)}, /* other */
1728 0, /* cost of multiply per each bit set */
1729 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1730 COSTS_N_INSNS (26), /* HI */
1731 COSTS_N_INSNS (42), /* SI */
1732 COSTS_N_INSNS (74), /* DI */
1733 COSTS_N_INSNS (74)}, /* other */
1734 COSTS_N_INSNS (1), /* cost of movsx */
1735 COSTS_N_INSNS (1), /* cost of movzx */
1736 8, /* "large" insn */
1737 17, /* MOVE_RATIO */
1738 4, /* cost for loading QImode using movzbl */
1739 {4, 4, 4}, /* cost of loading integer registers
1740 in QImode, HImode and SImode.
1741 Relative to reg-reg move (2). */
1742 {4, 4, 4}, /* cost of storing integer registers */
1743 4, /* cost of reg,reg fld/fst */
1744 {12, 12, 12}, /* cost of loading fp registers
1745 in SFmode, DFmode and XFmode */
1746 {6, 6, 8}, /* cost of storing fp registers
1747 in SFmode, DFmode and XFmode */
1748 2, /* cost of moving MMX register */
1749 {8, 8}, /* cost of loading MMX registers
1750 in SImode and DImode */
1751 {8, 8}, /* cost of storing MMX registers
1752 in SImode and DImode */
1753 2, /* cost of moving SSE register */
1754 {8, 8, 8}, /* cost of loading SSE registers
1755 in SImode, DImode and TImode */
1756 {8, 8, 8}, /* cost of storing SSE registers
1757 in SImode, DImode and TImode */
1758 5, /* MMX or SSE register to integer */
1759 32, /* size of l1 cache. */
1760 256, /* size of l2 cache. */
1761 64, /* size of prefetch block */
1762 6, /* number of parallel prefetches */
1763 3, /* Branch cost */
1764 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1765 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1766 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1767 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1768 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1769 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1770 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1771 DUMMY_STRINGOP_ALGS},
1772 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1773 DUMMY_STRINGOP_ALGS},
1774 1, /* scalar_stmt_cost. */
1775 1, /* scalar load_cost. */
1776 1, /* scalar_store_cost. */
1777 1, /* vec_stmt_cost. */
1778 1, /* vec_to_scalar_cost. */
1779 1, /* scalar_to_vec_cost. */
1780 1, /* vec_align_load_cost. */
1781 2, /* vec_unalign_load_cost. */
1782 1, /* vec_store_cost. */
1783 3, /* cond_taken_branch_cost. */
1784 1, /* cond_not_taken_branch_cost. */
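/* Illustrative sketch (not part of the original file): how one of the
   stringop_algs tables above is read.  The leading element is the
   algorithm for unknown sizes; each following pair is a size ceiling
   plus an algorithm, and max == -1 terminates the list.  For the
   generic32 memcpy table above this means: blocks of up to 32 bytes
   use a loop, up to 8192 bytes use rep_prefix_4_byte, and anything
   larger goes to libcall.  A hypothetical helper (the real selection
   logic lives in decide_alg later in this file):  */

static enum stringop_alg
sketch_pick_stringop_alg (const struct stringop_algs *algs,
                          HOST_WIDE_INT size)
{
  int i;

  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
        || size <= (HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;  /* First entry whose ceiling covers SIZE.  */
  return libcall;                /* Fall back if the table is exhausted.  */
}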
1787 const struct processor_costs *ix86_cost = &pentium_cost;
1789 /* Processor feature/optimization bitmasks. */
1790 #define m_386 (1<<PROCESSOR_I386)
1791 #define m_486 (1<<PROCESSOR_I486)
1792 #define m_PENT (1<<PROCESSOR_PENTIUM)
1793 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1794 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1795 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1796 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1797 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1798 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1799 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1800 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1801 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1802 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1803 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1804 #define m_ATOM (1<<PROCESSOR_ATOM)
1806 #define m_GEODE (1<<PROCESSOR_GEODE)
1807 #define m_K6 (1<<PROCESSOR_K6)
1808 #define m_K6_GEODE (m_K6 | m_GEODE)
1809 #define m_K8 (1<<PROCESSOR_K8)
1810 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1811 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1812 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1813 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1814 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1815 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1817 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1818 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1820 /* Generic instruction choice should be a common subset of the supported
1821 CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1822 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1824 /* Feature tests against the various tunings. */
1825 unsigned char ix86_tune_features[X86_TUNE_LAST];
1827 /* Feature tests against the various tunings used to create ix86_tune_features
1828 based on the processor mask. */
1829 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1830 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1831 negatively, so enabling it for Generic64 seems like a good code-size
1832 tradeoff.  We can't enable it for 32-bit generic because it does not
1833 work well with PPro-based chips. */
1834 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1836 /* X86_TUNE_PUSH_MEMORY */
1837 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1838 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1840 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1843 /* X86_TUNE_UNROLL_STRLEN */
1844 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1845 | m_CORE2I7 | m_GENERIC,
1847 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1848 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1849 | m_CORE2I7 | m_GENERIC,
1851 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1852 on simulation results.  But after the P4 was released, no performance
1853 benefit was observed from branch hints; they also increase code size.
1854 As a result, icc never generates branch hints. */
1857 /* X86_TUNE_DOUBLE_WITH_ADD */
1860 /* X86_TUNE_USE_SAHF */
1861 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1862 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1864 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1865 partial dependencies. */
1866 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1867 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1869 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1870 register stalls on the Generic32 compilation setting as well.  However,
1871 in the current implementation partial register stalls are not eliminated
1872 very well - they can be introduced via subregs synthesized by combine
1873 and can happen in caller/callee saving sequences.  Because this option
1874 pays back little on PPro-based chips and conflicts with the partial-register
1875 dependencies used by Athlon/P4-based chips, it is better to leave it off
1876 for generic32 for now. */
1879 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1880 m_CORE2I7 | m_GENERIC,
1882 /* X86_TUNE_USE_HIMODE_FIOP */
1883 m_386 | m_486 | m_K6_GEODE,
1885 /* X86_TUNE_USE_SIMODE_FIOP */
1886 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1888 /* X86_TUNE_USE_MOV0 */
1891 /* X86_TUNE_USE_CLTD */
1892 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1894 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1897 /* X86_TUNE_SPLIT_LONG_MOVES */
1900 /* X86_TUNE_READ_MODIFY_WRITE */
1903 /* X86_TUNE_READ_MODIFY */
1906 /* X86_TUNE_PROMOTE_QIMODE */
1907 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1908 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1910 /* X86_TUNE_FAST_PREFIX */
1911 ~(m_PENT | m_486 | m_386),
1913 /* X86_TUNE_SINGLE_STRINGOP */
1914 m_386 | m_PENT4 | m_NOCONA,
1916 /* X86_TUNE_QIMODE_MATH */
1919 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1920 register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
1921 might be considered for Generic32 if our scheme for avoiding partial
1922 stalls were more effective. */
1925 /* X86_TUNE_PROMOTE_QI_REGS */
1928 /* X86_TUNE_PROMOTE_HI_REGS */
1931 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1932 over esp addition. */
1933 m_386 | m_486 | m_PENT | m_PPRO,
1935 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1936 over esp addition. */
1939 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1940 over esp subtraction. */
1941 m_386 | m_486 | m_PENT | m_K6_GEODE,
1943 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1944 over esp subtraction. */
1945 m_PENT | m_K6_GEODE,
1947 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1948 for DFmode copies */
1949 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1950 | m_GENERIC | m_GEODE),
1952 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1953 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1955 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1956 conflict here between PPro/Pentium4-based chips, which treat 128-bit
1957 SSE registers as single units, and K8-based chips, which divide SSE
1958 registers into two 64-bit halves.  This knob promotes all store
1959 destinations to 128 bits to allow register renaming on 128-bit SSE
1960 units, but usually results in one extra microop on 64-bit SSE units.
1961 Experimental results show that disabling this option on P4 causes a
1962 SPECfp regression of over 20%, while enabling it on K8 causes a roughly
1963 2.4% regression that can be partly masked by careful scheduling of moves. */
1964 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1965 | m_AMDFAM10 | m_BDVER1,
1967 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1968 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1970 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1971 m_BDVER1 | m_COREI7,
1973 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1976 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
1977 are resolved on SSE register parts instead of whole registers, so we may
1978 maintain just the lower part of scalar values in the proper format,
1979 leaving the upper part undefined. */
1982 /* X86_TUNE_SSE_TYPELESS_STORES */
1985 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1986 m_PPRO | m_PENT4 | m_NOCONA,
1988 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1989 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1991 /* X86_TUNE_PROLOGUE_USING_MOVE */
1992 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1994 /* X86_TUNE_EPILOGUE_USING_MOVE */
1995 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1997 /* X86_TUNE_SHIFT1 */
2000 /* X86_TUNE_USE_FFREEP */
2003 /* X86_TUNE_INTER_UNIT_MOVES */
2004 ~(m_AMD_MULTIPLE | m_GENERIC),
2006 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2007 ~(m_AMDFAM10 | m_BDVER1),
2009 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2010 than 4 branch instructions in the 16-byte window. */
2011 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
2014 /* X86_TUNE_SCHEDULE */
2015 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
2018 /* X86_TUNE_USE_BT */
2019 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
2021 /* X86_TUNE_USE_INCDEC */
2022 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
2024 /* X86_TUNE_PAD_RETURNS */
2025 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
2027 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2030 /* X86_TUNE_EXT_80387_CONSTANTS */
2031 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
2032 | m_CORE2I7 | m_GENERIC,
2034 /* X86_TUNE_SHORTEN_X87_SSE */
2037 /* X86_TUNE_AVOID_VECTOR_DECODE */
2038 m_K8 | m_CORE2I7_64 | m_GENERIC64,
2040 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2041 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
2044 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
2045 the vector path on AMD machines. */
2046 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2048 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path
2049 on AMD machines. */
2050 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
2052 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2053 than a MOV. */
2056 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2057 but one byte longer. */
2060 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
2061 operand that cannot be represented using a modRM byte.  The XOR
2062 replacement is long decoded, so this split helps here as well. */
2065 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2066 from FP to FP. */
2067 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
2069 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2070 from integer to FP. */
2073 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2074 with a subsequent conditional jump instruction into a single
2075 compare-and-branch uop. */
2078 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2079 will impact LEA instruction selection. */
2082 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2083 operations. */
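/* Illustrative sketch (an assumption: mirrors the mask test done in
   ix86_option_override_internal): the per-feature processor masks
   above collapse into the boolean array ix86_tune_features once the
   -mtune processor is known.  */

static void
sketch_set_tune_features (enum processor_type tune)
{
  unsigned int tune_mask = 1u << tune;
  unsigned int i;

  for (i = 0; i < X86_TUNE_LAST; i++)
    ix86_tune_features[i]
      = !!(initial_ix86_tune_features[i] & tune_mask);
}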
2087 /* Feature tests against the various architecture variations. */
2088 unsigned char ix86_arch_features[X86_ARCH_LAST];
2090 /* Feature tests against the various architecture variations, used to create
2091 ix86_arch_features based on the processor mask. */
2092 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2093 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
2094 ~(m_386 | m_486 | m_PENT | m_K6),
2096 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2099 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2102 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2105 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2109 static const unsigned int x86_accumulate_outgoing_args
2110 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2113 static const unsigned int x86_arch_always_fancy_math_387
2114 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2115 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2117 static enum stringop_alg stringop_alg = no_stringop;
2119 /* In case the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
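/* Illustrative example (a sketch, not part of the original file):
   REGNO_REG_CLASS simply indexes the table above, so e.g. regno 0
   (%ax) maps to AREG while the stack pointer maps to NON_Q_REGS,
   since it has no addressable low byte.  A hypothetical helper:  */

static enum reg_class
sketch_regno_class (unsigned int regno)
{
  gcc_assert (regno < FIRST_PSEUDO_REGISTER);
  return regclass_map[regno];
}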
2159 /* The "default" register map used in 32bit mode. */
2161 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2163 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2164 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2165 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2166 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2167 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2168 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2169 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2172 /* The "default" register map used in 64bit mode. */
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2185 /* Define the register numbers to be used in Dwarf debugging information.
2186 The SVR4 reference port C compiler uses the following register numbers
2187 in its Dwarf output code:
2188 0 for %eax (gcc regno = 0)
2189 1 for %ecx (gcc regno = 2)
2190 2 for %edx (gcc regno = 1)
2191 3 for %ebx (gcc regno = 3)
2192 4 for %esp (gcc regno = 7)
2193 5 for %ebp (gcc regno = 6)
2194 6 for %esi (gcc regno = 4)
2195 7 for %edi (gcc regno = 5)
2196 The following three DWARF register numbers are never generated by
2197 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2198 believes these numbers have these meanings.
2199 8 for %eip (no gcc equivalent)
2200 9 for %eflags (gcc regno = 17)
2201 10 for %trapno (no gcc equivalent)
2202 It is not at all clear how we should number the FP stack registers
2203 for the x86 architecture. If the version of SDB on x86/svr4 were
2204 a bit less brain dead with respect to floating-point then we would
2205 have a precedent to follow with respect to DWARF register numbers
2206 for x86 FP registers, but the SDB on x86/svr4 is so completely
2207 broken with respect to FP registers that it is hardly worth thinking
2208 of it as something to strive for compatibility with.
2209 The version of x86/svr4 SDB I have at the moment does (partially)
2210 seem to believe that DWARF register number 11 is associated with
2211 the x86 register %st(0), but that's about all. Higher DWARF
2212 register numbers don't seem to be associated with anything in
2213 particular, and even for DWARF regno 11, SDB only seems to understand
2214 that it should say that a variable lives in %st(0) (when
2215 asked via an `=' command) if we said it was in DWARF regno 11,
2216 but SDB still prints garbage when asked for the value of the
2217 variable in question (via a `/' command).
2218 (Also note that the labels SDB prints for various FP stack regs
2219 when doing an `x' command are all wrong.)
2220 Note that these problems generally don't affect the native SVR4
2221 C compiler because it doesn't allow the use of -O with -g and
2222 because when it is *not* optimizing, it allocates a memory
2223 location for each floating-point variable, and the memory
2224 location is what gets described in the DWARF AT_location
2225 attribute for the variable in question.
2226 Regardless of the severe mental illness of the x86/svr4 SDB, we
2227 do something sensible here and we use the following DWARF
2228 register numbers.  Note that these are all stack-top-relative
2229 numbers.
2230 11 for %st(0) (gcc regno = 8)
2231 12 for %st(1) (gcc regno = 9)
2232 13 for %st(2) (gcc regno = 10)
2233 14 for %st(3) (gcc regno = 11)
2234 15 for %st(4) (gcc regno = 12)
2235 16 for %st(5) (gcc regno = 13)
2236 17 for %st(6) (gcc regno = 14)
2237 18 for %st(7) (gcc regno = 15)
2239 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2241 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2242 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2243 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2244 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2245 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2246 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2247 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
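/* Illustrative sketch (an assumption, not part of the original file):
   a debug-info writer maps a hard register number to its DWARF number
   by indexing the tables above.  Which of the two 32-bit tables
   applies is configuration-dependent (DBX_REGISTER_NUMBER in the
   headers makes that choice); the SVR4 table is assumed here.  */

static int
sketch_dwarf_regno (unsigned int regno)
{
  gcc_assert (regno < FIRST_PSEUDO_REGISTER);
  return (TARGET_64BIT
          ? dbx64_register_map[regno]
          : svr4_dbx_register_map[regno]);
}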
2250 /* Define parameter passing and return registers. */
2252 static int const x86_64_int_parameter_registers[6] =
2254 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2257 static int const x86_64_ms_abi_int_parameter_registers[4] =
2259 CX_REG, DX_REG, R8_REG, R9_REG
2262 static int const x86_64_int_return_registers[4] =
2264 AX_REG, DX_REG, DI_REG, SI_REG
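/* Illustrative example (a sketch): under the SysV AMD64 ABI, the first
   integer arguments of a call f (a, b, c) land in %rdi, %rsi and %rdx,
   i.e. x86_64_int_parameter_registers[0..2]; the MS ABI instead uses
   %rcx, %rdx, %r8 and %r9.  A hypothetical helper:  */

static int
sketch_int_parm_regno (enum calling_abi abi, int argno)
{
  return (abi == MS_ABI
          ? x86_64_ms_abi_int_parameter_registers[argno]
          : x86_64_int_parameter_registers[argno]);
}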
2267 /* Define the structure for the machine field in struct function. */
2269 struct GTY(()) stack_local_entry {
2270 unsigned short mode;
2273 struct stack_local_entry *next;
2276 /* Structure describing stack frame layout.
2277 Stack grows downward:
2283 saved static chain if ix86_static_chain_on_stack
2285 saved frame pointer if frame_pointer_needed
2286 <- HARD_FRAME_POINTER
2292 <- sse_regs_save_offset
2295 [va_arg registers] |
2299 [padding2] | = to_allocate
2308 int outgoing_arguments_size;
2309 HOST_WIDE_INT frame;
2311 /* The offsets relative to ARG_POINTER. */
2312 HOST_WIDE_INT frame_pointer_offset;
2313 HOST_WIDE_INT hard_frame_pointer_offset;
2314 HOST_WIDE_INT stack_pointer_offset;
2315 HOST_WIDE_INT hfp_save_offset;
2316 HOST_WIDE_INT reg_save_offset;
2317 HOST_WIDE_INT sse_reg_save_offset;
2319 /* When save_regs_using_mov is set, emit prologue using
2320 move instead of push instructions. */
2321 bool save_regs_using_mov;
2324 /* Code model option. */
2325 enum cmodel ix86_cmodel;
2327 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2329 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2331 /* Which unit we are generating floating point math for. */
2332 enum fpmath_unit ix86_fpmath;
2334 /* Which cpu we are scheduling for. */
2335 enum attr_cpu ix86_schedule;
2337 /* Which cpu we are optimizing for. */
2338 enum processor_type ix86_tune;
2340 /* Which instruction set architecture to use. */
2341 enum processor_type ix86_arch;
2343 /* True if the SSE prefetch instruction is not a NOOP. */
2344 int x86_prefetch_sse;
2346 /* ix86_regparm_string as a number */
2347 static int ix86_regparm;
2349 /* -mstackrealign option */
2350 static const char ix86_force_align_arg_pointer_string[]
2351 = "force_align_arg_pointer";
2353 static rtx (*ix86_gen_leave) (void);
2354 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2355 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2357 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2358 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2361 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2362 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2364 /* Preferred alignment for stack boundary in bits. */
2365 unsigned int ix86_preferred_stack_boundary;
2367 /* Alignment for incoming stack boundary in bits specified at
2368 command line. */
2369 static unsigned int ix86_user_incoming_stack_boundary;
2371 /* Default alignment for incoming stack boundary in bits. */
2372 static unsigned int ix86_default_incoming_stack_boundary;
2374 /* Alignment for incoming stack boundary in bits. */
2375 unsigned int ix86_incoming_stack_boundary;
2377 /* The ABI used by the target. */
2378 enum calling_abi ix86_abi;
2380 /* Values 1-5: see jump.c */
2381 int ix86_branch_cost;
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Variables which are this size or smaller are put in the data/bss
2388 or ldata/lbss sections. */
2390 int ix86_section_threshold = 65536;
2392 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2393 char internal_label_prefix[16];
2394 int internal_label_prefix_len;
2396 /* Fence to use after loop using movnt. */
2399 /* Register class used for passing a given 64-bit part of the argument.
2400 These represent classes as documented by the psABI, with the exception
2401 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2402 will just use an SF or DFmode move instead of DImode to avoid
2403 reformatting penalties.
2404 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2405 whenever possible (the upper half does contain padding). */
2406 enum x86_64_reg_class
2409 X86_64_INTEGER_CLASS,
2410 X86_64_INTEGERSI_CLASS,
2417 X86_64_COMPLEX_X87_CLASS,
2421 #define MAX_CLASSES 4
2423 /* Table of constants used by fldpi, fldln2, etc. */
2424 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2425 static bool ext_80387_constants_init = 0;
2428 static struct machine_function * ix86_init_machine_status (void);
2429 static rtx ix86_function_value (const_tree, const_tree, bool);
2430 static bool ix86_function_value_regno_p (const unsigned int);
2431 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2433 static rtx ix86_static_chain (const_tree, bool);
2434 static int ix86_function_regparm (const_tree, const_tree);
2435 static void ix86_compute_frame_layout (struct ix86_frame *);
2436 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2438 static void ix86_add_new_builtins (int);
2439 static rtx ix86_expand_vec_perm_builtin (tree);
2440 static tree ix86_canonical_va_list_type (tree);
2441 static void predict_jump (int);
2442 static unsigned int split_stack_prologue_scratch_regno (void);
2443 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2445 enum ix86_function_specific_strings
2447 IX86_FUNCTION_SPECIFIC_ARCH,
2448 IX86_FUNCTION_SPECIFIC_TUNE,
2449 IX86_FUNCTION_SPECIFIC_FPMATH,
2450 IX86_FUNCTION_SPECIFIC_MAX
2453 static char *ix86_target_string (int, int, const char *, const char *,
2454 const char *, bool);
2455 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2456 static void ix86_function_specific_save (struct cl_target_option *);
2457 static void ix86_function_specific_restore (struct cl_target_option *);
2458 static void ix86_function_specific_print (FILE *, int,
2459 struct cl_target_option *);
2460 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2461 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2462 static bool ix86_can_inline_p (tree, tree);
2463 static void ix86_set_current_function (tree);
2464 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2466 static enum calling_abi ix86_function_abi (const_tree);
2469 #ifndef SUBTARGET32_DEFAULT_CPU
2470 #define SUBTARGET32_DEFAULT_CPU "i386"
2473 /* The svr4 ABI for the i386 says that records and unions are returned
2474 in memory. */
2475 #ifndef DEFAULT_PCC_STRUCT_RETURN
2476 #define DEFAULT_PCC_STRUCT_RETURN 1
2479 /* Whether -mtune= or -march= were specified */
2480 static int ix86_tune_defaulted;
2481 static int ix86_arch_specified;
2483 /* A mask of ix86_isa_flags that includes bit X if X
2484 was set or cleared on the command line. */
2485 static int ix86_isa_flags_explicit;
2487 /* Define a set of ISAs which are available when a given ISA is
2488 enabled. MMX and SSE ISAs are handled separately. */
2490 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2491 #define OPTION_MASK_ISA_3DNOW_SET \
2492 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2494 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2495 #define OPTION_MASK_ISA_SSE2_SET \
2496 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2497 #define OPTION_MASK_ISA_SSE3_SET \
2498 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2499 #define OPTION_MASK_ISA_SSSE3_SET \
2500 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2501 #define OPTION_MASK_ISA_SSE4_1_SET \
2502 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2503 #define OPTION_MASK_ISA_SSE4_2_SET \
2504 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2505 #define OPTION_MASK_ISA_AVX_SET \
2506 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2507 #define OPTION_MASK_ISA_FMA_SET \
2508 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2510 /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
2511 as -msse4.2. */
2512 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2514 #define OPTION_MASK_ISA_SSE4A_SET \
2515 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2516 #define OPTION_MASK_ISA_FMA4_SET \
2517 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2518 | OPTION_MASK_ISA_AVX_SET)
2519 #define OPTION_MASK_ISA_XOP_SET \
2520 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2521 #define OPTION_MASK_ISA_LWP_SET \
2524 /* AES and PCLMUL need SSE2 because they use XMM registers. */
2525 #define OPTION_MASK_ISA_AES_SET \
2526 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2527 #define OPTION_MASK_ISA_PCLMUL_SET \
2528 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2530 #define OPTION_MASK_ISA_ABM_SET \
2531 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2533 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2534 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2535 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2536 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2537 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2538 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2539 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2541 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2542 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2543 #define OPTION_MASK_ISA_F16C_SET \
2544 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
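/* For example, the *_SET masks chain transitively, so

     OPTION_MASK_ISA_SSE4_2_SET
       == OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
        | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3
        | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE,

   i.e. enabling -msse4.2 also enables every ISA it implies.  */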
2546 /* Define a set of ISAs which aren't available when a given ISA is
2547 disabled. MMX and SSE ISAs are handled separately. */
2549 #define OPTION_MASK_ISA_MMX_UNSET \
2550 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2551 #define OPTION_MASK_ISA_3DNOW_UNSET \
2552 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2553 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2555 #define OPTION_MASK_ISA_SSE_UNSET \
2556 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2557 #define OPTION_MASK_ISA_SSE2_UNSET \
2558 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2559 #define OPTION_MASK_ISA_SSE3_UNSET \
2560 (OPTION_MASK_ISA_SSE3 \
2561 | OPTION_MASK_ISA_SSSE3_UNSET \
2562 | OPTION_MASK_ISA_SSE4A_UNSET )
2563 #define OPTION_MASK_ISA_SSSE3_UNSET \
2564 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2565 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2566 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2567 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2568 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2569 #define OPTION_MASK_ISA_AVX_UNSET \
2570 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2571 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2572 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2574 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2575 as -mno-sse4.1. */
2576 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2578 #define OPTION_MASK_ISA_SSE4A_UNSET \
2579 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2581 #define OPTION_MASK_ISA_FMA4_UNSET \
2582 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2583 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2584 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2586 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2587 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2588 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2589 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2590 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2591 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2592 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2593 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2594 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2595 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2597 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2598 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2599 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
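/* Illustrative sketch (an assumption: distills the pattern repeated by
   ix86_handle_option below): -mFOO ORs in FOO_SET so implied ISAs come
   along, -mno-FOO clears FOO_UNSET so dependent ISAs go away, and
   ix86_isa_flags_explicit records the user's choice either way so that
   later -march defaults do not override it.  */

static void
sketch_handle_isa_option (int value, int set_mask, int unset_mask)
{
  if (value)
    {
      ix86_isa_flags |= set_mask;
      ix86_isa_flags_explicit |= set_mask;
    }
  else
    {
      ix86_isa_flags &= ~unset_mask;
      ix86_isa_flags_explicit |= unset_mask;
    }
}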
2601 /* Vectorization library interface and handlers. */
2602 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2604 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2605 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2607 /* Processor target table, indexed by processor number */
2610 const struct processor_costs *cost; /* Processor costs */
2611 const int align_loop; /* Default alignments. */
2612 const int align_loop_max_skip;
2613 const int align_jump;
2614 const int align_jump_max_skip;
2615 const int align_func;
2618 static const struct ptt processor_target_table[PROCESSOR_max] =
2620 {&i386_cost, 4, 3, 4, 3, 4},
2621 {&i486_cost, 16, 15, 16, 15, 16},
2622 {&pentium_cost, 16, 7, 16, 7, 16},
2623 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2624 {&geode_cost, 0, 0, 0, 0, 0},
2625 {&k6_cost, 32, 7, 32, 7, 32},
2626 {&athlon_cost, 16, 7, 16, 7, 16},
2627 {&pentium4_cost, 0, 0, 0, 0, 0},
2628 {&k8_cost, 16, 7, 16, 7, 16},
2629 {&nocona_cost, 0, 0, 0, 0, 0},
2630 /* Core 2 32-bit. */
2631 {&generic32_cost, 16, 10, 16, 10, 16},
2632 /* Core 2 64-bit. */
2633 {&generic64_cost, 16, 10, 16, 10, 16},
2634 /* Core i7 32-bit. */
2635 {&generic32_cost, 16, 10, 16, 10, 16},
2636 /* Core i7 64-bit. */
2637 {&generic64_cost, 16, 10, 16, 10, 16},
2638 {&generic32_cost, 16, 7, 16, 7, 16},
2639 {&generic64_cost, 16, 10, 16, 10, 16},
2640 {&amdfam10_cost, 32, 24, 32, 7, 32},
2641 {&bdver1_cost, 32, 24, 32, 7, 32},
2642 {&btver1_cost, 32, 24, 32, 7, 32},
2643 {&atom_cost, 16, 7, 16, 7, 16}
2646 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2675 /* Return true if a red-zone is in use. */
2677 static inline bool
2678 ix86_using_red_zone (void)
2680 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2683 /* Implement TARGET_HANDLE_OPTION. */
2685 static bool
2686 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2693 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2694 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2698 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2699 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2706 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2707 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2711 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2712 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2722 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2723 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2727 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2728 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2735 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2736 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2740 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2741 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2748 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2749 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2753 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2754 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2761 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2762 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2766 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2767 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2774 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2775 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2779 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2780 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2787 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2788 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2792 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2793 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2800 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2801 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2805 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2806 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2813 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2814 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2818 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2819 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2824 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2825 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2829 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2830 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2836 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2837 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2841 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2842 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2849 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2850 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2854 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2855 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2862 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2863 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2867 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2868 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2875 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2876 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2880 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2881 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2888 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2889 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2893 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2894 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2901 ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2902 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2906 ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2907 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2914 ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2915 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2919 ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2920 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2927 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2928 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2932 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2933 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2940 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2941 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2945 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2946 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2953 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2954 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2958 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2959 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2966 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2967 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2971 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2972 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2979 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2980 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2984 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2985 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2992 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2993 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2997 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2998 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
3005 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
3006 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
3010 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
3011 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
3018 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
3019 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
3023 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
3024 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
3031 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
3032 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
3036 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
3037 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
3044 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
3045 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
3049 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
3050 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
3059 /* Return a string that documents the current -m options. The caller is
3060 responsible for freeing the string. */
3062 static char *
3063 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
3064 const char *fpmath, bool add_nl_p)
3066 struct ix86_target_opts
3068 const char *option; /* option string */
3069 int mask; /* isa mask options */
3072 /* This table is ordered so that options like -msse4.2 that imply
3073 preceding options are matched first. */
3074 static struct ix86_target_opts isa_opts[] =
3076 { "-m64", OPTION_MASK_ISA_64BIT },
3077 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3078 { "-mfma", OPTION_MASK_ISA_FMA },
3079 { "-mxop", OPTION_MASK_ISA_XOP },
3080 { "-mlwp", OPTION_MASK_ISA_LWP },
3081 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3082 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3083 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3084 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3085 { "-msse3", OPTION_MASK_ISA_SSE3 },
3086 { "-msse2", OPTION_MASK_ISA_SSE2 },
3087 { "-msse", OPTION_MASK_ISA_SSE },
3088 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3089 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3090 { "-mmmx", OPTION_MASK_ISA_MMX },
3091 { "-mabm", OPTION_MASK_ISA_ABM },
3092 { "-mbmi", OPTION_MASK_ISA_BMI },
3093 { "-mtbm", OPTION_MASK_ISA_TBM },
3094 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3095 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3096 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3097 { "-maes", OPTION_MASK_ISA_AES },
3098 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3099 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3100 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3101 { "-mf16c", OPTION_MASK_ISA_F16C },
3105 static struct ix86_target_opts flag_opts[] =
3107 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3108 { "-m80387", MASK_80387 },
3109 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3110 { "-malign-double", MASK_ALIGN_DOUBLE },
3111 { "-mcld", MASK_CLD },
3112 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3113 { "-mieee-fp", MASK_IEEE_FP },
3114 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3115 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3116 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3117 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3118 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3119 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3120 { "-mno-red-zone", MASK_NO_RED_ZONE },
3121 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3122 { "-mrecip", MASK_RECIP },
3123 { "-mrtd", MASK_RTD },
3124 { "-msseregparm", MASK_SSEREGPARM },
3125 { "-mstack-arg-probe", MASK_STACK_PROBE },
3126 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3127 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3128 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3129 { "-mvzeroupper", MASK_VZEROUPPER },
3132 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3135 char target_other[40];
3144 memset (opts, '\0', sizeof (opts));
3146 /* Add -march= option. */
3149 opts[num][0] = "-march=";
3150 opts[num++][1] = arch;
3153 /* Add -mtune= option. */
3156 opts[num][0] = "-mtune=";
3157 opts[num++][1] = tune;
3160 /* Pick out the ISA options. */
3161 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3163 if ((isa & isa_opts[i].mask) != 0)
3165 opts[num++][0] = isa_opts[i].option;
3166 isa &= ~ isa_opts[i].mask;
3170 if (isa && add_nl_p)
3172 opts[num++][0] = isa_other;
3173 sprintf (isa_other, "(other isa: %#x)", isa);
3176 /* Add flag options. */
3177 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3179 if ((flags & flag_opts[i].mask) != 0)
3181 opts[num++][0] = flag_opts[i].option;
3182 flags &= ~ flag_opts[i].mask;
3186 if (flags && add_nl_p)
3188 opts[num++][0] = target_other;
3189 sprintf (target_other, "(other flags: %#x)", flags);
3192 /* Add -fpmath= option. */
3195 opts[num][0] = "-mfpmath=";
3196 opts[num++][1] = fpmath;
3203 gcc_assert (num < ARRAY_SIZE (opts));
3205 /* Size the string. */
3207 sep_len = (add_nl_p) ? 3 : 1;
3208 for (i = 0; i < num; i++)
3211 for (j = 0; j < 2; j++)
3213 len += strlen (opts[i][j]);
3216 /* Build the string. */
3217 ret = ptr = (char *) xmalloc (len);
3220 for (i = 0; i < num; i++)
3224 for (j = 0; j < 2; j++)
3225 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3232 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3240 for (j = 0; j < 2; j++)
3243 memcpy (ptr, opts[i][j], len2[j]);
3245 line_len += len2[j];
3250 gcc_assert (ret + len >= ptr);
3255 /* Return TRUE if software prefetching is beneficial for the
3256 target. */
3258 static bool
3259 software_prefetching_beneficial_p (void)
3263 case PROCESSOR_GEODE:
3265 case PROCESSOR_ATHLON:
3267 case PROCESSOR_AMDFAM10:
3268 case PROCESSOR_BTVER1:
3276 /* Return true if profiling code should be emitted before the
3277 prologue, and false otherwise.
3278 Note: for x86 with "hotfix", this case is diagnosed with sorry (). */
3279 static bool
3280 ix86_profile_before_prologue (void)
3282 return flag_fentry != 0;
3285 /* Function that is callable from the debugger to print the current
3286 options. */
3287 static void
3288 ix86_debug_options (void)
3290 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3291 ix86_arch_string, ix86_tune_string,
3292 ix86_fpmath_string, true);
3296 fprintf (stderr, "%s\n\n", opts);
3300 fputs ("<no options>\n\n", stderr);
3305 /* Override various settings based on options. If MAIN_ARGS_P, the
3306 options are from the command line, otherwise they are from
3307 attribute(target). */
3309 static void
3310 ix86_option_override_internal (bool main_args_p)
3313 unsigned int ix86_arch_mask, ix86_tune_mask;
3314 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3319 /* Comes from final.c -- no real reason to change it. */
3320 #define MAX_CODE_ALIGN 16
3328 PTA_PREFETCH_SSE = 1 << 4,
3330 PTA_3DNOW_A = 1 << 6,
3334 PTA_POPCNT = 1 << 10,
3336 PTA_SSE4A = 1 << 12,
3337 PTA_NO_SAHF = 1 << 13,
3338 PTA_SSE4_1 = 1 << 14,
3339 PTA_SSE4_2 = 1 << 15,
3341 PTA_PCLMUL = 1 << 17,
3344 PTA_MOVBE = 1 << 20,
3348 PTA_FSGSBASE = 1 << 24,
3349 PTA_RDRND = 1 << 25,
3353 /* If this reaches 32, we need to widen the struct pta flags below. */
3358 const char *const name; /* processor name or nickname. */
3359 const enum processor_type processor;
3360 const enum attr_cpu schedule;
3361 const unsigned /*enum pta_flags*/ flags;
3363 const processor_alias_table[] =
3365 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3366 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3367 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3368 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3369 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3370 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3371 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3372 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3373 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3374 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3375 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3376 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3377 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3379 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3381 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3382 PTA_MMX | PTA_SSE | PTA_SSE2},
3383 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3384 PTA_MMX |PTA_SSE | PTA_SSE2},
3385 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3386 PTA_MMX | PTA_SSE | PTA_SSE2},
3387 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3388 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3389 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3390 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3391 | PTA_CX16 | PTA_NO_SAHF},
3392 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3393 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3394 | PTA_SSSE3 | PTA_CX16},
3395 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3396 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3397 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3398 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3399 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3400 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3401 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3402 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3403 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3404 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3405 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3406 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
3407 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3408 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3409 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3410 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3411 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3412 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3413 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3414 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3415 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3416 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3417 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3418 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3419 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3420 {"x86-64", PROCESSOR_K8, CPU_K8,
3421 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3422 {"k8", PROCESSOR_K8, CPU_K8,
3423 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3424 | PTA_SSE2 | PTA_NO_SAHF},
3425 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3426 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3427 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3428 {"opteron", PROCESSOR_K8, CPU_K8,
3429 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3430 | PTA_SSE2 | PTA_NO_SAHF},
3431 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3432 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3433 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3434 {"athlon64", PROCESSOR_K8, CPU_K8,
3435 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3436 | PTA_SSE2 | PTA_NO_SAHF},
3437 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3438 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3439 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3440 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3441 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3442 | PTA_SSE2 | PTA_NO_SAHF},
3443 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3444 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3445 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3446 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3447 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3448 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3449 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3450 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3451 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3452 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3453 | PTA_XOP | PTA_LWP},
3454 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3455 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3456 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
3457 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3458 0 /* flags are only used for -march switch. */ },
3459 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3460 PTA_64BIT /* flags are only used for -march switch. */ },
3463 int const pta_size = ARRAY_SIZE (processor_alias_table);
3465 /* Set up prefix/suffix so the error messages refer to either the command
3466 line argument, or the attribute(target). */
3475 prefix = "option(\"";
3480 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3481 SUBTARGET_OVERRIDE_OPTIONS;
3484 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3485 SUBSUBTARGET_OVERRIDE_OPTIONS;
3488 /* -fPIC is the default for x86_64. */
3489 if (TARGET_MACHO && TARGET_64BIT)
3492 /* Need to check -mtune=generic first. */
3493 if (ix86_tune_string)
3495 if (!strcmp (ix86_tune_string, "generic")
3496 || !strcmp (ix86_tune_string, "i686")
3497 /* As special support for cross compilers, we read -mtune=native
3498 as -mtune=generic.  With native compilers we won't see
3499 -mtune=native, as it was changed by the driver. */
3500 || !strcmp (ix86_tune_string, "native"))
3503 ix86_tune_string = "generic64";
3505 ix86_tune_string = "generic32";
3507 /* If this call is for setting the option attribute, allow the
3508 generic32/generic64 that was previously set. */
3509 else if (!main_args_p
3510 && (!strcmp (ix86_tune_string, "generic32")
3511 || !strcmp (ix86_tune_string, "generic64")))
3513 else if (!strncmp (ix86_tune_string, "generic", 7))
3514 error ("bad value (%s) for %stune=%s %s",
3515 ix86_tune_string, prefix, suffix, sw);
3516 else if (!strcmp (ix86_tune_string, "x86-64"))
3517 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3518 "%stune=k8%s or %stune=generic%s instead as appropriate",
3519 prefix, suffix, prefix, suffix, prefix, suffix);
3523 if (ix86_arch_string)
3524 ix86_tune_string = ix86_arch_string;
3525 if (!ix86_tune_string)
3527 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3528 ix86_tune_defaulted = 1;
3531 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3532 need to use a sensible tune option. */
3533 if (!strcmp (ix86_tune_string, "generic")
3534 || !strcmp (ix86_tune_string, "x86-64")
3535 || !strcmp (ix86_tune_string, "i686"))
3538 ix86_tune_string = "generic64";
3540 ix86_tune_string = "generic32";
3544 if (ix86_stringop_string)
3546 if (!strcmp (ix86_stringop_string, "rep_byte"))
3547 stringop_alg = rep_prefix_1_byte;
3548 else if (!strcmp (ix86_stringop_string, "libcall"))
3549 stringop_alg = libcall;
3550 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3551 stringop_alg = rep_prefix_4_byte;
3552 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3554 /* rep; movq isn't available in 32-bit code. */
3555 stringop_alg = rep_prefix_8_byte;
3556 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3557 stringop_alg = loop_1_byte;
3558 else if (!strcmp (ix86_stringop_string, "loop"))
3559 stringop_alg = loop;
3560 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3561 stringop_alg = unrolled_loop;
3563 error ("bad value (%s) for %sstringop-strategy=%s %s",
3564 ix86_stringop_string, prefix, suffix, sw);
3567 if (!ix86_arch_string)
3568 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3570 ix86_arch_specified = 1;
3572 /* Validate -mabi= value. */
3573 if (ix86_abi_string)
3575 if (strcmp (ix86_abi_string, "sysv") == 0)
3576 ix86_abi = SYSV_ABI;
3577 else if (strcmp (ix86_abi_string, "ms") == 0)
3580 error ("unknown ABI (%s) for %sabi=%s %s",
3581 ix86_abi_string, prefix, suffix, sw);
3584 ix86_abi = DEFAULT_ABI;
3586 if (ix86_cmodel_string != 0)
3588 if (!strcmp (ix86_cmodel_string, "small"))
3589 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3590 else if (!strcmp (ix86_cmodel_string, "medium"))
3591 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3592 else if (!strcmp (ix86_cmodel_string, "large"))
3593 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3595 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3596 else if (!strcmp (ix86_cmodel_string, "32"))
3597 ix86_cmodel = CM_32;
3598 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3599 ix86_cmodel = CM_KERNEL;
3601 error ("bad value (%s) for %scmodel=%s %s",
3602 ix86_cmodel_string, prefix, suffix, sw);
3606 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3607 use of rip-relative addressing. This eliminates fixups that
3608 would otherwise be needed if this object is to be placed in a
3609 DLL, and is essentially just as efficient as direct addressing. */
3610 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3611 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3612 else if (TARGET_64BIT)
3613 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3615 ix86_cmodel = CM_32;
3617 if (ix86_asm_string != 0)
3620 && !strcmp (ix86_asm_string, "intel"))
3621 ix86_asm_dialect = ASM_INTEL;
3622 else if (!strcmp (ix86_asm_string, "att"))
3623 ix86_asm_dialect = ASM_ATT;
3625 error ("bad value (%s) for %sasm=%s %s",
3626 ix86_asm_string, prefix, suffix, sw);
3628 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
error ("code model %qs not supported in %s-bit mode",
       ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3631 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3632 sorry ("%i-bit mode not compiled in",
3633 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3635 for (i = 0; i < pta_size; i++)
3636 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3638 ix86_schedule = processor_alias_table[i].schedule;
3639 ix86_arch = processor_alias_table[i].processor;
3640 /* Default cpu tuning to the architecture. */
3641 ix86_tune = ix86_arch;
3643 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3644 error ("CPU you selected does not support x86-64 "
3647 if (processor_alias_table[i].flags & PTA_MMX
3648 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3649 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3650 if (processor_alias_table[i].flags & PTA_3DNOW
3651 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3652 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3653 if (processor_alias_table[i].flags & PTA_3DNOW_A
3654 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3655 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3656 if (processor_alias_table[i].flags & PTA_SSE
3657 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3658 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3659 if (processor_alias_table[i].flags & PTA_SSE2
3660 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3661 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3662 if (processor_alias_table[i].flags & PTA_SSE3
3663 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3664 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3665 if (processor_alias_table[i].flags & PTA_SSSE3
3666 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3667 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3668 if (processor_alias_table[i].flags & PTA_SSE4_1
3669 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3670 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3671 if (processor_alias_table[i].flags & PTA_SSE4_2
3672 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3673 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3674 if (processor_alias_table[i].flags & PTA_AVX
3675 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3676 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3677 if (processor_alias_table[i].flags & PTA_FMA
3678 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3679 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3680 if (processor_alias_table[i].flags & PTA_SSE4A
3681 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3682 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3683 if (processor_alias_table[i].flags & PTA_FMA4
3684 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3685 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3686 if (processor_alias_table[i].flags & PTA_XOP
3687 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3688 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3689 if (processor_alias_table[i].flags & PTA_LWP
3690 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3691 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3692 if (processor_alias_table[i].flags & PTA_ABM
3693 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3694 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3695 if (processor_alias_table[i].flags & PTA_BMI
3696 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3697 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3698 if (processor_alias_table[i].flags & PTA_TBM
3699 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3700 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3701 if (processor_alias_table[i].flags & PTA_CX16
3702 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3703 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3704 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3705 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3706 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3707 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3708 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3709 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3710 if (processor_alias_table[i].flags & PTA_MOVBE
3711 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3712 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3713 if (processor_alias_table[i].flags & PTA_AES
3714 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3715 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3716 if (processor_alias_table[i].flags & PTA_PCLMUL
3717 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3718 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3719 if (processor_alias_table[i].flags & PTA_FSGSBASE
3720 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3721 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3722 if (processor_alias_table[i].flags & PTA_RDRND
3723 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3724 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3725 if (processor_alias_table[i].flags & PTA_F16C
3726 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3727 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3728 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3729 x86_prefetch_sse = true;
3734 if (!strcmp (ix86_arch_string, "generic"))
3735 error ("generic CPU can be used only for %stune=%s %s",
3736 prefix, suffix, sw);
3737 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3738 error ("bad value (%s) for %sarch=%s %s",
3739 ix86_arch_string, prefix, suffix, sw);
3741 ix86_arch_mask = 1u << ix86_arch;
3742 for (i = 0; i < X86_ARCH_LAST; ++i)
3743 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
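/* A worked example of the mask computation above (hypothetical value):
   if ix86_arch has enum value 5, ix86_arch_mask is 1u << 5 == 0x20, and
   ix86_arch_features[i] becomes nonzero exactly when bit 5 is set in
   initial_ix86_arch_features[i].  */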
3745 for (i = 0; i < pta_size; i++)
3746 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3748 ix86_schedule = processor_alias_table[i].schedule;
3749 ix86_tune = processor_alias_table[i].processor;
3752 if (!(processor_alias_table[i].flags & PTA_64BIT))
3754 if (ix86_tune_defaulted)
3756 ix86_tune_string = "x86-64";
3757 for (i = 0; i < pta_size; i++)
3758 if (! strcmp (ix86_tune_string,
3759 processor_alias_table[i].name))
3761 ix86_schedule = processor_alias_table[i].schedule;
3762 ix86_tune = processor_alias_table[i].processor;
3765 error ("CPU you selected does not support x86-64 "
3771 /* Adjust tuning when compiling for 32-bit ABI. */
3774 case PROCESSOR_GENERIC64:
3775 ix86_tune = PROCESSOR_GENERIC32;
3776 ix86_schedule = CPU_PENTIUMPRO;
3779 case PROCESSOR_CORE2_64:
3780 ix86_tune = PROCESSOR_CORE2_32;
3783 case PROCESSOR_COREI7_64:
3784 ix86_tune = PROCESSOR_COREI7_32;
3791 /* Intel CPUs have always interpreted SSE prefetch instructions as
3792 NOPs; so, we can enable SSE prefetch instructions even when
3793 -mtune (rather than -march) points us to a processor that has them.
3794 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3795 higher processors. */
3797 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3798 x86_prefetch_sse = true;
3802 if (ix86_tune_specified && i == pta_size)
3803 error ("bad value (%s) for %stune=%s %s",
3804 ix86_tune_string, prefix, suffix, sw);
3806 ix86_tune_mask = 1u << ix86_tune;
3807 for (i = 0; i < X86_TUNE_LAST; ++i)
3808 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3810 #ifndef USE_IX86_FRAME_POINTER
3811 #define USE_IX86_FRAME_POINTER 0
3814 #ifndef USE_X86_64_FRAME_POINTER
3815 #define USE_X86_64_FRAME_POINTER 0
3818 /* Set the default values for switches whose default depends on TARGET_64BIT
3819 in case they weren't overwritten by command line options. */
3822 if (optimize > 1 && !global_options_set.x_flag_zee)
3824 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3825 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3826 if (flag_asynchronous_unwind_tables == 2)
3827 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3828 if (flag_pcc_struct_return == 2)
3829 flag_pcc_struct_return = 0;
3833 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3834 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3835 if (flag_asynchronous_unwind_tables == 2)
3836 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3837 if (flag_pcc_struct_return == 2)
3838 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3842 ix86_cost = &ix86_size_cost;
3844 ix86_cost = processor_target_table[ix86_tune].cost;
3846 /* Arrange to set up i386_stack_locals for all functions. */
3847 init_machine_status = ix86_init_machine_status;
3849 /* Validate -mregparm= value. */
3850 if (ix86_regparm_string)
3853 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3854 i = atoi (ix86_regparm_string);
3855 if (i < 0 || i > REGPARM_MAX)
3856 error ("%sregparm=%d%s is not between 0 and %d",
3857 prefix, i, suffix, REGPARM_MAX);
3862 ix86_regparm = REGPARM_MAX;
3864 /* If the user has provided any of the -malign-* options,
3865 warn and use that value only if -falign-* is not set.
3866 Remove this code in GCC 3.2 or later. */
3867 if (ix86_align_loops_string)
3869 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3870 prefix, suffix, suffix);
3871 if (align_loops == 0)
3873 i = atoi (ix86_align_loops_string);
3874 if (i < 0 || i > MAX_CODE_ALIGN)
3875 error ("%salign-loops=%d%s is not between 0 and %d",
3876 prefix, i, suffix, MAX_CODE_ALIGN);
3878 align_loops = 1 << i;
3882 if (ix86_align_jumps_string)
3884 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3885 prefix, suffix, suffix);
3886 if (align_jumps == 0)
3888 i = atoi (ix86_align_jumps_string);
3889 if (i < 0 || i > MAX_CODE_ALIGN)
error ("%salign-jumps=%d%s is not between 0 and %d",
3891 prefix, i, suffix, MAX_CODE_ALIGN);
3893 align_jumps = 1 << i;
3897 if (ix86_align_funcs_string)
3899 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3900 prefix, suffix, suffix);
3901 if (align_functions == 0)
3903 i = atoi (ix86_align_funcs_string);
3904 if (i < 0 || i > MAX_CODE_ALIGN)
error ("%salign-functions=%d%s is not between 0 and %d",
3906 prefix, i, suffix, MAX_CODE_ALIGN);
3908 align_functions = 1 << i;
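/* A worked example of the obsolete -malign-* handling above: with
   -malign-functions=4 and -falign-functions unset, align_functions
   becomes 1 << 4 == 16, i.e. 16-byte function alignment.  */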
3912 /* Default align_* from the processor table. */
3913 if (align_loops == 0)
3915 align_loops = processor_target_table[ix86_tune].align_loop;
3916 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3918 if (align_jumps == 0)
3920 align_jumps = processor_target_table[ix86_tune].align_jump;
3921 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3923 if (align_functions == 0)
3925 align_functions = processor_target_table[ix86_tune].align_func;
3928 /* Validate -mbranch-cost= value, or provide default. */
3929 ix86_branch_cost = ix86_cost->branch_cost;
3930 if (ix86_branch_cost_string)
3932 i = atoi (ix86_branch_cost_string);
3934 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3936 ix86_branch_cost = i;
3938 if (ix86_section_threshold_string)
3940 i = atoi (ix86_section_threshold_string);
3942 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3944 ix86_section_threshold = i;
3947 if (ix86_tls_dialect_string)
3949 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3950 ix86_tls_dialect = TLS_DIALECT_GNU;
3951 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3952 ix86_tls_dialect = TLS_DIALECT_GNU2;
3954 error ("bad value (%s) for %stls-dialect=%s %s",
3955 ix86_tls_dialect_string, prefix, suffix, sw);
3958 if (ix87_precision_string)
3960 i = atoi (ix87_precision_string);
3961 if (i != 32 && i != 64 && i != 80)
error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3967 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3969 /* Enable by default the SSE and MMX builtins. Do allow the user to
3970 explicitly disable any of these. In particular, disabling SSE and
3971 MMX for kernel code is extremely useful. */
3972 if (!ix86_arch_specified)
3974 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3975 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3982 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3984 if (!ix86_arch_specified)
3986 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
/* The i386 ABI does not specify a red zone.  It still makes sense to use
   one when the programmer takes care to keep the stack from being
   destroyed.  */
3990 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3991 target_flags |= MASK_NO_RED_ZONE;
3994 /* Keep nonleaf frame pointers. */
3995 if (flag_omit_frame_pointer)
3996 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3997 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3998 flag_omit_frame_pointer = 1;
4000 /* If we're doing fast math, we don't care about comparison order
4001 wrt NaNs. This lets us use a shorter comparison sequence. */
4002 if (flag_finite_math_only)
4003 target_flags &= ~MASK_IEEE_FP;
4005 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4006 since the insns won't need emulation. */
4007 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
4008 target_flags &= ~MASK_NO_FANCY_MATH_387;
4010 /* Likewise, if the target doesn't have a 387, or we've specified
4011 software floating point, don't use 387 inline intrinsics. */
4013 target_flags |= MASK_NO_FANCY_MATH_387;
4015 /* Turn on MMX builtins for -msse. */
4018 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
4019 x86_prefetch_sse = true;
4022 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
4023 if (TARGET_SSE4_2 || TARGET_ABM)
4024 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
4026 /* Validate -mpreferred-stack-boundary= value or default it to
4027 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4028 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4029 if (ix86_preferred_stack_boundary_string)
4031 int min = (TARGET_64BIT ? 4 : 2);
4032 int max = (TARGET_SEH ? 4 : 12);
4034 i = atoi (ix86_preferred_stack_boundary_string);
4035 if (i < min || i > max)
4038 error ("%spreferred-stack-boundary%s is not supported "
4039 "for this target", prefix, suffix);
4041 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
4042 prefix, i, suffix, min, max);
4045 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
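/* A worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the usual
   16-byte stack alignment.  */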
4048 /* Set the default value for -mstackrealign. */
4049 if (ix86_force_align_arg_pointer == -1)
4050 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4052 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4054 /* Validate -mincoming-stack-boundary= value or default it to
4055 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4056 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4057 if (ix86_incoming_stack_boundary_string)
4059 i = atoi (ix86_incoming_stack_boundary_string);
4060 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
4061 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4062 i, TARGET_64BIT ? 4 : 2);
4065 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
4066 ix86_incoming_stack_boundary
4067 = ix86_user_incoming_stack_boundary;
4071 /* Accept -msseregparm only if at least SSE support is enabled. */
4072 if (TARGET_SSEREGPARM
4074 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4076 ix86_fpmath = TARGET_FPMATH_DEFAULT;
4077 if (ix86_fpmath_string != 0)
4079 if (! strcmp (ix86_fpmath_string, "387"))
4080 ix86_fpmath = FPMATH_387;
4081 else if (! strcmp (ix86_fpmath_string, "sse"))
4085 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4086 ix86_fpmath = FPMATH_387;
4089 ix86_fpmath = FPMATH_SSE;
4091 else if (! strcmp (ix86_fpmath_string, "387,sse")
4092 || ! strcmp (ix86_fpmath_string, "387+sse")
4093 || ! strcmp (ix86_fpmath_string, "sse,387")
4094 || ! strcmp (ix86_fpmath_string, "sse+387")
4095 || ! strcmp (ix86_fpmath_string, "both"))
4099 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4100 ix86_fpmath = FPMATH_387;
4102 else if (!TARGET_80387)
4104 warning (0, "387 instruction set disabled, using SSE arithmetics");
4105 ix86_fpmath = FPMATH_SSE;
4108 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4111 error ("bad value (%s) for %sfpmath=%s %s",
4112 ix86_fpmath_string, prefix, suffix, sw);
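/* For illustration: the alternative spellings accepted above are
   equivalent, so

       gcc -mfpmath=sse,387
       gcc -mfpmath=both

   both yield FPMATH_SSE | FPMATH_387 when SSE and the 80387 are both
   enabled; otherwise the missing unit is dropped with a warning.  */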
4115 /* If the i387 is disabled, then do not return values in it. */
4117 target_flags &= ~MASK_FLOAT_RETURNS;
/* Use an external vectorized library for vectorizing intrinsics.  */
4120 if (ix86_veclibabi_string)
4122 if (strcmp (ix86_veclibabi_string, "svml") == 0)
4123 ix86_veclib_handler = ix86_veclibabi_svml;
4124 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
4125 ix86_veclib_handler = ix86_veclibabi_acml;
4127 error ("unknown vectorization library ABI type (%s) for "
4128 "%sveclibabi=%s %s", ix86_veclibabi_string,
4129 prefix, suffix, sw);
4132 if ((!USE_IX86_FRAME_POINTER
4133 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4134 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4136 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* ??? Unwind info is not correct around the CFG unless either a frame
   pointer is present or M_A_O_A is set.  Fixing this requires rewriting
   unwind info generation to be aware of the CFG and propagating states
   around edges.  */
4142 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4143 || flag_exceptions || flag_non_call_exceptions)
4144 && flag_omit_frame_pointer
4145 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4147 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4148 warning (0, "unwind tables currently require either a frame pointer "
4149 "or %saccumulate-outgoing-args%s for correctness",
4151 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4154 /* If stack probes are required, the space used for large function
4155 arguments on the stack must also be probed, so enable
4156 -maccumulate-outgoing-args so this happens in the prologue. */
4157 if (TARGET_STACK_PROBE
4158 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4160 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4161 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4162 "for correctness", prefix, suffix);
4163 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* For sane SSE instruction set generation we need the fcomi instruction.
   It is safe to enable all CMOVE instructions.  */
4171 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4174 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4175 p = strchr (internal_label_prefix, 'X');
4176 internal_label_prefix_len = p - internal_label_prefix;
4180 /* When scheduling description is not available, disable scheduler pass
4181 so it won't slow down the compilation and make x87 code slower. */
4182 if (!TARGET_SCHEDULE)
4183 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4185 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4186 ix86_cost->simultaneous_prefetches,
4187 global_options.x_param_values,
4188 global_options_set.x_param_values);
4189 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4190 global_options.x_param_values,
4191 global_options_set.x_param_values);
4192 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4193 global_options.x_param_values,
4194 global_options_set.x_param_values);
4195 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4196 global_options.x_param_values,
4197 global_options_set.x_param_values);
/* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4200 if (flag_prefetch_loop_arrays < 0
4203 && software_prefetching_beneficial_p ())
4204 flag_prefetch_loop_arrays = 1;
4206 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4207 can be optimized to ap = __builtin_next_arg (0). */
4208 if (!TARGET_64BIT && !flag_split_stack)
4209 targetm.expand_builtin_va_start = NULL;
4213 ix86_gen_leave = gen_leave_rex64;
4214 ix86_gen_add3 = gen_adddi3;
4215 ix86_gen_sub3 = gen_subdi3;
4216 ix86_gen_sub3_carry = gen_subdi3_carry;
4217 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4218 ix86_gen_monitor = gen_sse3_monitor64;
4219 ix86_gen_andsp = gen_anddi3;
4220 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4221 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4222 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4226 ix86_gen_leave = gen_leave;
4227 ix86_gen_add3 = gen_addsi3;
4228 ix86_gen_sub3 = gen_subsi3;
4229 ix86_gen_sub3_carry = gen_subsi3_carry;
4230 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4231 ix86_gen_monitor = gen_sse3_monitor;
4232 ix86_gen_andsp = gen_andsi3;
4233 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4234 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4235 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
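/* For illustration (hypothetical call site): the indirection above lets
   mode-independent code emit the pattern matching the word size, e.g.

       emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-UNITS_PER_WORD)));

   expands through gen_adddi3 in 64-bit mode and gen_addsi3 otherwise.  */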
4239 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4241 target_flags |= MASK_CLD & ~target_flags_explicit;
4244 if (!TARGET_64BIT && flag_pic)
4246 if (flag_fentry > 0)
4247 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4251 else if (TARGET_SEH)
4253 if (flag_fentry == 0)
4254 sorry ("-mno-fentry isn%'t compatible with SEH");
4257 else if (flag_fentry < 0)
4259 #if defined(PROFILE_BEFORE_PROLOGUE)
/* Save the initial options in case the user uses function-specific options.  */
4268 target_option_default_node = target_option_current_node
4269 = build_target_option_node ();
/* When not optimizing for size, enable the vzeroupper optimization for
   TARGET_AVX with -fexpensive-optimizations.  */
4276 && flag_expensive_optimizations
4277 && !(target_flags_explicit & MASK_VZEROUPPER))
4278 target_flags |= MASK_VZEROUPPER;
4282 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4283 target_flags &= ~MASK_VZEROUPPER;
/* Return TRUE if VAL is passed in a register with a 256bit AVX mode.  */
4290 function_pass_avx256_p (const_rtx val)
4295 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4298 if (GET_CODE (val) == PARALLEL)
4303 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4305 r = XVECEXP (val, 0, i);
4306 if (GET_CODE (r) == EXPR_LIST
4308 && REG_P (XEXP (r, 0))
4309 && (GET_MODE (XEXP (r, 0)) == OImode
4310 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4318 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4321 ix86_option_override (void)
4323 ix86_option_override_internal (true);
4326 /* Update register usage after having seen the compiler flags. */
4329 ix86_conditional_register_usage (void)
4334 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 if (fixed_regs[i] > 1)
4337 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4338 if (call_used_regs[i] > 1)
4339 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4342 /* The PIC register, if it exists, is fixed. */
4343 j = PIC_OFFSET_TABLE_REGNUM;
4344 if (j != INVALID_REGNUM)
4345 fixed_regs[j] = call_used_regs[j] = 1;
4347 /* The MS_ABI changes the set of call-used registers. */
4348 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4350 call_used_regs[SI_REG] = 0;
4351 call_used_regs[DI_REG] = 0;
4352 call_used_regs[XMM6_REG] = 0;
4353 call_used_regs[XMM7_REG] = 0;
4354 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4355 call_used_regs[i] = 0;
4358 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4359 other call-clobbered regs for 64-bit. */
4362 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4364 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4365 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4366 && call_used_regs[i])
4367 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4370 /* If MMX is disabled, squash the registers. */
4372 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4373 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4374 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4376 /* If SSE is disabled, squash the registers. */
4378 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4379 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4380 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4382 /* If the FPU is disabled, squash the registers. */
4383 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4384 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4385 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4386 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4388 /* If 32-bit, squash the 64-bit registers. */
4391 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4393 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4399 /* Save the current options */
4402 ix86_function_specific_save (struct cl_target_option *ptr)
4404 ptr->arch = ix86_arch;
4405 ptr->schedule = ix86_schedule;
4406 ptr->tune = ix86_tune;
4407 ptr->fpmath = ix86_fpmath;
4408 ptr->branch_cost = ix86_branch_cost;
4409 ptr->tune_defaulted = ix86_tune_defaulted;
4410 ptr->arch_specified = ix86_arch_specified;
4411 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4412 ptr->ix86_target_flags_explicit = target_flags_explicit;
4414 /* The fields are char but the variables are not; make sure the
4415 values fit in the fields. */
4416 gcc_assert (ptr->arch == ix86_arch);
4417 gcc_assert (ptr->schedule == ix86_schedule);
4418 gcc_assert (ptr->tune == ix86_tune);
4419 gcc_assert (ptr->fpmath == ix86_fpmath);
4420 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4423 /* Restore the current options */
4426 ix86_function_specific_restore (struct cl_target_option *ptr)
4428 enum processor_type old_tune = ix86_tune;
4429 enum processor_type old_arch = ix86_arch;
4430 unsigned int ix86_arch_mask, ix86_tune_mask;
4433 ix86_arch = (enum processor_type) ptr->arch;
4434 ix86_schedule = (enum attr_cpu) ptr->schedule;
4435 ix86_tune = (enum processor_type) ptr->tune;
4436 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4437 ix86_branch_cost = ptr->branch_cost;
4438 ix86_tune_defaulted = ptr->tune_defaulted;
4439 ix86_arch_specified = ptr->arch_specified;
4440 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4441 target_flags_explicit = ptr->ix86_target_flags_explicit;
4443 /* Recreate the arch feature tests if the arch changed */
4444 if (old_arch != ix86_arch)
4446 ix86_arch_mask = 1u << ix86_arch;
4447 for (i = 0; i < X86_ARCH_LAST; ++i)
4448 ix86_arch_features[i]
4449 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4452 /* Recreate the tune optimization tests */
4453 if (old_tune != ix86_tune)
4455 ix86_tune_mask = 1u << ix86_tune;
4456 for (i = 0; i < X86_TUNE_LAST; ++i)
4457 ix86_tune_features[i]
4458 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4462 /* Print the current options */
4465 ix86_function_specific_print (FILE *file, int indent,
4466 struct cl_target_option *ptr)
4469 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4470 NULL, NULL, NULL, false);
4472 fprintf (file, "%*sarch = %d (%s)\n",
4475 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4476 ? cpu_names[ptr->arch]
4479 fprintf (file, "%*stune = %d (%s)\n",
4482 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4483 ? cpu_names[ptr->tune]
4486 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4487 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4488 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4489 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4493 fprintf (file, "%*s%s\n", indent, "", target_string);
4494 free (target_string);
/* Inner function to process the attribute((target(...))); take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */
4504 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4509 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4510 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4511 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4512 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4527 enum ix86_opt_type type;
4532 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4533 IX86_ATTR_ISA ("abm", OPT_mabm),
4534 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4535 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4536 IX86_ATTR_ISA ("aes", OPT_maes),
4537 IX86_ATTR_ISA ("avx", OPT_mavx),
4538 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4539 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4540 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4541 IX86_ATTR_ISA ("sse", OPT_msse),
4542 IX86_ATTR_ISA ("sse2", OPT_msse2),
4543 IX86_ATTR_ISA ("sse3", OPT_msse3),
4544 IX86_ATTR_ISA ("sse4", OPT_msse4),
4545 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4546 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4547 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4548 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4549 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4550 IX86_ATTR_ISA ("xop", OPT_mxop),
4551 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4552 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4553 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4554 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4556 /* string options */
4557 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4558 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4559 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4562 IX86_ATTR_YES ("cld",
4566 IX86_ATTR_NO ("fancy-math-387",
4567 OPT_mfancy_math_387,
4568 MASK_NO_FANCY_MATH_387),
4570 IX86_ATTR_YES ("ieee-fp",
4574 IX86_ATTR_YES ("inline-all-stringops",
4575 OPT_minline_all_stringops,
4576 MASK_INLINE_ALL_STRINGOPS),
4578 IX86_ATTR_YES ("inline-stringops-dynamically",
4579 OPT_minline_stringops_dynamically,
4580 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4582 IX86_ATTR_NO ("align-stringops",
4583 OPT_mno_align_stringops,
4584 MASK_NO_ALIGN_STRINGOPS),
4586 IX86_ATTR_YES ("recip",
4592 /* If this is a list, recurse to get the options. */
4593 if (TREE_CODE (args) == TREE_LIST)
4597 for (; args; args = TREE_CHAIN (args))
4598 if (TREE_VALUE (args)
4599 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4605 else if (TREE_CODE (args) != STRING_CST)
4608 /* Handle multiple arguments separated by commas. */
4609 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4611 while (next_optstr && *next_optstr != '\0')
4613 char *p = next_optstr;
4615 char *comma = strchr (next_optstr, ',');
4616 const char *opt_string;
4617 size_t len, opt_len;
4622 enum ix86_opt_type type = ix86_opt_unknown;
4628 len = comma - next_optstr;
4629 next_optstr = comma + 1;
4637 /* Recognize no-xxx. */
4638 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4647 /* Find the option. */
4650 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4652 type = attrs[i].type;
4653 opt_len = attrs[i].len;
4654 if (ch == attrs[i].string[0]
4655 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4656 && memcmp (p, attrs[i].string, opt_len) == 0)
4659 mask = attrs[i].mask;
4660 opt_string = attrs[i].string;
4665 /* Process the option. */
4668 error ("attribute(target(\"%s\")) is unknown", orig_p);
4672 else if (type == ix86_opt_isa)
4673 ix86_handle_option (opt, p, opt_set_p);
4675 else if (type == ix86_opt_yes || type == ix86_opt_no)
4677 if (type == ix86_opt_no)
4678 opt_set_p = !opt_set_p;
4681 target_flags |= mask;
4683 target_flags &= ~mask;
4686 else if (type == ix86_opt_str)
4690 error ("option(\"%s\") was already specified", opt_string);
4694 p_strings[opt] = xstrdup (p + opt_len);
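/* For illustration (hypothetical user code): the parser above accepts a
   comma-separated string with an optional "no-" prefix per option, so

       __attribute__((target("sse4.2,no-fma4,arch=core2")))
       int foo (void);

   enables SSE4.2, disables FMA4 and overrides -march for foo alone.  */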
4704 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4707 ix86_valid_target_attribute_tree (tree args)
4709 const char *orig_arch_string = ix86_arch_string;
4710 const char *orig_tune_string = ix86_tune_string;
4711 const char *orig_fpmath_string = ix86_fpmath_string;
4712 int orig_tune_defaulted = ix86_tune_defaulted;
4713 int orig_arch_specified = ix86_arch_specified;
4714 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4717 struct cl_target_option *def
4718 = TREE_TARGET_OPTION (target_option_default_node);
4720 /* Process each of the options on the chain. */
4721 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
/* If the changed options are different from the default, rerun
   ix86_option_override_internal, and then save the options away.
   The string options are attribute options, and will be undone
   when we copy the save structure.  */
4728 if (ix86_isa_flags != def->x_ix86_isa_flags
4729 || target_flags != def->x_target_flags
4730 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4731 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4732 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4734 /* If we are using the default tune= or arch=, undo the string assigned,
4735 and use the default. */
4736 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4737 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4738 else if (!orig_arch_specified)
4739 ix86_arch_string = NULL;
4741 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4742 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4743 else if (orig_tune_defaulted)
4744 ix86_tune_string = NULL;
4746 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4747 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4748 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4749 else if (!TARGET_64BIT && TARGET_SSE)
4750 ix86_fpmath_string = "sse,387";
4752 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4753 ix86_option_override_internal (false);
4755 /* Add any builtin functions with the new isa if any. */
4756 ix86_add_new_builtins (ix86_isa_flags);
/* Save the current options unless we are validating options for #pragma.  */
4760 t = build_target_option_node ();
4762 ix86_arch_string = orig_arch_string;
4763 ix86_tune_string = orig_tune_string;
4764 ix86_fpmath_string = orig_fpmath_string;
4766 /* Free up memory allocated to hold the strings */
4767 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4768 if (option_strings[i])
4769 free (option_strings[i]);
4775 /* Hook to validate attribute((target("string"))). */
4778 ix86_valid_target_attribute_p (tree fndecl,
4779 tree ARG_UNUSED (name),
4781 int ARG_UNUSED (flags))
4783 struct cl_target_option cur_target;
4785 tree old_optimize = build_optimization_node ();
4786 tree new_target, new_optimize;
4787 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4789 /* If the function changed the optimization levels as well as setting target
4790 options, start with the optimizations specified. */
4791 if (func_optimize && func_optimize != old_optimize)
4792 cl_optimization_restore (&global_options,
4793 TREE_OPTIMIZATION (func_optimize));
4795 /* The target attributes may also change some optimization flags, so update
4796 the optimization options if necessary. */
4797 cl_target_option_save (&cur_target, &global_options);
4798 new_target = ix86_valid_target_attribute_tree (args);
4799 new_optimize = build_optimization_node ();
4806 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4808 if (old_optimize != new_optimize)
4809 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4812 cl_target_option_restore (&global_options, &cur_target);
4814 if (old_optimize != new_optimize)
4815 cl_optimization_restore (&global_options,
4816 TREE_OPTIMIZATION (old_optimize));
4822 /* Hook to determine if one function can safely inline another. */
4825 ix86_can_inline_p (tree caller, tree callee)
4828 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4829 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4831 /* If callee has no option attributes, then it is ok to inline. */
/* If caller has no option attributes, but callee does then it is not ok to
   inline.  */
4837 else if (!caller_tree)
4842 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4843 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
/* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
   function can inline an SSE2 function but an SSE2 function can't inline
   an SSE4 function.  */
4848 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4849 != callee_opts->x_ix86_isa_flags)
4852 /* See if we have the same non-isa options. */
4853 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4856 /* See if arch, tune, etc. are the same. */
4857 else if (caller_opts->arch != callee_opts->arch)
4860 else if (caller_opts->tune != callee_opts->tune)
4863 else if (caller_opts->fpmath != callee_opts->fpmath)
4866 else if (caller_opts->branch_cost != callee_opts->branch_cost)
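/* For illustration (hypothetical declarations): under the subset rule
   above,

       __attribute__((target("sse4.2"))) void caller (void);
       __attribute__((target("sse2")))  void callee (void);

   callee may be inlined into caller, but not the other way around.  */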
4877 /* Remember the last target of ix86_set_current_function. */
4878 static GTY(()) tree ix86_previous_fndecl;
4880 /* Establish appropriate back-end context for processing the function
4881 FNDECL. The argument might be NULL to indicate processing at top
4882 level, outside of any function scope. */
4884 ix86_set_current_function (tree fndecl)
4886 /* Only change the context if the function changes. This hook is called
4887 several times in the course of compiling a function, and we don't want to
4888 slow things down too much or call target_reinit when it isn't safe. */
4889 if (fndecl && fndecl != ix86_previous_fndecl)
4891 tree old_tree = (ix86_previous_fndecl
4892 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4895 tree new_tree = (fndecl
4896 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4899 ix86_previous_fndecl = fndecl;
4900 if (old_tree == new_tree)
4905 cl_target_option_restore (&global_options,
4906 TREE_TARGET_OPTION (new_tree));
4912 struct cl_target_option *def
4913 = TREE_TARGET_OPTION (target_option_current_node);
4915 cl_target_option_restore (&global_options, def);
4922 /* Return true if this goes in large data/bss. */
4925 ix86_in_large_data_p (tree exp)
4927 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4930 /* Functions are never large data. */
4931 if (TREE_CODE (exp) == FUNCTION_DECL)
4934 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4936 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4937 if (strcmp (section, ".ldata") == 0
4938 || strcmp (section, ".lbss") == 0)
4944 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4946 /* If this is an incomplete type with size 0, then we can't put it
4947 in data because it might be too big when completed. */
4948 if (!size || size > ix86_section_threshold)
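/* A worked example (hypothetical variable): with -mcmodel=medium and a
   section threshold of 65536, a definition such as

       static char big[1 << 20];

   exceeds the threshold and is placed in the large data sections.  */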
4955 /* Switch to the appropriate section for output of DECL.
4956 DECL is either a `VAR_DECL' node or a constant of some sort.
4957 RELOC indicates whether forming the initial value of DECL requires
4958 link-time relocations. */
4960 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4964 x86_64_elf_select_section (tree decl, int reloc,
4965 unsigned HOST_WIDE_INT align)
4967 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4968 && ix86_in_large_data_p (decl))
4970 const char *sname = NULL;
4971 unsigned int flags = SECTION_WRITE;
4972 switch (categorize_decl_for_section (decl, reloc))
4977 case SECCAT_DATA_REL:
4978 sname = ".ldata.rel";
4980 case SECCAT_DATA_REL_LOCAL:
4981 sname = ".ldata.rel.local";
4983 case SECCAT_DATA_REL_RO:
4984 sname = ".ldata.rel.ro";
4986 case SECCAT_DATA_REL_RO_LOCAL:
4987 sname = ".ldata.rel.ro.local";
4991 flags |= SECTION_BSS;
4994 case SECCAT_RODATA_MERGE_STR:
4995 case SECCAT_RODATA_MERGE_STR_INIT:
4996 case SECCAT_RODATA_MERGE_CONST:
5000 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
5013 /* We might get called with string constants, but get_named_section
5014 doesn't like them as they are not DECLs. Also, we need to set
5015 flags in that case. */
5017 return get_section (sname, flags, NULL);
5018 return get_named_section (decl, sname, reloc);
5021 return default_elf_select_section (decl, reloc, align);
5024 /* Build up a unique section name, expressed as a
5025 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5026 RELOC indicates whether the initial value of EXP requires
5027 link-time relocations. */
5029 static void ATTRIBUTE_UNUSED
5030 x86_64_elf_unique_section (tree decl, int reloc)
5032 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5033 && ix86_in_large_data_p (decl))
5035 const char *prefix = NULL;
5036 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5037 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5039 switch (categorize_decl_for_section (decl, reloc))
5042 case SECCAT_DATA_REL:
5043 case SECCAT_DATA_REL_LOCAL:
5044 case SECCAT_DATA_REL_RO:
5045 case SECCAT_DATA_REL_RO_LOCAL:
5046 prefix = one_only ? ".ld" : ".ldata";
5049 prefix = one_only ? ".lb" : ".lbss";
5052 case SECCAT_RODATA_MERGE_STR:
5053 case SECCAT_RODATA_MERGE_STR_INIT:
5054 case SECCAT_RODATA_MERGE_CONST:
5055 prefix = one_only ? ".lr" : ".lrodata";
5057 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
5070 const char *name, *linkonce;
5073 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5074 name = targetm.strip_name_encoding (name);
5076 /* If we're using one_only, then there needs to be a .gnu.linkonce
5077 prefix to the section name. */
5078 linkonce = one_only ? ".gnu.linkonce" : "";
5080 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5082 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5086 default_unique_section (decl, reloc);
5089 #ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
5096 x86_elf_aligned_common (FILE *file,
5097 const char *name, unsigned HOST_WIDE_INT size,
5100 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5101 && size > (unsigned int)ix86_section_threshold)
5102 fputs (".largecomm\t", file);
5104 fputs (COMMON_ASM_OP, file);
5105 assemble_name (file, name);
5106 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5107 size, align / BITS_PER_UNIT);
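/* For illustration, a hypothetical 128-KiB common symbol under
   -mcmodel=medium would be emitted as

       .largecomm	buf,131072,32

   while smaller objects keep the ordinary COMMON_ASM_OP form.  */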
5111 /* Utility function for targets to use in implementing
5112 ASM_OUTPUT_ALIGNED_BSS. */
5115 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5116 const char *name, unsigned HOST_WIDE_INT size,
5119 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5120 && size > (unsigned int)ix86_section_threshold)
5121 switch_to_section (get_named_section (decl, ".lbss", 0));
5123 switch_to_section (bss_section);
5124 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5125 #ifdef ASM_DECLARE_OBJECT_NAME
5126 last_assemble_variable_decl = decl;
5127 ASM_DECLARE_OBJECT_NAME (file, name, decl);
/* The standard thing is just to output a label for the object.  */
5130 ASM_OUTPUT_LABEL (file, name);
5131 #endif /* ASM_DECLARE_OBJECT_NAME */
5132 ASM_OUTPUT_SKIP (file, size ? size : 1);
5135 static const struct default_options ix86_option_optimization_table[] =
5137 /* Turn off -fschedule-insns by default. It tends to make the
5138 problem with not enough registers even worse. */
5139 #ifdef INSN_SCHEDULING
5140 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5143 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5144 SUBTARGET_OPTIMIZATION_OPTIONS,
5146 { OPT_LEVELS_NONE, 0, NULL, 0 }
5149 /* Implement TARGET_OPTION_INIT_STRUCT. */
5152 ix86_option_init_struct (struct gcc_options *opts)
5155 /* The Darwin libraries never set errno, so we might as well
5156 avoid calling them when that's the only reason we would. */
5157 opts->x_flag_errno_math = 0;
5159 opts->x_flag_pcc_struct_return = 2;
5160 opts->x_flag_asynchronous_unwind_tables = 2;
5161 opts->x_flag_vect_cost_model = 1;
5164 /* Decide whether we must probe the stack before any space allocation
5165 on this target. It's essentially TARGET_STACK_PROBE except when
5166 -fstack-check causes the stack to be already probed differently. */
5169 ix86_target_stack_probe (void)
5171 /* Do not probe the stack twice if static stack checking is enabled. */
5172 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5175 return TARGET_STACK_PROBE;
5178 /* Decide whether we can make a sibling call to a function. DECL is the
5179 declaration of the function being targeted by the call and EXP is the
5180 CALL_EXPR representing the call. */
5183 ix86_function_ok_for_sibcall (tree decl, tree exp)
5185 tree type, decl_or_type;
5188 /* If we are generating position-independent code, we cannot sibcall
5189 optimize any indirect call, or a direct call to a global function,
5190 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5194 && (!decl || !targetm.binds_local_p (decl)))
5197 /* If we need to align the outgoing stack, then sibcalling would
5198 unalign the stack, which may break the called function. */
5199 if (ix86_minimum_incoming_stack_boundary (true)
5200 < PREFERRED_STACK_BOUNDARY)
5205 decl_or_type = decl;
5206 type = TREE_TYPE (decl);
5210 /* We're looking at the CALL_EXPR, we need the type of the function. */
5211 type = CALL_EXPR_FN (exp); /* pointer expression */
5212 type = TREE_TYPE (type); /* pointer type */
5213 type = TREE_TYPE (type); /* function type */
5214 decl_or_type = type;
5217 /* Check that the return value locations are the same. Like
5218 if we are returning floats on the 80387 register stack, we cannot
5219 make a sibcall from a function that doesn't return a float to a
5220 function that does or, conversely, from a function that does return
5221 a float to a function that doesn't; the necessary stack adjustment
5222 would not be executed. This is also the place we notice
5223 differences in the return value ABI. Note that it is ok for one
5224 of the functions to have void return type as long as the return
5225 value of the other is passed in a register. */
5226 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5227 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5229 if (STACK_REG_P (a) || STACK_REG_P (b))
5231 if (!rtx_equal_p (a, b))
5234 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
/* Disable sibcall if we need to generate vzeroupper after
   the call returns.  */
5238 if (TARGET_VZEROUPPER
5239 && cfun->machine->callee_return_avx256_p
5240 && !cfun->machine->caller_return_avx256_p)
5243 else if (!rtx_equal_p (a, b))
5248 /* The SYSV ABI has more call-clobbered registers;
5249 disallow sibcalls from MS to SYSV. */
5250 if (cfun->machine->call_abi == MS_ABI
5251 && ix86_function_type_abi (type) == SYSV_ABI)
5256 /* If this call is indirect, we'll need to be able to use a
5257 call-clobbered register for the address of the target function.
5258 Make sure that all such registers are not used for passing
5259 parameters. Note that DLLIMPORT functions are indirect. */
5261 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5263 if (ix86_function_regparm (type, NULL) >= 3)
5265 /* ??? Need to count the actual number of registers to be used,
5266 not the possible number of registers. Fix later. */
5272 /* Otherwise okay. That also includes certain types of indirect calls. */
5276 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5277 and "sseregparm" calling convention attributes;
5278 arguments as in struct attribute_spec.handler. */
5281 ix86_handle_cconv_attribute (tree *node, tree name,
5283 int flags ATTRIBUTE_UNUSED,
5286 if (TREE_CODE (*node) != FUNCTION_TYPE
5287 && TREE_CODE (*node) != METHOD_TYPE
5288 && TREE_CODE (*node) != FIELD_DECL
5289 && TREE_CODE (*node) != TYPE_DECL)
5291 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5293 *no_add_attrs = true;
5297 /* Can combine regparm with all attributes but fastcall. */
5298 if (is_attribute_p ("regparm", name))
5302 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5304 error ("fastcall and regparm attributes are not compatible");
5307 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
error ("regparm and thiscall attributes are not compatible");
5312 cst = TREE_VALUE (args);
5313 if (TREE_CODE (cst) != INTEGER_CST)
5315 warning (OPT_Wattributes,
5316 "%qE attribute requires an integer constant argument",
5318 *no_add_attrs = true;
5320 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5322 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5324 *no_add_attrs = true;
5332 /* Do not warn when emulating the MS ABI. */
5333 if ((TREE_CODE (*node) != FUNCTION_TYPE
5334 && TREE_CODE (*node) != METHOD_TYPE)
5335 || ix86_function_type_abi (*node) != MS_ABI)
5336 warning (OPT_Wattributes, "%qE attribute ignored",
5338 *no_add_attrs = true;
5342 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5343 if (is_attribute_p ("fastcall", name))
5345 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5347 error ("fastcall and cdecl attributes are not compatible");
5349 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5351 error ("fastcall and stdcall attributes are not compatible");
5353 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5355 error ("fastcall and regparm attributes are not compatible");
5357 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5359 error ("fastcall and thiscall attributes are not compatible");
/* Can combine stdcall with fastcall (redundant), regparm and
   sseregparm.  */
5365 else if (is_attribute_p ("stdcall", name))
5367 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5369 error ("stdcall and cdecl attributes are not compatible");
5371 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5373 error ("stdcall and fastcall attributes are not compatible");
5375 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5377 error ("stdcall and thiscall attributes are not compatible");
5381 /* Can combine cdecl with regparm and sseregparm. */
5382 else if (is_attribute_p ("cdecl", name))
5384 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5386 error ("stdcall and cdecl attributes are not compatible");
5388 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5390 error ("fastcall and cdecl attributes are not compatible");
5392 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5394 error ("cdecl and thiscall attributes are not compatible");
5397 else if (is_attribute_p ("thiscall", name))
5399 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
warning (OPT_Wattributes, "%qE attribute is used for a non-class method",
5402 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5404 error ("stdcall and thiscall attributes are not compatible");
5406 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5408 error ("fastcall and thiscall attributes are not compatible");
5410 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5412 error ("cdecl and thiscall attributes are not compatible");
5416 /* Can combine sseregparm with all attributes. */
5421 /* Return 0 if the attributes for two types are incompatible, 1 if they
5422 are compatible, and 2 if they are nearly compatible (which causes a
5423 warning to be generated). */
5426 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5428 /* Check for mismatch of non-default calling convention. */
5429 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5431 if (TREE_CODE (type1) != FUNCTION_TYPE
5432 && TREE_CODE (type1) != METHOD_TYPE)
5435 /* Check for mismatched fastcall/regparm types. */
5436 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5437 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5438 || (ix86_function_regparm (type1, NULL)
5439 != ix86_function_regparm (type2, NULL)))
5442 /* Check for mismatched sseregparm types. */
5443 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5444 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5447 /* Check for mismatched thiscall types. */
5448 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5449 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5452 /* Check for mismatched return types (cdecl vs stdcall). */
5453 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5454 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
5460 /* Return the regparm value for a function with the indicated TYPE and DECL.
5461 DECL may be NULL when calling function indirectly
5462 or considering a libcall. */
5465 ix86_function_regparm (const_tree type, const_tree decl)
5471 return (ix86_function_type_abi (type) == SYSV_ABI
5472 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5474 regparm = ix86_regparm;
5475 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5478 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5485 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5488 /* Use register calling convention for local functions when possible. */
5490 && TREE_CODE (decl) == FUNCTION_DECL
5492 && !(profile_flag && !flag_fentry))
5494 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5495 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5496 if (i && i->local && i->can_change_signature)
5498 int local_regparm, globals = 0, regno;
5500 /* Make sure no regparm register is taken by a
5501 fixed register variable. */
5502 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5503 if (fixed_regs[local_regparm])
5506 /* We don't want to use regparm(3) for nested functions as
5507 these use a static chain pointer in the third argument. */
5508 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5511 /* In 32-bit mode save a register for the split stack. */
5512 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
/* Each fixed register usage increases register pressure, so fewer
   registers should be used for argument passing.  This functionality
   can be overridden by an explicit regparm value.  */
5519 for (regno = 0; regno <= DI_REG; regno++)
5520 if (fixed_regs[regno])
5524 = globals < local_regparm ? local_regparm - globals : 0;
5526 if (local_regparm > regparm)
5527 regparm = local_regparm;
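/* A worked example of the heuristic above (hypothetical translation
   unit): if two registers up to DI_REG are claimed by fixed register
   variables (globals == 2), a local function otherwise eligible for
   regparm(3) is trimmed to regparm(1), keeping argument passing away
   from the fixed registers.  */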
5534 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5535 DFmode (2) arguments in SSE registers for a function with the
5536 indicated TYPE and DECL. DECL may be NULL when calling function
5537 indirectly or considering a libcall. Otherwise return 0. */
5540 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5542 gcc_assert (!TARGET_64BIT);
5544 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5545 by the sseregparm attribute. */
5546 if (TARGET_SSEREGPARM
5547 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5554 error ("calling %qD with attribute sseregparm without "
5555 "SSE/SSE2 enabled", decl);
5557 error ("calling %qT with attribute sseregparm without "
5558 "SSE/SSE2 enabled", type);
5566 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5567 (and DFmode for SSE2) arguments in SSE registers. */
5568 if (decl && TARGET_SSE_MATH && optimize
5569 && !(profile_flag && !flag_fentry))
5571 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5572 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5573 if (i && i->local && i->can_change_signature)
5574 return TARGET_SSE2 ? 2 : 1;
5580 /* Return true if EAX is live at the start of the function. Used by
5581 ix86_expand_prologue to determine if we need special help before
5582 calling allocate_stack_worker. */
5585 ix86_eax_live_at_start_p (void)
5587 /* Cheat. Don't bother working forward from ix86_function_regparm
5588 to the function type to whether an actual argument is located in
5589 eax. Instead just look at cfg info, which is still close enough
5590 to correct at this point. This gives false positives for broken
5591 functions that might use uninitialized data that happens to be
5592 allocated in eax, but who cares? */
5593 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5597 ix86_keep_aggregate_return_pointer (tree fntype)
5601 attr = lookup_attribute ("callee_pop_aggregate_return",
5602 TYPE_ATTRIBUTES (fntype));
5604 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5606 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5609 /* Value is the number of bytes of arguments automatically
5610 popped when returning from a subroutine call.
5611 FUNDECL is the declaration node of the function (as a tree),
5612 FUNTYPE is the data type of the function (as a tree),
5613 or for a library call it is an identifier node for the subroutine name.
5614 SIZE is the number of bytes of arguments passed on the stack.
5616 On the 80386, the RTD insn may be used to pop them if the number
5617 of args is fixed, but if the number is variable then the caller
5618 must pop them all. RTD can't be used for library calls now
5619 because the library is compiled with the Unix compiler.
5620 Use of RTD is a selectable option, since it is incompatible with
5621 standard Unix calling sequences. If the option is not selected,
5622 the caller must always pop the args.
5624 The attribute stdcall is equivalent to RTD on a per module basis. */
5627 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5631 /* None of the 64-bit ABIs pop arguments. */
5635 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5637 /* Cdecl functions override -mrtd, and never pop the stack. */
5638 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5640 /* Stdcall and fastcall functions will pop the stack if not variable args. */
5642 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5643 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5644 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5647 if (rtd && ! stdarg_p (funtype))
5651 /* Lose any fake structure return argument if it is passed on the stack. */
5652 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5653 && !ix86_keep_aggregate_return_pointer (funtype))
5655 int nregs = ix86_function_regparm (funtype, fundecl);
5657 return GET_MODE_SIZE (Pmode);
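/* Worked example (illustrative): for a 32-bit function declared

     int __attribute__ ((stdcall)) f (int a, int b);

   the callee pops its own arguments (ret $8), so this hook returns 8
   and the caller emits no stack adjustment after the call; a cdecl
   or variadic function yields 0 and the caller pops instead.  */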
5663 /* Argument support functions. */
5665 /* Return true when register may be used to pass function parameters. */
5667 ix86_function_arg_regno_p (int regno)
5670 const int *parm_regs;
5675 return (regno < REGPARM_MAX
5676 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5678 return (regno < REGPARM_MAX
5679 || (TARGET_MMX && MMX_REGNO_P (regno)
5680 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5681 || (TARGET_SSE && SSE_REGNO_P (regno)
5682 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5687 if (SSE_REGNO_P (regno) && TARGET_SSE)
5692 if (TARGET_SSE && SSE_REGNO_P (regno)
5693 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5697 /* TODO: The function should depend on current function ABI but
5698 builtins.c would need updating then. Therefore we use the default ABI. */
5701 /* RAX is used as hidden argument to va_arg functions. */
5702 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5705 if (ix86_abi == MS_ABI)
5706 parm_regs = x86_64_ms_abi_int_parameter_registers;
5708 parm_regs = x86_64_int_parameter_registers;
5709 for (i = 0; i < (ix86_abi == MS_ABI
5710 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5711 if (regno == parm_regs[i])
5716 /* Return true if we do not know how to pass TYPE solely in registers. */
5719 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5721 if (must_pass_in_stack_var_size_or_pad (mode, type))
5724 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5725 The layout_type routine is crafty and tries to trick us into passing
5726 currently unsupported vector types on the stack by using TImode. */
5727 return (!TARGET_64BIT && mode == TImode
5728 && type && TREE_CODE (type) != VECTOR_TYPE);
5731 /* Return the size, in bytes, of the area reserved for arguments passed
5732 in registers for the function represented by FNDECL, dependent on the ABI used. */
5735 ix86_reg_parm_stack_space (const_tree fndecl)
5737 enum calling_abi call_abi = SYSV_ABI;
5738 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5739 call_abi = ix86_function_abi (fndecl);
5741 call_abi = ix86_function_type_abi (fndecl);
5742 if (call_abi == MS_ABI)
5747 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the call abi used. */
5750 ix86_function_type_abi (const_tree fntype)
5752 if (TARGET_64BIT && fntype != NULL)
5754 enum calling_abi abi = ix86_abi;
5755 if (abi == SYSV_ABI)
5757 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5760 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5768 ix86_function_ms_hook_prologue (const_tree fn)
5770 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5772 if (decl_function_context (fn) != NULL_TREE)
5773 error_at (DECL_SOURCE_LOCATION (fn),
5774 "ms_hook_prologue is not compatible with nested function");
5781 static enum calling_abi
5782 ix86_function_abi (const_tree fndecl)
5786 return ix86_function_type_abi (TREE_TYPE (fndecl));
5789 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the call abi used. */
5792 ix86_cfun_abi (void)
5794 if (! cfun || ! TARGET_64BIT)
5796 return cfun->machine->call_abi;
5799 /* Write the extra assembler code needed to declare a function properly. */
5802 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5805 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5809 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5810 unsigned int filler_cc = 0xcccccccc;
5812 for (i = 0; i < filler_count; i += 4)
5813 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5816 #ifdef SUBTARGET_ASM_UNWIND_INIT
5817 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5820 ASM_OUTPUT_LABEL (asm_out_file, fname);
5822 /* Output magic byte marker, if hot-patch attribute is set. */
5827 /* leaq [%rsp + 0], %rsp */
5828 asm_fprintf (asm_out_file, ASM_BYTE
5829 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5833 /* movl.s %edi, %edi
5835 movl.s %esp, %ebp */
5836 asm_fprintf (asm_out_file, ASM_BYTE
5837 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5843 extern void init_regs (void);
5845 /* Implementation of the call ABI switching target hook. Selects the
5846 call register sets specific to FNDECL. See also
5847 ix86_conditional_register_usage for more details. */
5849 ix86_call_abi_override (const_tree fndecl)
5851 if (fndecl == NULL_TREE)
5852 cfun->machine->call_abi = ix86_abi;
5854 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5857 /* MS and SYSV ABI have different sets of call-used registers. Avoid expensive
5858 re-initialization of init_regs each time we switch function context since
5859 this is needed only during RTL expansion. */
5861 ix86_maybe_switch_abi (void)
5864 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5868 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5869 for a call to a function whose data type is FNTYPE.
5870 For a library call, FNTYPE is 0. */
5873 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5874 tree fntype, /* tree ptr for function decl */
5875 rtx libname, /* SYMBOL_REF of library name or 0 */
5879 struct cgraph_local_info *i;
5882 memset (cum, 0, sizeof (*cum));
5884 /* Initialize for the current callee. */
5887 cfun->machine->callee_pass_avx256_p = false;
5888 cfun->machine->callee_return_avx256_p = false;
5893 i = cgraph_local_info (fndecl);
5894 cum->call_abi = ix86_function_abi (fndecl);
5895 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5900 cum->call_abi = ix86_function_type_abi (fntype);
5902 fnret_type = TREE_TYPE (fntype);
5907 if (TARGET_VZEROUPPER && fnret_type)
5909 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5911 if (function_pass_avx256_p (fnret_value))
5913 /* The return value of this function uses 256bit AVX modes. */
5915 cfun->machine->callee_return_avx256_p = true;
5917 cfun->machine->caller_return_avx256_p = true;
5921 cum->caller = caller;
5923 /* Set up the number of registers to use for passing arguments. */
5925 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5926 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5927 "or subtarget optimization implying it");
5928 cum->nregs = ix86_regparm;
5931 cum->nregs = (cum->call_abi == SYSV_ABI
5932 ? X86_64_REGPARM_MAX
5933 : X86_64_MS_REGPARM_MAX);
5937 cum->sse_nregs = SSE_REGPARM_MAX;
5940 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5941 ? X86_64_SSE_REGPARM_MAX
5942 : X86_64_MS_SSE_REGPARM_MAX);
5946 cum->mmx_nregs = MMX_REGPARM_MAX;
5947 cum->warn_avx = true;
5948 cum->warn_sse = true;
5949 cum->warn_mmx = true;
5951 /* Because the type might mismatch between caller and callee, we need to
5952 use the actual type of the function for local calls.
5953 FIXME: cgraph_analyze can be told to actually record if a function uses
5954 va_start, so for local functions maybe_vaarg can be made aggressive instead of conservative.
5956 FIXME: once the typesystem is fixed, we won't need this code anymore. */
5957 if (i && i->local && i->can_change_signature)
5958 fntype = TREE_TYPE (fndecl);
5959 cum->maybe_vaarg = (fntype
5960 ? (!prototype_p (fntype) || stdarg_p (fntype))
5965 /* If there are variable arguments, then we won't pass anything
5966 in registers in 32-bit mode. */
5967 if (stdarg_p (fntype))
5978 /* Use ecx and edx registers if function has fastcall attribute,
5979 else look for regparm information. */
5982 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5985 cum->fastcall = 1; /* Same first register as in fastcall. */
5987 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5993 cum->nregs = ix86_function_regparm (fntype, fndecl);
5996 /* Set up the number of SSE registers used for passing SFmode
5997 and DFmode arguments. Warn for mismatching ABI. */
5998 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6002 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6003 But in the case of vector types, it is some vector mode.
6005 When we have only some of our vector isa extensions enabled, then there
6006 are some modes for which vector_mode_supported_p is false. For these
6007 modes, the generic vector support in gcc will choose some non-vector mode
6008 in order to implement the type. By computing the natural mode, we'll
6009 select the proper ABI location for the operand and not depend on whatever
6010 the middle-end decides to do with these vector types.
6012 The middle-end can't deal with vector types > 16 bytes. In this
6013 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
6016 static enum machine_mode
6017 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
6019 enum machine_mode mode = TYPE_MODE (type);
6021 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6023 HOST_WIDE_INT size = int_size_in_bytes (type);
6024 if ((size == 8 || size == 16 || size == 32)
6025 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6026 && TYPE_VECTOR_SUBPARTS (type) > 1)
6028 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6030 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6031 mode = MIN_MODE_VECTOR_FLOAT;
6033 mode = MIN_MODE_VECTOR_INT;
6035 /* Get the mode which has this inner mode and number of units. */
6036 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6037 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6038 && GET_MODE_INNER (mode) == innermode)
6040 if (size == 32 && !TARGET_AVX)
6042 static bool warnedavx;
6049 warning (0, "AVX vector argument without AVX "
6050 "enabled changes the ABI");
6052 return TYPE_MODE (type);
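/* Illustrative example (hypothetical user code): for

     typedef int v8si __attribute__ ((vector_size (32)));

   the natural mode is V8SImode; when AVX is disabled the warning
   above fires once, because the ABI location of such a 32-byte
   argument differs between AVX and non-AVX compilations.  */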
6065 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6066 this may not agree with the mode that the type system has chosen for the
6067 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6068 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6071 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6076 if (orig_mode != BLKmode)
6077 tmp = gen_rtx_REG (orig_mode, regno);
6080 tmp = gen_rtx_REG (mode, regno);
6081 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6082 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6088 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
6089 of this code is to classify each eightbyte of an incoming argument by its register
6090 class and assign registers accordingly. */
6092 /* Return the union class of CLASS1 and CLASS2.
6093 See the x86-64 PS ABI for details. */
6095 static enum x86_64_reg_class
6096 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6098 /* Rule #1: If both classes are equal, this is the resulting class. */
6099 if (class1 == class2)
6102 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
6104 if (class1 == X86_64_NO_CLASS)
6106 if (class2 == X86_64_NO_CLASS)
6109 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6110 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6111 return X86_64_MEMORY_CLASS;
6113 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6114 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6115 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6116 return X86_64_INTEGERSI_CLASS;
6117 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6118 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6119 return X86_64_INTEGER_CLASS;
6121 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
6123 if (class1 == X86_64_X87_CLASS
6124 || class1 == X86_64_X87UP_CLASS
6125 || class1 == X86_64_COMPLEX_X87_CLASS
6126 || class2 == X86_64_X87_CLASS
6127 || class2 == X86_64_X87UP_CLASS
6128 || class2 == X86_64_COMPLEX_X87_CLASS)
6129 return X86_64_MEMORY_CLASS;
6131 /* Rule #6: Otherwise class SSE is used. */
6132 return X86_64_SSE_CLASS;
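/* Worked example (illustrative): in struct { int i; float f; } both
   fields share one eightbyte; the int contributes
   X86_64_INTEGERSI_CLASS and the float X86_64_SSESF_CLASS, so rule #4
   merges them to X86_64_INTEGERSI_CLASS and the pair travels in a
   single general purpose register.  */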
6135 /* Classify the argument of type TYPE and mode MODE.
6136 CLASSES will be filled by the register class used to pass each word
6137 of the operand. The number of words is returned. In case the parameter
6138 should be passed in memory, 0 is returned. As a special case for zero
6139 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6141 BIT_OFFSET is used internally for handling records and specifies the
6142 offset in bits modulo 256 to avoid overflow cases.
6144 See the x86-64 PS ABI for details.
6148 classify_argument (enum machine_mode mode, const_tree type,
6149 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6151 HOST_WIDE_INT bytes =
6152 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6153 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6155 /* Variable sized entities are always passed/returned in memory. */
6159 if (mode != VOIDmode
6160 && targetm.calls.must_pass_in_stack (mode, type))
6163 if (type && AGGREGATE_TYPE_P (type))
6167 enum x86_64_reg_class subclasses[MAX_CLASSES];
6169 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6173 for (i = 0; i < words; i++)
6174 classes[i] = X86_64_NO_CLASS;
6176 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6177 signal the memory class, so handle it as a special case. */
6180 classes[0] = X86_64_NO_CLASS;
6184 /* Classify each field of record and merge classes. */
6185 switch (TREE_CODE (type))
6188 /* And now merge the fields of structure. */
6189 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6191 if (TREE_CODE (field) == FIELD_DECL)
6195 if (TREE_TYPE (field) == error_mark_node)
6198 /* Bitfields are always classified as integer. Handle them
6199 early, since later code would consider them to be
6200 misaligned integers. */
6201 if (DECL_BIT_FIELD (field))
6203 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6204 i < ((int_bit_position (field) + (bit_offset % 64))
6205 + tree_low_cst (DECL_SIZE (field), 0)
6208 merge_classes (X86_64_INTEGER_CLASS,
6215 type = TREE_TYPE (field);
6217 /* Flexible array member is ignored. */
6218 if (TYPE_MODE (type) == BLKmode
6219 && TREE_CODE (type) == ARRAY_TYPE
6220 && TYPE_SIZE (type) == NULL_TREE
6221 && TYPE_DOMAIN (type) != NULL_TREE
6222 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6227 if (!warned && warn_psabi)
6230 inform (input_location,
6231 "the ABI of passing struct with"
6232 " a flexible array member has"
6233 " changed in GCC 4.4");
6237 num = classify_argument (TYPE_MODE (type), type,
6239 (int_bit_position (field)
6240 + bit_offset) % 256);
6243 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6244 for (i = 0; i < num && (i + pos) < words; i++)
6246 merge_classes (subclasses[i], classes[i + pos]);
6253 /* Arrays are handled as small records. */
6256 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6257 TREE_TYPE (type), subclasses, bit_offset);
6261 /* The partial classes are now full classes. */
6262 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6263 subclasses[0] = X86_64_SSE_CLASS;
6264 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6265 && !((bit_offset % 64) == 0 && bytes == 4))
6266 subclasses[0] = X86_64_INTEGER_CLASS;
6268 for (i = 0; i < words; i++)
6269 classes[i] = subclasses[i % num];
6274 case QUAL_UNION_TYPE:
6275 /* Unions are similar to RECORD_TYPE but offset is always 0.
6277 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6279 if (TREE_CODE (field) == FIELD_DECL)
6283 if (TREE_TYPE (field) == error_mark_node)
6286 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6287 TREE_TYPE (field), subclasses,
6291 for (i = 0; i < num; i++)
6292 classes[i] = merge_classes (subclasses[i], classes[i]);
6303 /* When size > 16 bytes, if the first one isn't
6304 X86_64_SSE_CLASS or any other ones aren't
6305 X86_64_SSEUP_CLASS, everything should be passed in memory. */
6307 if (classes[0] != X86_64_SSE_CLASS)
6310 for (i = 1; i < words; i++)
6311 if (classes[i] != X86_64_SSEUP_CLASS)
6315 /* Final merger cleanup. */
6316 for (i = 0; i < words; i++)
6318 /* If one class is MEMORY, everything should be passed in
6320 if (classes[i] == X86_64_MEMORY_CLASS)
6323 /* The X86_64_SSEUP_CLASS should always be preceded by
6324 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6325 if (classes[i] == X86_64_SSEUP_CLASS
6326 && classes[i - 1] != X86_64_SSE_CLASS
6327 && classes[i - 1] != X86_64_SSEUP_CLASS)
6329 /* The first one should never be X86_64_SSEUP_CLASS. */
6330 gcc_assert (i != 0);
6331 classes[i] = X86_64_SSE_CLASS;
6334 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6335 everything should be passed in memory. */
6336 if (classes[i] == X86_64_X87UP_CLASS
6337 && (classes[i - 1] != X86_64_X87_CLASS))
6341 /* The first one should never be X86_64_X87UP_CLASS. */
6342 gcc_assert (i != 0);
6343 if (!warned && warn_psabi)
6346 inform (input_location,
6347 "the ABI of passing union with long double"
6348 " has changed in GCC 4.4");
6356 /* Compute the alignment needed. We align all types to natural boundaries with
6357 the exception of XFmode, which is aligned to 64 bits. */
6358 if (mode != VOIDmode && mode != BLKmode)
6360 int mode_alignment = GET_MODE_BITSIZE (mode);
6363 mode_alignment = 128;
6364 else if (mode == XCmode)
6365 mode_alignment = 256;
6366 if (COMPLEX_MODE_P (mode))
6367 mode_alignment /= 2;
6368 /* Misaligned fields are always returned in memory. */
6369 if (bit_offset % mode_alignment)
6373 /* For V1xx modes, just use the base mode. */
6374 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6375 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6376 mode = GET_MODE_INNER (mode);
6378 /* Classification of atomic types. */
6383 classes[0] = X86_64_SSE_CLASS;
6386 classes[0] = X86_64_SSE_CLASS;
6387 classes[1] = X86_64_SSEUP_CLASS;
6397 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6401 classes[0] = X86_64_INTEGERSI_CLASS;
6404 else if (size <= 64)
6406 classes[0] = X86_64_INTEGER_CLASS;
6409 else if (size <= 64+32)
6411 classes[0] = X86_64_INTEGER_CLASS;
6412 classes[1] = X86_64_INTEGERSI_CLASS;
6415 else if (size <= 64+64)
6417 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6425 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6429 /* OImode shouldn't be used directly. */
6434 if (!(bit_offset % 64))
6435 classes[0] = X86_64_SSESF_CLASS;
6437 classes[0] = X86_64_SSE_CLASS;
6440 classes[0] = X86_64_SSEDF_CLASS;
6443 classes[0] = X86_64_X87_CLASS;
6444 classes[1] = X86_64_X87UP_CLASS;
6447 classes[0] = X86_64_SSE_CLASS;
6448 classes[1] = X86_64_SSEUP_CLASS;
6451 classes[0] = X86_64_SSE_CLASS;
6452 if (!(bit_offset % 64))
6458 if (!warned && warn_psabi)
6461 inform (input_location,
6462 "the ABI of passing structure with complex float"
6463 " member has changed in GCC 4.4");
6465 classes[1] = X86_64_SSESF_CLASS;
6469 classes[0] = X86_64_SSEDF_CLASS;
6470 classes[1] = X86_64_SSEDF_CLASS;
6473 classes[0] = X86_64_COMPLEX_X87_CLASS;
6476 /* These modes are larger than 16 bytes. */
6484 classes[0] = X86_64_SSE_CLASS;
6485 classes[1] = X86_64_SSEUP_CLASS;
6486 classes[2] = X86_64_SSEUP_CLASS;
6487 classes[3] = X86_64_SSEUP_CLASS;
6495 classes[0] = X86_64_SSE_CLASS;
6496 classes[1] = X86_64_SSEUP_CLASS;
6504 classes[0] = X86_64_SSE_CLASS;
6510 gcc_assert (VECTOR_MODE_P (mode));
6515 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6517 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6518 classes[0] = X86_64_INTEGERSI_CLASS;
6520 classes[0] = X86_64_INTEGER_CLASS;
6521 classes[1] = X86_64_INTEGER_CLASS;
6522 return 1 + (bytes > 8);
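/* Worked examples (illustrative, SysV x86-64): for
   struct { double x; double y; } this function returns 2 with
   classes = { X86_64_SSEDF_CLASS, X86_64_SSEDF_CLASS }, so both
   halves go in SSE registers, while struct { long l; double d; }
   classifies as { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS } and is
   split between a general purpose and an SSE register.  */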
6526 /* Examine the argument and return the number of registers required in each
6527 class. Return 0 iff the parameter should be passed in memory. */
6529 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6530 int *int_nregs, int *sse_nregs)
6532 enum x86_64_reg_class regclass[MAX_CLASSES];
6533 int n = classify_argument (mode, type, regclass, 0);
6539 for (n--; n >= 0; n--)
6540 switch (regclass[n])
6542 case X86_64_INTEGER_CLASS:
6543 case X86_64_INTEGERSI_CLASS:
6546 case X86_64_SSE_CLASS:
6547 case X86_64_SSESF_CLASS:
6548 case X86_64_SSEDF_CLASS:
6551 case X86_64_NO_CLASS:
6552 case X86_64_SSEUP_CLASS:
6554 case X86_64_X87_CLASS:
6555 case X86_64_X87UP_CLASS:
6559 case X86_64_COMPLEX_X87_CLASS:
6560 return in_return ? 2 : 0;
6561 case X86_64_MEMORY_CLASS:
6567 /* Construct container for the argument used by GCC interface. See
6568 FUNCTION_ARG for the detailed description. */
6571 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6572 const_tree type, int in_return, int nintregs, int nsseregs,
6573 const int *intreg, int sse_regno)
6575 /* The following variables hold the static issued_error state. */
6576 static bool issued_sse_arg_error;
6577 static bool issued_sse_ret_error;
6578 static bool issued_x87_ret_error;
6580 enum machine_mode tmpmode;
6582 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6583 enum x86_64_reg_class regclass[MAX_CLASSES];
6587 int needed_sseregs, needed_intregs;
6588 rtx exp[MAX_CLASSES];
6591 n = classify_argument (mode, type, regclass, 0);
6594 if (!examine_argument (mode, type, in_return, &needed_intregs,
6597 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6600 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6601 some less clueful developer tries to use floating-point anyway. */
6602 if (needed_sseregs && !TARGET_SSE)
6606 if (!issued_sse_ret_error)
6608 error ("SSE register return with SSE disabled");
6609 issued_sse_ret_error = true;
6612 else if (!issued_sse_arg_error)
6614 error ("SSE register argument with SSE disabled");
6615 issued_sse_arg_error = true;
6620 /* Likewise, error if the ABI requires us to return values in the
6621 x87 registers and the user specified -mno-80387. */
6622 if (!TARGET_80387 && in_return)
6623 for (i = 0; i < n; i++)
6624 if (regclass[i] == X86_64_X87_CLASS
6625 || regclass[i] == X86_64_X87UP_CLASS
6626 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6628 if (!issued_x87_ret_error)
6630 error ("x87 register return with x87 disabled");
6631 issued_x87_ret_error = true;
6636 /* First construct simple cases. Avoid SCmode, since we want to use a
6637 single register to pass this type. */
6638 if (n == 1 && mode != SCmode)
6639 switch (regclass[0])
6641 case X86_64_INTEGER_CLASS:
6642 case X86_64_INTEGERSI_CLASS:
6643 return gen_rtx_REG (mode, intreg[0]);
6644 case X86_64_SSE_CLASS:
6645 case X86_64_SSESF_CLASS:
6646 case X86_64_SSEDF_CLASS:
6647 if (mode != BLKmode)
6648 return gen_reg_or_parallel (mode, orig_mode,
6649 SSE_REGNO (sse_regno));
6651 case X86_64_X87_CLASS:
6652 case X86_64_COMPLEX_X87_CLASS:
6653 return gen_rtx_REG (mode, FIRST_STACK_REG);
6654 case X86_64_NO_CLASS:
6655 /* Zero sized array, struct or class. */
6660 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6661 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6662 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6664 && regclass[0] == X86_64_SSE_CLASS
6665 && regclass[1] == X86_64_SSEUP_CLASS
6666 && regclass[2] == X86_64_SSEUP_CLASS
6667 && regclass[3] == X86_64_SSEUP_CLASS
6669 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6672 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6673 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6674 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6675 && regclass[1] == X86_64_INTEGER_CLASS
6676 && (mode == CDImode || mode == TImode || mode == TFmode)
6677 && intreg[0] + 1 == intreg[1])
6678 return gen_rtx_REG (mode, intreg[0]);
6680 /* Otherwise figure out the entries of the PARALLEL. */
6681 for (i = 0; i < n; i++)
6685 switch (regclass[i])
6687 case X86_64_NO_CLASS:
6689 case X86_64_INTEGER_CLASS:
6690 case X86_64_INTEGERSI_CLASS:
6691 /* Merge TImodes on aligned occasions here too. */
6692 if (i * 8 + 8 > bytes)
6693 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6694 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6698 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6699 if (tmpmode == BLKmode)
6701 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6702 gen_rtx_REG (tmpmode, *intreg),
6706 case X86_64_SSESF_CLASS:
6707 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6708 gen_rtx_REG (SFmode,
6709 SSE_REGNO (sse_regno)),
6713 case X86_64_SSEDF_CLASS:
6714 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6715 gen_rtx_REG (DFmode,
6716 SSE_REGNO (sse_regno)),
6720 case X86_64_SSE_CLASS:
6728 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6738 && regclass[1] == X86_64_SSEUP_CLASS
6739 && regclass[2] == X86_64_SSEUP_CLASS
6740 && regclass[3] == X86_64_SSEUP_CLASS);
6747 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6748 gen_rtx_REG (tmpmode,
6749 SSE_REGNO (sse_regno)),
6758 /* Empty aligned struct, union or class. */
6762 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6763 for (i = 0; i < nexps; i++)
6764 XVECEXP (ret, 0, i) = exp [i];
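/* Illustrative result (simplified RTL, register names assumed): for
   a by-value struct { long l; double d; } the PARALLEL built above
   looks roughly like

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. one eightbyte in a general purpose register and one in an SSE
   register, each tagged with its byte offset.  */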
6768 /* Update the data in CUM to advance over an argument of mode MODE
6769 and data type TYPE. (TYPE is null for libcalls where that information
6770 may not be available.) */
6773 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6774 const_tree type, HOST_WIDE_INT bytes,
6775 HOST_WIDE_INT words)
6791 cum->words += words;
6792 cum->nregs -= words;
6793 cum->regno += words;
6795 if (cum->nregs <= 0)
6803 /* OImode shouldn't be used directly. */
6807 if (cum->float_in_sse < 2)
6810 if (cum->float_in_sse < 1)
6827 if (!type || !AGGREGATE_TYPE_P (type))
6829 cum->sse_words += words;
6830 cum->sse_nregs -= 1;
6831 cum->sse_regno += 1;
6832 if (cum->sse_nregs <= 0)
6846 if (!type || !AGGREGATE_TYPE_P (type))
6848 cum->mmx_words += words;
6849 cum->mmx_nregs -= 1;
6850 cum->mmx_regno += 1;
6851 if (cum->mmx_nregs <= 0)
6862 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6863 const_tree type, HOST_WIDE_INT words, bool named)
6865 int int_nregs, sse_nregs;
6867 /* Unnamed 256bit vector mode parameters are passed on stack. */
6868 if (!named && VALID_AVX256_REG_MODE (mode))
6871 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6872 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6874 cum->nregs -= int_nregs;
6875 cum->sse_nregs -= sse_nregs;
6876 cum->regno += int_nregs;
6877 cum->sse_regno += sse_nregs;
6881 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6882 cum->words = (cum->words + align - 1) & ~(align - 1);
6883 cum->words += words;
6888 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6889 HOST_WIDE_INT words)
6891 /* Otherwise, this should be passed indirectly. */
6892 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6894 cum->words += words;
6902 /* Update the data in CUM to advance over an argument of mode MODE and
6903 data type TYPE. (TYPE is null for libcalls where that information
6904 may not be available.) */
6907 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6908 const_tree type, bool named)
6910 HOST_WIDE_INT bytes, words;
6912 if (mode == BLKmode)
6913 bytes = int_size_in_bytes (type);
6915 bytes = GET_MODE_SIZE (mode);
6916 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6919 mode = type_natural_mode (type, NULL);
6921 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6922 function_arg_advance_ms_64 (cum, bytes, words);
6923 else if (TARGET_64BIT)
6924 function_arg_advance_64 (cum, mode, type, words, named);
6926 function_arg_advance_32 (cum, mode, type, bytes, words);
6929 /* Define where to put the arguments to a function.
6930 Value is zero to push the argument on the stack,
6931 or a hard register in which to store the argument.
6933 MODE is the argument's machine mode.
6934 TYPE is the data type of the argument (as a tree).
6935 This is null for libcalls where that information may
6937 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6938 the preceding args and about the function being called.
6939 NAMED is nonzero if this argument is a named parameter
6940 (otherwise it is an extra parameter matching an ellipsis). */
6943 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6944 enum machine_mode orig_mode, const_tree type,
6945 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6947 static bool warnedsse, warnedmmx;
6949 /* Avoid the AL settings for the Unix64 ABI. */
6950 if (mode == VOIDmode)
6966 if (words <= cum->nregs)
6968 int regno = cum->regno;
6970 /* Fastcall allocates the first two DWORD (SImode) or
6971 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6977 || (type && AGGREGATE_TYPE_P (type)))
6980 /* ECX, not EAX, is the first allocated register. */
6981 if (regno == AX_REG)
6984 return gen_rtx_REG (mode, regno);
6989 if (cum->float_in_sse < 2)
6992 if (cum->float_in_sse < 1)
6996 /* In 32bit, we pass TImode in xmm registers. */
7003 if (!type || !AGGREGATE_TYPE_P (type))
7005 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
7008 warning (0, "SSE vector argument without SSE enabled "
7012 return gen_reg_or_parallel (mode, orig_mode,
7013 cum->sse_regno + FIRST_SSE_REG);
7018 /* OImode shouldn't be used directly. */
7027 if (!type || !AGGREGATE_TYPE_P (type))
7030 return gen_reg_or_parallel (mode, orig_mode,
7031 cum->sse_regno + FIRST_SSE_REG);
7041 if (!type || !AGGREGATE_TYPE_P (type))
7043 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
7046 warning (0, "MMX vector argument without MMX enabled "
7050 return gen_reg_or_parallel (mode, orig_mode,
7051 cum->mmx_regno + FIRST_MMX_REG);
7060 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7061 enum machine_mode orig_mode, const_tree type, bool named)
7063 /* Handle a hidden AL argument containing number of registers
7064 for varargs x86-64 functions. */
7065 if (mode == VOIDmode)
7066 return GEN_INT (cum->maybe_vaarg
7067 ? (cum->sse_nregs < 0
7068 ? X86_64_SSE_REGPARM_MAX
7083 /* Unnamed 256bit vector mode parameters are passed on stack. */
7089 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7091 &x86_64_int_parameter_registers [cum->regno],
7096 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7097 enum machine_mode orig_mode, bool named,
7098 HOST_WIDE_INT bytes)
7102 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7103 We use the value of -2 to specify that the current function call is MSABI. */
7104 if (mode == VOIDmode)
7105 return GEN_INT (-2);
7107 /* If we've run out of registers, it goes on the stack. */
7108 if (cum->nregs == 0)
7111 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7113 /* Only floating point modes are passed in anything but integer regs. */
7114 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7117 regno = cum->regno + FIRST_SSE_REG;
7122 /* Unnamed floating parameters are passed in both the
7123 SSE and integer registers. */
7124 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7125 t2 = gen_rtx_REG (mode, regno);
7126 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7127 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7128 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7131 /* Handle aggregate types passed in registers. */
7132 if (orig_mode == BLKmode)
7134 if (bytes > 0 && bytes <= 8)
7135 mode = (bytes > 4 ? DImode : SImode);
7136 if (mode == BLKmode)
7140 return gen_reg_or_parallel (mode, orig_mode, regno);
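/* Illustrative consequence (hypothetical declaration): MS 64-bit ABI
   argument slots are positional, so for

     void f (int a, double b);

   A lands in %rcx while B lands in %xmm1 rather than %xmm0, because
   CUM->regno has already advanced past slot 0 by the time B is
   assigned.  */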
7143 /* Return where to put the arguments to a function.
7144 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7146 MODE is the argument's machine mode. TYPE is the data type of the
7147 argument. It is null for libcalls where that information may not be
7148 available. CUM gives information about the preceding args and about
7149 the function being called. NAMED is nonzero if this argument is a
7150 named parameter (otherwise it is an extra parameter matching an
7154 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7155 const_tree type, bool named)
7157 enum machine_mode mode = omode;
7158 HOST_WIDE_INT bytes, words;
7161 if (mode == BLKmode)
7162 bytes = int_size_in_bytes (type);
7164 bytes = GET_MODE_SIZE (mode);
7165 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7167 /* To simplify the code below, represent vector types with a vector mode
7168 even if MMX/SSE are not active. */
7169 if (type && TREE_CODE (type) == VECTOR_TYPE)
7170 mode = type_natural_mode (type, cum);
7172 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7173 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7174 else if (TARGET_64BIT)
7175 arg = function_arg_64 (cum, mode, omode, type, named);
7177 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7179 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7181 /* This argument uses 256bit AVX modes. */
7183 cfun->machine->callee_pass_avx256_p = true;
7185 cfun->machine->caller_pass_avx256_p = true;
7191 /* A C expression that indicates when an argument must be passed by
7192 reference. If nonzero for an argument, a copy of that argument is
7193 made in memory and a pointer to the argument is passed instead of
7194 the argument itself. The pointer is passed in whatever way is
7195 appropriate for passing a pointer to that type. */
7198 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7199 enum machine_mode mode ATTRIBUTE_UNUSED,
7200 const_tree type, bool named ATTRIBUTE_UNUSED)
7202 /* See Windows x64 Software Convention. */
7203 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7205 int msize = (int) GET_MODE_SIZE (mode);
7208 /* Arrays are passed by reference. */
7209 if (TREE_CODE (type) == ARRAY_TYPE)
7212 if (AGGREGATE_TYPE_P (type))
7214 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7215 are passed by reference. */
7216 msize = int_size_in_bytes (type);
7220 /* __m128 is passed by reference. */
7222 case 1: case 2: case 4: case 8:
7228 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7234 /* Return true when TYPE should be 128bit aligned for 32bit argument
7235 passing ABI. XXX: This function is obsolete and is only used for
7236 checking psABI compatibility with previous versions of GCC. */
7239 ix86_compat_aligned_value_p (const_tree type)
7241 enum machine_mode mode = TYPE_MODE (type);
7242 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7246 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7248 if (TYPE_ALIGN (type) < 128)
7251 if (AGGREGATE_TYPE_P (type))
7253 /* Walk the aggregates recursively. */
7254 switch (TREE_CODE (type))
7258 case QUAL_UNION_TYPE:
7262 /* Walk all the structure fields. */
7263 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7265 if (TREE_CODE (field) == FIELD_DECL
7266 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7273 /* Just for use if some languages pass arrays by value. */
7274 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7285 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7286 XXX: This function is obsolete and is only used for checking psABI
7287 compatibility with previous versions of GCC. */
7290 ix86_compat_function_arg_boundary (enum machine_mode mode,
7291 const_tree type, unsigned int align)
7293 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7294 natural boundaries. */
7295 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7297 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7298 make an exception for SSE modes since these require 128bit alignment.
7301 The handling here differs from field_alignment. ICC aligns MMX
7302 arguments to 4 byte boundaries, while structure fields are aligned
7303 to 8 byte boundaries. */
7306 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7307 align = PARM_BOUNDARY;
7311 if (!ix86_compat_aligned_value_p (type))
7312 align = PARM_BOUNDARY;
7315 if (align > BIGGEST_ALIGNMENT)
7316 align = BIGGEST_ALIGNMENT;
7320 /* Return true when TYPE should be 128bit aligned for 32bit argument
7324 ix86_contains_aligned_value_p (const_tree type)
7326 enum machine_mode mode = TYPE_MODE (type);
7328 if (mode == XFmode || mode == XCmode)
7331 if (TYPE_ALIGN (type) < 128)
7334 if (AGGREGATE_TYPE_P (type))
7336 /* Walk the aggregates recursively. */
7337 switch (TREE_CODE (type))
7341 case QUAL_UNION_TYPE:
7345 /* Walk all the structure fields. */
7346 for (field = TYPE_FIELDS (type);
7348 field = DECL_CHAIN (field))
7350 if (TREE_CODE (field) == FIELD_DECL
7351 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7358 /* Just for use if some languages pass arrays by value. */
7359 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7368 return TYPE_ALIGN (type) >= 128;
7373 /* Gives the alignment boundary, in bits, of an argument with the
7374 specified mode and type. */
7377 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7382 /* Since the main variant type is used for the call, we convert the
7383 given type to its main variant. */
7384 type = TYPE_MAIN_VARIANT (type);
7385 align = TYPE_ALIGN (type);
7388 align = GET_MODE_ALIGNMENT (mode);
7389 if (align < PARM_BOUNDARY)
7390 align = PARM_BOUNDARY;
7394 unsigned int saved_align = align;
7398 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7401 if (mode == XFmode || mode == XCmode)
7402 align = PARM_BOUNDARY;
7404 else if (!ix86_contains_aligned_value_p (type))
7405 align = PARM_BOUNDARY;
7408 align = PARM_BOUNDARY;
7413 && align != ix86_compat_function_arg_boundary (mode, type,
7417 inform (input_location,
7418 "The ABI for passing parameters with %d-byte"
7419 " alignment has changed in GCC 4.6",
7420 align / BITS_PER_UNIT);
7427 /* Return true if N is a possible register number of function value. */
7430 ix86_function_value_regno_p (const unsigned int regno)
7437 case FIRST_FLOAT_REG:
7438 /* TODO: The function should depend on current function ABI but
7439 builtins.c would need updating then. Therefore we use the default ABI. */
7441 if (TARGET_64BIT && ix86_abi == MS_ABI)
7443 return TARGET_FLOAT_RETURNS_IN_80387;
7449 if (TARGET_MACHO || TARGET_64BIT)
7457 /* Define how to find the value returned by a function.
7458 VALTYPE is the data type of the value (as a tree).
7459 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7460 otherwise, FUNC is 0. */
7463 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7464 const_tree fntype, const_tree fn)
7468 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7469 we normally prevent this case when mmx is not available. However
7470 some ABIs may require the result to be returned like DImode. */
7471 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7472 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7474 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7475 we prevent this case when sse is not available. However some ABIs
7476 may require the result to be returned like integer TImode. */
7477 else if (mode == TImode
7478 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7479 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7481 /* 32-byte vector modes in %ymm0. */
7482 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7483 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7485 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7486 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7487 regno = FIRST_FLOAT_REG;
7489 /* Most things go in %eax. */
7492 /* Override FP return register with %xmm0 for local functions when
7493 SSE math is enabled or for functions with sseregparm attribute. */
7494 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7496 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7497 if ((sse_level >= 1 && mode == SFmode)
7498 || (sse_level == 2 && mode == DFmode))
7499 regno = FIRST_SSE_REG;
7502 /* OImode shouldn't be used directly. */
7503 gcc_assert (mode != OImode);
7505 return gen_rtx_REG (orig_mode, regno);
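/* Illustrative outcomes (32-bit): a plain "double f (void)" returns
   in %st(0) via FIRST_FLOAT_REG, while for a local function compiled
   with -mfpmath=sse, or one carrying the sseregparm attribute, the
   override above redirects the DFmode result to %xmm0.  */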
7509 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7514 /* Handle libcalls, which don't provide a type node. */
7515 if (valtype == NULL)
7527 return gen_rtx_REG (mode, FIRST_SSE_REG);
7530 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7534 return gen_rtx_REG (mode, AX_REG);
7538 ret = construct_container (mode, orig_mode, valtype, 1,
7539 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7540 x86_64_int_return_registers, 0);
7542 /* For zero sized structures, construct_container returns NULL, but we
7543 need to keep the rest of the compiler happy by returning a meaningful value. */
7545 ret = gen_rtx_REG (orig_mode, AX_REG);
7551 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7553 unsigned int regno = AX_REG;
7557 switch (GET_MODE_SIZE (mode))
7560 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7561 && !COMPLEX_MODE_P (mode))
7562 regno = FIRST_SSE_REG;
7566 if (mode == SFmode || mode == DFmode)
7567 regno = FIRST_SSE_REG;
7573 return gen_rtx_REG (orig_mode, regno);
7577 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7578 enum machine_mode orig_mode, enum machine_mode mode)
7580 const_tree fn, fntype;
7583 if (fntype_or_decl && DECL_P (fntype_or_decl))
7584 fn = fntype_or_decl;
7585 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7587 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7588 return function_value_ms_64 (orig_mode, mode);
7589 else if (TARGET_64BIT)
7590 return function_value_64 (orig_mode, mode, valtype);
7592 return function_value_32 (orig_mode, mode, fntype, fn);
7596 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7597 bool outgoing ATTRIBUTE_UNUSED)
7599 enum machine_mode mode, orig_mode;
7601 orig_mode = TYPE_MODE (valtype);
7602 mode = type_natural_mode (valtype, NULL);
7603 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7607 ix86_libcall_value (enum machine_mode mode)
7609 return ix86_function_value_1 (NULL, NULL, mode, mode);
7612 /* Return true iff type is returned in memory. */
7614 static bool ATTRIBUTE_UNUSED
7615 return_in_memory_32 (const_tree type, enum machine_mode mode)
7619 if (mode == BLKmode)
7622 size = int_size_in_bytes (type);
7624 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7627 if (VECTOR_MODE_P (mode) || mode == TImode)
7629 /* User-created vectors small enough to fit in EAX. */
7633 /* MMX/3dNow values are returned in MM0,
7634 except when it doesn't exist or the ABI prescribes otherwise. */
7636 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7638 /* SSE values are returned in XMM0, except when it doesn't exist. */
7642 /* AVX values are returned in YMM0, except when it doesn't exist. */
7653 /* OImode shouldn't be used directly. */
7654 gcc_assert (mode != OImode);
7659 static bool ATTRIBUTE_UNUSED
7660 return_in_memory_64 (const_tree type, enum machine_mode mode)
7662 int needed_intregs, needed_sseregs;
7663 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7666 static bool ATTRIBUTE_UNUSED
7667 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7669 HOST_WIDE_INT size = int_size_in_bytes (type);
7671 /* __m128 is returned in xmm0. */
7672 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7673 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7676 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7677 return size != 1 && size != 2 && size != 4 && size != 8;
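/* Examples (illustrative): under this ABI a 12-byte struct is
   returned in memory because its size is not 1, 2, 4 or 8, while an
   8-byte struct comes back in %rax and a 16-byte __m128 in %xmm0.  */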
7681 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7683 #ifdef SUBTARGET_RETURN_IN_MEMORY
7684 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7686 const enum machine_mode mode = type_natural_mode (type, NULL);
7690 if (ix86_function_type_abi (fntype) == MS_ABI)
7691 return return_in_memory_ms_64 (type, mode);
7693 return return_in_memory_64 (type, mode);
7696 return return_in_memory_32 (type, mode);
7700 /* When returning SSE vector types, we have a choice of either
7701 (1) being abi incompatible with a -march switch, or
7702 (2) generating an error.
7703 Given no good solution, I think the safest thing is one warning.
7704 The user won't be able to use -Werror, but....
7706 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7707 called in response to actually generating a caller or callee that
7708 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7709 via aggregate_value_p for general type probing from tree-ssa. */
7712 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7714 static bool warnedsse, warnedmmx;
7716 if (!TARGET_64BIT && type)
7718 /* Look at the return type of the function, not the function type. */
7719 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7721 if (!TARGET_SSE && !warnedsse)
7724 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7727 warning (0, "SSE vector return without SSE enabled "
7732 if (!TARGET_MMX && !warnedmmx)
7734 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7737 warning (0, "MMX vector return without MMX enabled "
7747 /* Create the va_list data type. */
7749 /* Returns the calling convention specific va_list data type.
7750 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7753 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7755 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7757 /* For i386 we use a plain pointer to the argument area. */
7758 if (!TARGET_64BIT || abi == MS_ABI)
7759 return build_pointer_type (char_type_node);
7761 record = lang_hooks.types.make_type (RECORD_TYPE);
7762 type_decl = build_decl (BUILTINS_LOCATION,
7763 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7765 f_gpr = build_decl (BUILTINS_LOCATION,
7766 FIELD_DECL, get_identifier ("gp_offset"),
7767 unsigned_type_node);
7768 f_fpr = build_decl (BUILTINS_LOCATION,
7769 FIELD_DECL, get_identifier ("fp_offset"),
7770 unsigned_type_node);
7771 f_ovf = build_decl (BUILTINS_LOCATION,
7772 FIELD_DECL, get_identifier ("overflow_arg_area"),
7774 f_sav = build_decl (BUILTINS_LOCATION,
7775 FIELD_DECL, get_identifier ("reg_save_area"),
7778 va_list_gpr_counter_field = f_gpr;
7779 va_list_fpr_counter_field = f_fpr;
7781 DECL_FIELD_CONTEXT (f_gpr) = record;
7782 DECL_FIELD_CONTEXT (f_fpr) = record;
7783 DECL_FIELD_CONTEXT (f_ovf) = record;
7784 DECL_FIELD_CONTEXT (f_sav) = record;
7786 TYPE_STUB_DECL (record) = type_decl;
7787 TYPE_NAME (record) = type_decl;
7788 TYPE_FIELDS (record) = f_gpr;
7789 DECL_CHAIN (f_gpr) = f_fpr;
7790 DECL_CHAIN (f_fpr) = f_ovf;
7791 DECL_CHAIN (f_ovf) = f_sav;
7793 layout_type (record);
7795 /* The correct type is an array type of one element. */
7796 return build_array_type (record, build_index_type (size_zero_node));
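/* The record built above matches the SysV x86-64 va_list layout; an
   illustrative C equivalent (using the field names created above) is

     struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];

   where gp_offset and fp_offset index into reg_save_area and
   overflow_arg_area points at the stack-passed arguments.  */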
7799 /* Set up the builtin va_list data type and, for 64-bit, the additional
7800 calling convention specific va_list data types. */
7803 ix86_build_builtin_va_list (void)
7805 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7807 /* Initialize abi specific va_list builtin types. */
7811 if (ix86_abi == MS_ABI)
7813 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7814 if (TREE_CODE (t) != RECORD_TYPE)
7815 t = build_variant_type_copy (t);
7816 sysv_va_list_type_node = t;
7821 if (TREE_CODE (t) != RECORD_TYPE)
7822 t = build_variant_type_copy (t);
7823 sysv_va_list_type_node = t;
7825 if (ix86_abi != MS_ABI)
7827 t = ix86_build_builtin_va_list_abi (MS_ABI);
7828 if (TREE_CODE (t) != RECORD_TYPE)
7829 t = build_variant_type_copy (t);
7830 ms_va_list_type_node = t;
7835 if (TREE_CODE (t) != RECORD_TYPE)
7836 t = build_variant_type_copy (t);
7837 ms_va_list_type_node = t;
7844 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7847 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7853 /* GPR size of varargs save area. */
7854 if (cfun->va_list_gpr_size)
7855 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7857 ix86_varargs_gpr_size = 0;
7859 /* FPR size of varargs save area. We don't need it if we don't pass
7860 anything in SSE registers. */
7861 if (TARGET_SSE && cfun->va_list_fpr_size)
7862 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7864 ix86_varargs_fpr_size = 0;
7866 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7869 save_area = frame_pointer_rtx;
7870 set = get_varargs_alias_set ();
7872 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7873 if (max > X86_64_REGPARM_MAX)
7874 max = X86_64_REGPARM_MAX;
7876 for (i = cum->regno; i < max; i++)
7878 mem = gen_rtx_MEM (Pmode,
7879 plus_constant (save_area, i * UNITS_PER_WORD));
7880 MEM_NOTRAP_P (mem) = 1;
7881 set_mem_alias_set (mem, set);
7882 emit_move_insn (mem, gen_rtx_REG (Pmode,
7883 x86_64_int_parameter_registers[i]));
7886 if (ix86_varargs_fpr_size)
7888 enum machine_mode smode;
7891 /* Now emit code to save SSE registers. The AX parameter contains number
7892 of SSE parameter registers used to call this function, though all we
7893 actually check here is the zero/non-zero status. */
7895 label = gen_label_rtx ();
7896 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7897 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7900 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7901 we used movdqa (i.e. TImode) instead? Perhaps even better would
7902 be if we could determine the real mode of the data, via a hook
7903 into pass_stdarg. Ignore all that for now. */
7905 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7906 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7908 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7909 if (max > X86_64_SSE_REGPARM_MAX)
7910 max = X86_64_SSE_REGPARM_MAX;
7912 for (i = cum->sse_regno; i < max; ++i)
7914 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7915 mem = gen_rtx_MEM (smode, mem);
7916 MEM_NOTRAP_P (mem) = 1;
7917 set_mem_alias_set (mem, set);
7918 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7920 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7928 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7930 alias_set_type set = get_varargs_alias_set ();
7933 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7937 mem = gen_rtx_MEM (Pmode,
7938 plus_constant (virtual_incoming_args_rtx,
7939 i * UNITS_PER_WORD));
7940 MEM_NOTRAP_P (mem) = 1;
7941 set_mem_alias_set (mem, set);
7943 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7944 emit_move_insn (mem, reg);
7949 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7950 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7953 CUMULATIVE_ARGS next_cum;
7956 /* This argument doesn't appear to be used anymore. Which is good,
7957 because the old code here didn't suppress rtl generation. */
7958 gcc_assert (!no_rtl);
7963 fntype = TREE_TYPE (current_function_decl);
7965 /* For varargs, we do not want to skip the dummy va_dcl argument.
7966 For stdargs, we do want to skip the last named argument. */
7968 if (stdarg_p (fntype))
7969 ix86_function_arg_advance (&next_cum, mode, type, true);
7971 if (cum->call_abi == MS_ABI)
7972 setup_incoming_varargs_ms_64 (&next_cum);
7974 setup_incoming_varargs_64 (&next_cum);
7977 /* Check whether TYPE is a va_list of kind char *. */
7980 is_va_list_char_pointer (tree type)
7984 /* For 32-bit it is always true. */
7987 canonic = ix86_canonical_va_list_type (type);
7988 return (canonic == ms_va_list_type_node
7989 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7992 /* Implement va_start. */
7995 ix86_va_start (tree valist, rtx nextarg)
7997 HOST_WIDE_INT words, n_gpr, n_fpr;
7998 tree f_gpr, f_fpr, f_ovf, f_sav;
7999 tree gpr, fpr, ovf, sav, t;
8003 if (flag_split_stack
8004 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8006 unsigned int scratch_regno;
8008 /* When we are splitting the stack, we can't refer to the stack
8009 arguments using internal_arg_pointer, because they may be on
8010 the old stack. The split stack prologue will arrange to
8011 leave a pointer to the old stack arguments in a scratch
8012 register, which we here copy to a pseudo-register. The split
8013 stack prologue can't set the pseudo-register directly because
8014 it (the prologue) runs before any registers have been saved. */
8016 scratch_regno = split_stack_prologue_scratch_regno ();
8017 if (scratch_regno != INVALID_REGNUM)
8021 reg = gen_reg_rtx (Pmode);
8022 cfun->machine->split_stack_varargs_pointer = reg;
8025 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8029 push_topmost_sequence ();
8030 emit_insn_after (seq, entry_of_function ());
8031 pop_topmost_sequence ();
8035 /* Only the 64-bit target needs something special. */
8036 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8038 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8039 std_expand_builtin_va_start (valist, nextarg);
8044 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8045 next = expand_binop (ptr_mode, add_optab,
8046 cfun->machine->split_stack_varargs_pointer,
8047 crtl->args.arg_offset_rtx,
8048 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8049 convert_move (va_r, next, 0);
8054 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8055 f_fpr = DECL_CHAIN (f_gpr);
8056 f_ovf = DECL_CHAIN (f_fpr);
8057 f_sav = DECL_CHAIN (f_ovf);
8059 valist = build_simple_mem_ref (valist);
8060 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8061 /* The following should be folded into the MEM_REF offset. */
8062 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8064 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8066 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8068 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8071 /* Count number of gp and fp argument registers used. */
8072 words = crtl->args.info.words;
8073 n_gpr = crtl->args.info.regno;
8074 n_fpr = crtl->args.info.sse_regno;
8076 if (cfun->va_list_gpr_size)
8078 type = TREE_TYPE (gpr);
8079 t = build2 (MODIFY_EXPR, type,
8080 gpr, build_int_cst (type, n_gpr * 8));
8081 TREE_SIDE_EFFECTS (t) = 1;
8082 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8085 if (TARGET_SSE && cfun->va_list_fpr_size)
8087 type = TREE_TYPE (fpr);
8088 t = build2 (MODIFY_EXPR, type, fpr,
8089 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8090 TREE_SIDE_EFFECTS (t) = 1;
8091 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8094 /* Find the overflow area. */
8095 type = TREE_TYPE (ovf);
8096 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8097 ovf_rtx = crtl->args.internal_arg_pointer;
8099 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8100 t = make_tree (type, ovf_rtx);
8102 t = build2 (POINTER_PLUS_EXPR, type, t,
8103 size_int (words * UNITS_PER_WORD));
8104 t = build2 (MODIFY_EXPR, type, ovf, t);
8105 TREE_SIDE_EFFECTS (t) = 1;
8106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8108 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8110 /* Find the register save area.
8111 The function prologue saves it right above the stack frame. */
8112 type = TREE_TYPE (sav);
8113 t = make_tree (type, frame_pointer_rtx);
8114 if (!ix86_varargs_gpr_size)
8115 t = build2 (POINTER_PLUS_EXPR, type, t,
8116 size_int (-8 * X86_64_REGPARM_MAX));
8117 t = build2 (MODIFY_EXPR, type, sav, t);
8118 TREE_SIDE_EFFECTS (t) = 1;
8119 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
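/* Illustrative numbers (SysV x86-64): after "void f (int n, ...)" one
   general purpose register is consumed by N, so the stores above set
   gp_offset = 1 * 8 = 8 and fp_offset = 0 * 16 + 8 * 6 = 48; fp_offset
   always starts past the six 8-byte GPR save slots.  */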
8123 /* Implement va_arg. */
8126 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8129 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8130 tree f_gpr, f_fpr, f_ovf, f_sav;
8131 tree gpr, fpr, ovf, sav, t;
8133 tree lab_false, lab_over = NULL_TREE;
8138 enum machine_mode nat_mode;
8139 unsigned int arg_boundary;
8141 /* Only 64bit target needs something special. */
8142 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8143 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8145 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8146 f_fpr = DECL_CHAIN (f_gpr);
8147 f_ovf = DECL_CHAIN (f_fpr);
8148 f_sav = DECL_CHAIN (f_ovf);
8150 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8151 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8152 valist = build_va_arg_indirect_ref (valist);
8153 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8154 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8155 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8157 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8159 type = build_pointer_type (type);
8160 size = int_size_in_bytes (type);
8161 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8163 nat_mode = type_natural_mode (type, NULL);
8172 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
8173 if (ix86_cfun_abi () == SYSV_ABI)
8180 container = construct_container (nat_mode, TYPE_MODE (type),
8181 type, 0, X86_64_REGPARM_MAX,
8182 X86_64_SSE_REGPARM_MAX, intreg,
8187 /* Pull the value out of the saved registers. */
8189 addr = create_tmp_var (ptr_type_node, "addr");
8193 int needed_intregs, needed_sseregs;
8195 tree int_addr, sse_addr;
8197 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8198 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8200 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8202 need_temp = (!REG_P (container)
8203 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8204 || TYPE_ALIGN (type) > 128));
8206 /* In case we are passing a structure, verify that it forms a consecutive
8207 block in the register save area. If not, we need to do moves. */
8208 if (!need_temp && !REG_P (container))
8210 /* Verify that all registers are strictly consecutive. */
8211 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8215 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8217 rtx slot = XVECEXP (container, 0, i);
8218 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8219 || INTVAL (XEXP (slot, 1)) != i * 16)
8227 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8229 rtx slot = XVECEXP (container, 0, i);
8230 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8231 || INTVAL (XEXP (slot, 1)) != i * 8)
8243 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8244 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8247 /* First ensure that we fit completely in registers. */
8250 t = build_int_cst (TREE_TYPE (gpr),
8251 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8252 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8253 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8254 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8255 gimplify_and_add (t, pre_p);
8259 t = build_int_cst (TREE_TYPE (fpr),
8260 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8261 + X86_64_REGPARM_MAX * 8);
8262 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8263 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8264 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8265 gimplify_and_add (t, pre_p);
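/* The two bound checks above branch to lab_false (the overflow-area
   path) as soon as gp_offset or fp_offset has advanced so far that
   fewer than needed_intregs GP slots or needed_sseregs SSE slots
   remain in the register save area.  */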
8268 /* Compute index to start of area used for integer regs. */
8271 /* int_addr = gpr + sav; */
8272 t = fold_convert (sizetype, gpr);
8273 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8274 gimplify_assign (int_addr, t, pre_p);
8278 /* sse_addr = fpr + sav; */
8279 t = fold_convert (sizetype, fpr);
8280 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8281 gimplify_assign (sse_addr, t, pre_p);
8285 int i, prev_size = 0;
8286 tree temp = create_tmp_var (type, "va_arg_tmp");
8289 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8290 gimplify_assign (addr, t, pre_p);
8292 for (i = 0; i < XVECLEN (container, 0); i++)
8294 rtx slot = XVECEXP (container, 0, i);
8295 rtx reg = XEXP (slot, 0);
8296 enum machine_mode mode = GET_MODE (reg);
8302 tree dest_addr, dest;
8303 int cur_size = GET_MODE_SIZE (mode);
8305 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8306 prev_size = INTVAL (XEXP (slot, 1));
8307 if (prev_size + cur_size > size)
8309 cur_size = size - prev_size;
8310 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8311 if (mode == BLKmode)
8314 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8315 if (mode == GET_MODE (reg))
8316 addr_type = build_pointer_type (piece_type);
8318 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8320 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8323 if (SSE_REGNO_P (REGNO (reg)))
8325 src_addr = sse_addr;
8326 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8330 src_addr = int_addr;
8331 src_offset = REGNO (reg) * 8;
8333 src_addr = fold_convert (addr_type, src_addr);
8334 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8335 size_int (src_offset));
8337 dest_addr = fold_convert (daddr_type, addr);
8338 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8339 size_int (prev_size));
8340 if (cur_size == GET_MODE_SIZE (mode))
8342 src = build_va_arg_indirect_ref (src_addr);
8343 dest = build_va_arg_indirect_ref (dest_addr);
8345 gimplify_assign (dest, src, pre_p);
8350 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8351 3, dest_addr, src_addr,
8352 size_int (cur_size));
8353 gimplify_and_add (copy, pre_p);
8355 prev_size += cur_size;
8361 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8362 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8363 gimplify_assign (gpr, t, pre_p);
8368 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8369 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8370 gimplify_assign (fpr, t, pre_p);
8373 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8375 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8378 /* ... otherwise out of the overflow area. */
8380 /* When we align a parameter on the stack for the caller, if the
8381 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
8382 be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8383 here with the caller. */
8384 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8385 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8386 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8388 /* Care for on-stack alignment if needed. */
8389 if (arg_boundary <= 64 || size == 0)
8393 HOST_WIDE_INT align = arg_boundary / 8;
8394 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8395 size_int (align - 1));
8396 t = fold_convert (sizetype, t);
8397 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8399 t = fold_convert (TREE_TYPE (ovf), t);
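/* The statements above build the usual round-up idiom in tree form:
   addr = (ovf + align - 1) & -align.  */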
8402 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8403 gimplify_assign (addr, t, pre_p);
8405 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8406 size_int (rsize * UNITS_PER_WORD));
8407 gimplify_assign (unshare_expr (ovf), t, pre_p);
8410 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8412 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8413 addr = fold_convert (ptrtype, addr);
8416 addr = build_va_arg_indirect_ref (addr);
8417 return build_va_arg_indirect_ref (addr);
8420 /* Return true if OPNUM's MEM should be matched
8421 in movabs* patterns. */
8424 ix86_check_movabs (rtx insn, int opnum)
8428 set = PATTERN (insn);
8429 if (GET_CODE (set) == PARALLEL)
8430 set = XVECEXP (set, 0, 0);
8431 gcc_assert (GET_CODE (set) == SET);
8432 mem = XEXP (set, opnum);
8433 while (GET_CODE (mem) == SUBREG)
8434 mem = SUBREG_REG (mem);
8435 gcc_assert (MEM_P (mem));
8436 return volatile_ok || !MEM_VOLATILE_P (mem);
8439 /* Initialize the table of extra 80387 mathematical constants. */
8442 init_ext_80387_constants (void)
8444 static const char * cst[5] =
8446 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8447 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8448 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8449 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8450 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8454 for (i = 0; i < 5; i++)
8456 real_from_string (&ext_80387_constants_table[i], cst[i]);
8457 /* Ensure each constant is rounded to XFmode precision. */
8458 real_convert (&ext_80387_constants_table[i],
8459 XFmode, &ext_80387_constants_table[i]);
8462 ext_80387_constants_init = 1;
8465 /* Return non-zero if the constant is something that
8466 can be loaded with a special instruction. */
8469 standard_80387_constant_p (rtx x)
8471 enum machine_mode mode = GET_MODE (x);
8475 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8478 if (x == CONST0_RTX (mode))
8480 if (x == CONST1_RTX (mode))
8483 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8485 /* For XFmode constants, try to find a special 80387 instruction when
8486 optimizing for size or on those CPUs that benefit from them. */
8488 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8492 if (! ext_80387_constants_init)
8493 init_ext_80387_constants ();
8495 for (i = 0; i < 5; i++)
8496 if (real_identical (&r, &ext_80387_constants_table[i]))
8500 /* A load of the constant -0.0 or -1.0 will be split into an
8501 fldz;fchs or fld1;fchs sequence. */
8502 if (real_isnegzero (&r))
8504 if (real_identical (&r, &dconstm1))
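/* To summarize the non-zero return values, which
   standard_80387_constant_opcode maps back to insn mnemonics:
   1 -> fldz, 2 -> fld1, 3..7 -> the five ext_80387_constants_table
   entries above (fldlg2, fldln2, fldl2e, fldl2t, fldpi), and 8/9 ->
   the fldz;fchs and fld1;fchs split sequences.  */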
8510 /* Return the opcode of the special instruction to be used to load
8511 the constant X. */
8514 standard_80387_constant_opcode (rtx x)
8516 switch (standard_80387_constant_p (x))
8540 /* Return the CONST_DOUBLE representing the 80387 constant that is
8541 loaded by the specified special instruction. The argument IDX
8542 matches the return value from standard_80387_constant_p. */
8545 standard_80387_constant_rtx (int idx)
8549 if (! ext_80387_constants_init)
8550 init_ext_80387_constants ();
8566 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8570 /* Return 1 if X is all 0s and 2 if X is all 1s
8571 in a supported SSE vector mode. */
8574 standard_sse_constant_p (rtx x)
8576 enum machine_mode mode = GET_MODE (x);
8578 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8580 if (vector_all_ones_operand (x, mode))
8596 /* Return the opcode of the special instruction to be used to load
8597 the constant X. */
8600 standard_sse_constant_opcode (rtx insn, rtx x)
8602 switch (standard_sse_constant_p (x))
8605 switch (get_attr_mode (insn))
8608 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8610 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8611 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8613 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8615 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8616 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8618 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8620 return "vxorps\t%x0, %x0, %x0";
8622 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8623 return "vxorps\t%x0, %x0, %x0";
8625 return "vxorpd\t%x0, %x0, %x0";
8627 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8628 return "vxorps\t%x0, %x0, %x0";
8630 return "vpxor\t%x0, %x0, %x0";
8635 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
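/* pcmpeqd of a register with itself sets every bit, the idiomatic way
   to materialize an all-ones vector without a load, just as xorps/pxor
   of a register with itself materializes zero.  */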
8642 /* Returns true if OP contains a symbol reference. */
8645 symbolic_reference_mentioned_p (rtx op)
8650 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8653 fmt = GET_RTX_FORMAT (GET_CODE (op));
8654 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8660 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8661 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8665 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8672 /* Return true if it is appropriate to emit `ret' instructions in the
8673 body of a function. Do this only if the epilogue is simple, needing a
8674 couple of insns. Prior to reloading, we can't tell how many registers
8675 must be saved, so return false then. Return false if there is no frame
8676 marker to de-allocate. */
8679 ix86_can_use_return_insn_p (void)
8681 struct ix86_frame frame;
8683 if (! reload_completed || frame_pointer_needed)
8686 /* Don't allow more than a 32k pop, since that's all we can do
8687 with one instruction. */
8688 if (crtl->args.pops_args && crtl->args.size >= 32768)
8691 ix86_compute_frame_layout (&frame);
8692 return (frame.stack_pointer_offset == UNITS_PER_WORD
8693 && (frame.nregs + frame.nsseregs) == 0);
8696 /* Value should be nonzero if functions must have frame pointers.
8697 Zero means the frame pointer need not be set up (and parms may
8698 be accessed via the stack pointer) in functions that seem suitable. */
8701 ix86_frame_pointer_required (void)
8703 /* If we accessed previous frames, then the generated code expects
8704 to be able to access the saved ebp value in our frame. */
8705 if (cfun->machine->accesses_prev_frame)
8708 /* Several x86 OSes need a frame pointer for other reasons,
8709 usually pertaining to setjmp. */
8710 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8713 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8714 turns off the frame pointer by default. Turn it back on now if
8715 we do not have a leaf function. */
8716 if (TARGET_OMIT_LEAF_FRAME_POINTER
8717 && (!current_function_is_leaf
8718 || ix86_current_function_calls_tls_descriptor))
8721 if (crtl->profile && !flag_fentry)
8727 /* Record that the current function accesses previous call frames. */
8730 ix86_setup_frame_addresses (void)
8732 cfun->machine->accesses_prev_frame = 1;
8735 #ifndef USE_HIDDEN_LINKONCE
8736 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8737 # define USE_HIDDEN_LINKONCE 1
8739 # define USE_HIDDEN_LINKONCE 0
8743 static int pic_labels_used;
8745 /* Fills in the label name that should be used for a pc thunk for
8746 the given register. */
8749 get_pc_thunk_name (char name[32], unsigned int regno)
8751 gcc_assert (!TARGET_64BIT);
8753 if (USE_HIDDEN_LINKONCE)
8754 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8756 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8760 /* This function generates code for -fpic that loads %ebx with
8761 the return address of the caller and then returns. */
8764 ix86_code_end (void)
8769 for (regno = AX_REG; regno <= SP_REG; regno++)
8774 if (!(pic_labels_used & (1 << regno)))
8777 get_pc_thunk_name (name, regno);
8779 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8780 get_identifier (name),
8781 build_function_type (void_type_node, void_list_node));
8782 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8783 NULL_TREE, void_type_node);
8784 TREE_PUBLIC (decl) = 1;
8785 TREE_STATIC (decl) = 1;
8790 switch_to_section (darwin_sections[text_coal_section]);
8791 fputs ("\t.weak_definition\t", asm_out_file);
8792 assemble_name (asm_out_file, name);
8793 fputs ("\n\t.private_extern\t", asm_out_file);
8794 assemble_name (asm_out_file, name);
8795 putc ('\n', asm_out_file);
8796 ASM_OUTPUT_LABEL (asm_out_file, name);
8797 DECL_WEAK (decl) = 1;
8801 if (USE_HIDDEN_LINKONCE)
8803 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8805 targetm.asm_out.unique_section (decl, 0);
8806 switch_to_section (get_named_section (decl, NULL, 0));
8808 targetm.asm_out.globalize_label (asm_out_file, name);
8809 fputs ("\t.hidden\t", asm_out_file);
8810 assemble_name (asm_out_file, name);
8811 putc ('\n', asm_out_file);
8812 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8816 switch_to_section (text_section);
8817 ASM_OUTPUT_LABEL (asm_out_file, name);
8820 DECL_INITIAL (decl) = make_node (BLOCK);
8821 current_function_decl = decl;
8822 init_function_start (decl);
8823 first_function_block_is_cold = false;
8824 /* Make sure unwind info is emitted for the thunk if needed. */
8825 final_start_function (emit_barrier (), asm_out_file, 1);
8827 /* Pad stack IP move with 4 instructions (two NOPs count
8828 as one instruction). */
8829 if (TARGET_PAD_SHORT_FUNCTION)
8834 fputs ("\tnop\n", asm_out_file);
8837 xops[0] = gen_rtx_REG (Pmode, regno);
8838 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8839 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8840 fputs ("\tret\n", asm_out_file);
8841 final_end_function ();
8842 init_insn_lengths ();
8843 free_after_compilation (cfun);
8845 current_function_decl = NULL;
8848 if (flag_split_stack)
8849 file_end_indicate_split_stack ();
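/* For reference, each thunk emitted above is simply

	__i686.get_pc_thunk.bx:
		mov	(%esp), %ebx
		ret

   i.e. it copies its own return address (the address of the insn
   following the call) into the requested register.  */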
8852 /* Emit code for the SET_GOT patterns. */
8855 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8861 if (TARGET_VXWORKS_RTP && flag_pic)
8863 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8864 xops[2] = gen_rtx_MEM (Pmode,
8865 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8866 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8868 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8869 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8870 an unadorned address. */
8871 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8872 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8873 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8877 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8879 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8881 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8884 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8887 output_asm_insn ("call\t%a2", xops);
8888 #ifdef DWARF2_UNWIND_INFO
8889 /* The call to the next label acts as a push. */
8890 if (dwarf2out_do_frame ())
8894 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8895 gen_rtx_PLUS (Pmode,
8898 RTX_FRAME_RELATED_P (insn) = 1;
8899 dwarf2out_frame_debug (insn, true);
8906 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8907 is what will be referenced by the Mach-O PIC subsystem. */
8909 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8912 targetm.asm_out.internal_label (asm_out_file, "L",
8913 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8917 output_asm_insn ("pop%z0\t%0", xops);
8918 #ifdef DWARF2_UNWIND_INFO
8919 /* The pop is a pop and clobbers dest, but doesn't restore it
8920 for unwind info purposes. */
8921 if (dwarf2out_do_frame ())
8925 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8926 dwarf2out_frame_debug (insn, true);
8927 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8928 gen_rtx_PLUS (Pmode,
8931 RTX_FRAME_RELATED_P (insn) = 1;
8932 dwarf2out_frame_debug (insn, true);
8941 get_pc_thunk_name (name, REGNO (dest));
8942 pic_labels_used |= 1 << REGNO (dest);
8944 #ifdef DWARF2_UNWIND_INFO
8945 /* Ensure all queued register saves are flushed before the
8946 call. */
8947 if (dwarf2out_do_frame ())
8948 dwarf2out_flush_queued_reg_saves ();
8950 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8951 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8952 output_asm_insn ("call\t%X2", xops);
8953 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8954 is what will be referenced by the Mach-O PIC subsystem. */
8957 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8959 targetm.asm_out.internal_label (asm_out_file, "L",
8960 CODE_LABEL_NUMBER (label));
8967 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8968 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8970 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8975 /* Generate a "push" pattern for input ARG. */
8980 struct machine_function *m = cfun->machine;
8982 if (m->fs.cfa_reg == stack_pointer_rtx)
8983 m->fs.cfa_offset += UNITS_PER_WORD;
8984 m->fs.sp_offset += UNITS_PER_WORD;
8986 return gen_rtx_SET (VOIDmode,
8988 gen_rtx_PRE_DEC (Pmode,
8989 stack_pointer_rtx)),
8993 /* Generate a "pop" pattern for input ARG. */
8998 return gen_rtx_SET (VOIDmode,
9001 gen_rtx_POST_INC (Pmode,
9002 stack_pointer_rtx)));
9005 /* Return >= 0 if there is an unused call-clobbered register available
9006 for the entire function. */
9009 ix86_select_alt_pic_regnum (void)
9011 if (current_function_is_leaf
9013 && !ix86_current_function_calls_tls_descriptor)
9016 /* Can't use the same register for both PIC and DRAP. */
9018 drap = REGNO (crtl->drap_reg);
9021 for (i = 2; i >= 0; --i)
9022 if (i != drap && !df_regs_ever_live_p (i))
9026 return INVALID_REGNUM;
9029 /* Return 1 if we need to save REGNO. */
9031 ix86_save_reg (unsigned int regno, int maybe_eh_return)
9033 if (pic_offset_table_rtx
9034 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9035 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9037 || crtl->calls_eh_return
9038 || crtl->uses_const_pool))
9040 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
9045 if (crtl->calls_eh_return && maybe_eh_return)
9050 unsigned test = EH_RETURN_DATA_REGNO (i);
9051 if (test == INVALID_REGNUM)
9058 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9061 return (df_regs_ever_live_p (regno)
9062 && !call_used_regs[regno]
9063 && !fixed_regs[regno]
9064 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9067 /* Return the number of saved general purpose registers. */
9070 ix86_nsaved_regs (void)
9075 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9076 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9081 /* Return the number of saved SSE registers. */
9084 ix86_nsaved_sseregs (void)
9089 if (ix86_cfun_abi () != MS_ABI)
9091 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9092 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9097 /* Given FROM and TO register numbers, say whether this elimination is
9098 allowed. If stack alignment is needed, we can only replace argument
9099 pointer with hard frame pointer, or replace frame pointer with stack
9100 pointer. Otherwise, frame pointer elimination is automatically
9101 handled and all other eliminations are valid. */
9104 ix86_can_eliminate (const int from, const int to)
9106 if (stack_realign_fp)
9107 return ((from == ARG_POINTER_REGNUM
9108 && to == HARD_FRAME_POINTER_REGNUM)
9109 || (from == FRAME_POINTER_REGNUM
9110 && to == STACK_POINTER_REGNUM));
9112 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9115 /* Return the offset between two registers, one to be eliminated, and the other
9116 its replacement, at the start of a routine. */
9119 ix86_initial_elimination_offset (int from, int to)
9121 struct ix86_frame frame;
9122 ix86_compute_frame_layout (&frame);
9124 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9125 return frame.hard_frame_pointer_offset;
9126 else if (from == FRAME_POINTER_REGNUM
9127 && to == HARD_FRAME_POINTER_REGNUM)
9128 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9131 gcc_assert (to == STACK_POINTER_REGNUM);
9133 if (from == ARG_POINTER_REGNUM)
9134 return frame.stack_pointer_offset;
9136 gcc_assert (from == FRAME_POINTER_REGNUM);
9137 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9141 /* In a dynamically-aligned function, we can't know the offset from
9142 stack pointer to frame pointer, so we must ensure that setjmp
9143 eliminates fp against the hard fp (%ebp) rather than trying to
9144 index from %esp up to the top of the frame across a gap that is
9145 of unknown (at compile-time) size. */
9147 ix86_builtin_setjmp_frame_value (void)
9149 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9152 /* On the x86 -fsplit-stack and -fstack-protector both use the same
9153 field in the TCB, so they cannot be used together. */
9156 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9157 struct gcc_options *opts ATTRIBUTE_UNUSED)
9161 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9163 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9166 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9169 error ("%<-fsplit-stack%> requires "
9170 "assembler support for CFI directives");
9178 /* When using -fsplit-stack, the allocation routines set a field in
9179 the TCB to the bottom of the stack plus this much space, measured
9180 in bytes. */
9182 #define SPLIT_STACK_AVAILABLE 256
9184 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
9187 ix86_compute_frame_layout (struct ix86_frame *frame)
9189 unsigned int stack_alignment_needed;
9190 HOST_WIDE_INT offset;
9191 unsigned int preferred_alignment;
9192 HOST_WIDE_INT size = get_frame_size ();
9193 HOST_WIDE_INT to_allocate;
9195 frame->nregs = ix86_nsaved_regs ();
9196 frame->nsseregs = ix86_nsaved_sseregs ();
9198 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9199 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9201 /* The MS ABI seems to require stack alignment to always be 16, except
9202 for function prologues and leaf functions. */
9203 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
9204 && (!current_function_is_leaf || cfun->calls_alloca != 0
9205 || ix86_current_function_calls_tls_descriptor))
9207 preferred_alignment = 16;
9208 stack_alignment_needed = 16;
9209 crtl->preferred_stack_boundary = 128;
9210 crtl->stack_alignment_needed = 128;
9213 gcc_assert (!size || stack_alignment_needed);
9214 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9215 gcc_assert (preferred_alignment <= stack_alignment_needed);
9217 /* For SEH we have to limit the amount of code movement into the prologue.
9218 At present we do this via a BLOCKAGE, at which point there's very little
9219 scheduling that can be done, which means that there's very little point
9220 in doing anything except PUSHs. */
9222 cfun->machine->use_fast_prologue_epilogue = false;
9224 /* During the reload iteration the number of registers saved can change.
9225 Recompute the value as needed. Do not recompute when the number of
9226 registers didn't change, as reload makes multiple calls to the function
9227 and does not expect the decision to change within a single iteration. */
9228 else if (!optimize_function_for_size_p (cfun)
9229 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9231 int count = frame->nregs;
9232 struct cgraph_node *node = cgraph_node (current_function_decl);
9234 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9236 /* The fast prologue uses moves instead of pushes to save registers. This
9237 is significantly longer, but also executes faster, as modern hardware
9238 can execute the moves in parallel but can't do that for push/pop.
9240 Be careful about choosing which prologue to emit: when the function takes
9241 many instructions to execute, we may use the slow version, as well as
9242 when the function is known to be outside a hot spot (this is known with
9243 feedback only). Weight the size of the function by the number of registers
9244 to save, as it is cheap to use one or two push instructions but very
9245 slow to use many of them. */
9247 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9248 if (node->frequency < NODE_FREQUENCY_NORMAL
9249 || (flag_branch_probabilities
9250 && node->frequency < NODE_FREQUENCY_HOT))
9251 cfun->machine->use_fast_prologue_epilogue = false;
9253 cfun->machine->use_fast_prologue_epilogue
9254 = !expensive_function_p (count);
9256 if (TARGET_PROLOGUE_USING_MOVE
9257 && cfun->machine->use_fast_prologue_epilogue)
9258 frame->save_regs_using_mov = true;
9260 frame->save_regs_using_mov = false;
9262 /* If static stack checking is enabled and done with probes, the registers
9263 need to be saved before allocating the frame. */
9264 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9265 frame->save_regs_using_mov = false;
9267 /* Skip return address. */
9268 offset = UNITS_PER_WORD;
9270 /* Skip pushed static chain. */
9271 if (ix86_static_chain_on_stack)
9272 offset += UNITS_PER_WORD;
9274 /* Skip saved base pointer. */
9275 if (frame_pointer_needed)
9276 offset += UNITS_PER_WORD;
9277 frame->hfp_save_offset = offset;
9279 /* The traditional frame pointer location is at the top of the frame. */
9280 frame->hard_frame_pointer_offset = offset;
9282 /* Register save area */
9283 offset += frame->nregs * UNITS_PER_WORD;
9284 frame->reg_save_offset = offset;
9286 /* Align and set SSE register save area. */
9287 if (frame->nsseregs)
9289 /* The only ABI that has saved SSE registers (Win64) also has a
9290 16-byte aligned default stack, and thus we don't need to be
9291 within the re-aligned local stack frame to save them. */
9292 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9293 offset = (offset + 16 - 1) & -16;
9294 offset += frame->nsseregs * 16;
9296 frame->sse_reg_save_offset = offset;
9298 /* The re-aligned stack starts here. Values before this point are not
9299 directly comparable with values below this point. In order to make
9300 sure that no value happens to be the same before and after, force
9301 the alignment computation below to add a non-zero value. */
9302 if (stack_realign_fp)
9303 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9306 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9307 offset += frame->va_arg_size;
9309 /* Align start of frame for local function. */
9310 if (stack_realign_fp
9311 || offset != frame->sse_reg_save_offset
9313 || !current_function_is_leaf
9314 || cfun->calls_alloca
9315 || ix86_current_function_calls_tls_descriptor)
9316 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
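/* (X + A - 1) & -A rounds X up to the next multiple of the
   power-of-two A; e.g. with A == 16, offsets 1..16 map to 16 and
   17 maps to 32.  */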
9318 /* Frame pointer points here. */
9319 frame->frame_pointer_offset = offset;
9323 /* Add the outgoing arguments area. It can be skipped if we eliminated
9324 all the function calls as dead code.
9325 Skipping is however impossible when the function calls alloca. The
9326 alloca expander assumes that the last crtl->outgoing_args_size bytes
9327 of the stack frame are unused. */
9328 if (ACCUMULATE_OUTGOING_ARGS
9329 && (!current_function_is_leaf || cfun->calls_alloca
9330 || ix86_current_function_calls_tls_descriptor))
9332 offset += crtl->outgoing_args_size;
9333 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9336 frame->outgoing_arguments_size = 0;
9338 /* Align the stack boundary. Only needed if we're calling another function
9339 or using alloca. */
9340 if (!current_function_is_leaf || cfun->calls_alloca
9341 || ix86_current_function_calls_tls_descriptor)
9342 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9344 /* We've reached the end of the stack frame. */
9345 frame->stack_pointer_offset = offset;
9347 /* The size the prologue needs to allocate. */
9348 to_allocate = offset - frame->sse_reg_save_offset;
9350 if ((!to_allocate && frame->nregs <= 1)
9351 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9352 frame->save_regs_using_mov = false;
9354 if (ix86_using_red_zone ()
9355 && current_function_sp_is_unchanging
9356 && current_function_is_leaf
9357 && !ix86_current_function_calls_tls_descriptor)
9359 frame->red_zone_size = to_allocate;
9360 if (frame->save_regs_using_mov)
9361 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9362 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9363 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9366 frame->red_zone_size = 0;
9367 frame->stack_pointer_offset -= frame->red_zone_size;
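/* The red zone is the 128 bytes below the stack pointer that the
   x86-64 SysV ABI guarantees signal and interrupt handlers won't
   clobber, so a leaf function may use it without adjusting %rsp;
   that is why it can be deducted from the amount to allocate.  */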
9369 /* The SEH frame pointer location is near the bottom of the frame.
9370 This is enforced by the fact that the difference between the
9371 stack pointer and the frame pointer is limited to 240 bytes in
9372 the unwind data structure. */
9377 /* If we can leave the frame pointer where it is, do so. */
9378 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9379 if (diff > 240 || (diff & 15) != 0)
9381 /* Ideally we'd determine what portion of the local stack frame
9382 (within the constraint of the lowest 240) is most heavily used.
9383 But without that complication, simply bias the frame pointer
9384 by 128 bytes so as to maximize the amount of the local stack
9385 frame that is addressable with 8-bit offsets. */
9386 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9391 /* This is semi-inlined memory_address_length, but simplified
9392 since we know that we're always dealing with reg+offset, and
9393 to avoid having to create and discard all that rtl. */
9396 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9402 /* EBP and R13 cannot be encoded without an offset. */
9403 len = (regno == BP_REG || regno == R13_REG);
9405 else if (IN_RANGE (offset, -128, 127))
9408 /* ESP and R12 must be encoded with a SIB byte. */
9409 if (regno == SP_REG || regno == R12_REG)
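/* Examples of the extra bytes counted here: (%ebp) still needs a
   one-byte displacement even for offset zero, (%esp) needs a SIB
   byte, offsets in [-128, 127] take a single displacement byte and
   anything larger takes four.  */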
9415 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9416 The valid base registers are taken from CFUN->MACHINE->FS. */
9419 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9421 const struct machine_function *m = cfun->machine;
9422 rtx base_reg = NULL;
9423 HOST_WIDE_INT base_offset = 0;
9425 if (m->use_fast_prologue_epilogue)
9427 /* Choose the base register most likely to allow the most scheduling
9428 opportunities. Generally FP is valid throughout the function,
9429 while DRAP must be reloaded within the epilogue. But choose either
9430 over the SP due to increased encoding size. */
9434 base_reg = hard_frame_pointer_rtx;
9435 base_offset = m->fs.fp_offset - cfa_offset;
9437 else if (m->fs.drap_valid)
9439 base_reg = crtl->drap_reg;
9440 base_offset = 0 - cfa_offset;
9442 else if (m->fs.sp_valid)
9444 base_reg = stack_pointer_rtx;
9445 base_offset = m->fs.sp_offset - cfa_offset;
9450 HOST_WIDE_INT toffset;
9453 /* Choose the base register with the smallest address encoding.
9454 With a tie, choose FP > DRAP > SP. */
9457 base_reg = stack_pointer_rtx;
9458 base_offset = m->fs.sp_offset - cfa_offset;
9459 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9461 if (m->fs.drap_valid)
9463 toffset = 0 - cfa_offset;
9464 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9467 base_reg = crtl->drap_reg;
9468 base_offset = toffset;
9474 toffset = m->fs.fp_offset - cfa_offset;
9475 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9478 base_reg = hard_frame_pointer_rtx;
9479 base_offset = toffset;
9484 gcc_assert (base_reg != NULL);
9486 return plus_constant (base_reg, base_offset);
9489 /* Emit code to save registers in the prologue. */
9492 ix86_emit_save_regs (void)
9497 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9498 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9500 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9501 RTX_FRAME_RELATED_P (insn) = 1;
9505 /* Emit a single register save at CFA - CFA_OFFSET. */
9508 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9509 HOST_WIDE_INT cfa_offset)
9511 struct machine_function *m = cfun->machine;
9512 rtx reg = gen_rtx_REG (mode, regno);
9513 rtx mem, addr, base, insn;
9515 addr = choose_baseaddr (cfa_offset);
9516 mem = gen_frame_mem (mode, addr);
9518 /* For SSE saves, we need to indicate the 128-bit alignment. */
9519 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9521 insn = emit_move_insn (mem, reg);
9522 RTX_FRAME_RELATED_P (insn) = 1;
9525 if (GET_CODE (base) == PLUS)
9526 base = XEXP (base, 0);
9527 gcc_checking_assert (REG_P (base));
9529 /* When saving registers into a re-aligned local stack frame, avoid
9530 any tricky guessing by dwarf2out. */
9531 if (m->fs.realigned)
9533 gcc_checking_assert (stack_realign_drap);
9535 if (regno == REGNO (crtl->drap_reg))
9537 /* A bit of a hack. We force the DRAP register to be saved in
9538 the re-aligned stack frame, which provides us with a copy
9539 of the CFA that will last past the prologue. Install it. */
9540 gcc_checking_assert (cfun->machine->fs.fp_valid);
9541 addr = plus_constant (hard_frame_pointer_rtx,
9542 cfun->machine->fs.fp_offset - cfa_offset);
9543 mem = gen_rtx_MEM (mode, addr);
9544 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9548 /* The frame pointer is a stable reference within the
9549 aligned frame. Use it. */
9550 gcc_checking_assert (cfun->machine->fs.fp_valid);
9551 addr = plus_constant (hard_frame_pointer_rtx,
9552 cfun->machine->fs.fp_offset - cfa_offset);
9553 mem = gen_rtx_MEM (mode, addr);
9554 add_reg_note (insn, REG_CFA_EXPRESSION,
9555 gen_rtx_SET (VOIDmode, mem, reg));
9559 /* The memory may not be relative to the current CFA register,
9560 which means that we may need to generate a new pattern for
9561 use by the unwind info. */
9562 else if (base != m->fs.cfa_reg)
9564 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9565 mem = gen_rtx_MEM (mode, addr);
9566 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9570 /* Emit code to save registers using MOV insns.
9571 First register is stored at CFA - CFA_OFFSET. */
9573 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9577 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9578 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9580 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9581 cfa_offset -= UNITS_PER_WORD;
9585 /* Emit code to save SSE registers using MOV insns.
9586 First register is stored at CFA - CFA_OFFSET. */
9588 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9592 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9593 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9595 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9600 static GTY(()) rtx queued_cfa_restores;
9602 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9603 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9604 Don't add the note if the previously saved value will be left untouched
9605 within the stack red zone until return, as unwinders can find the same
9606 value in the register and on the stack. */
9609 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9611 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9616 add_reg_note (insn, REG_CFA_RESTORE, reg);
9617 RTX_FRAME_RELATED_P (insn) = 1;
9621 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9624 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9627 ix86_add_queued_cfa_restore_notes (rtx insn)
9630 if (!queued_cfa_restores)
9632 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9634 XEXP (last, 1) = REG_NOTES (insn);
9635 REG_NOTES (insn) = queued_cfa_restores;
9636 queued_cfa_restores = NULL_RTX;
9637 RTX_FRAME_RELATED_P (insn) = 1;
9640 /* Expand a prologue or epilogue stack adjustment.
9641 The pattern exists to put a dependency on all ebp-based memory accesses.
9642 STYLE should be negative if instructions should be marked as frame
9643 related, zero if the %r11 register is live and cannot be freely used,
9644 and positive otherwise. */
9647 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9648 int style, bool set_cfa)
9650 struct machine_function *m = cfun->machine;
9652 bool add_frame_related_expr = false;
9655 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9656 else if (x86_64_immediate_operand (offset, DImode))
9657 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9661 /* %r11 is used by the indirect sibcall return as well; it is set before
9662 the epilogue and used after the epilogue. */
9664 tmp = gen_rtx_REG (DImode, R11_REG);
9667 gcc_assert (src != hard_frame_pointer_rtx
9668 && dest != hard_frame_pointer_rtx);
9669 tmp = hard_frame_pointer_rtx;
9671 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9673 add_frame_related_expr = true;
9675 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9678 insn = emit_insn (insn);
9680 ix86_add_queued_cfa_restore_notes (insn);
9686 gcc_assert (m->fs.cfa_reg == src);
9687 m->fs.cfa_offset += INTVAL (offset);
9688 m->fs.cfa_reg = dest;
9690 r = gen_rtx_PLUS (Pmode, src, offset);
9691 r = gen_rtx_SET (VOIDmode, dest, r);
9692 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9693 RTX_FRAME_RELATED_P (insn) = 1;
9697 RTX_FRAME_RELATED_P (insn) = 1;
9698 if (add_frame_related_expr)
9700 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9701 r = gen_rtx_SET (VOIDmode, dest, r);
9702 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9706 if (dest == stack_pointer_rtx)
9708 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9709 bool valid = m->fs.sp_valid;
9711 if (src == hard_frame_pointer_rtx)
9713 valid = m->fs.fp_valid;
9714 ooffset = m->fs.fp_offset;
9716 else if (src == crtl->drap_reg)
9718 valid = m->fs.drap_valid;
9723 /* Else there are two possibilities: SP itself, which we set
9724 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9725 taken care of by hand along the eh_return path. */
9726 gcc_checking_assert (src == stack_pointer_rtx
9727 || offset == const0_rtx);
9730 m->fs.sp_offset = ooffset - INTVAL (offset);
9731 m->fs.sp_valid = valid;
9735 /* Find an available register to be used as the dynamic realign argument
9736 pointer register. Such a register will be written in the prologue and
9737 used at the beginning of the body, so it must not be
9738 1. a parameter passing register.
9740 We reuse the static-chain register if it is available. Otherwise, we
9741 use DI for i386 and R13 for x86-64. We chose R13 since it has
9744 Return: the regno of the chosen register. */
9747 find_drap_reg (void)
9749 tree decl = cfun->decl;
9753 /* Use R13 for a nested function or a function that needs a static chain.
9754 Since a function with a tail call may use any caller-saved
9755 register in the epilogue, DRAP must not use a caller-saved
9756 register in such a case. */
9757 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9764 /* Use DI for a nested function or a function that needs a static chain.
9765 Since a function with a tail call may use any caller-saved
9766 register in the epilogue, DRAP must not use a caller-saved
9767 register in such a case. */
9768 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9771 /* Reuse the static chain register if it isn't used for parameter
9772 passing. */
9773 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9774 && !lookup_attribute ("fastcall",
9775 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9776 && !lookup_attribute ("thiscall",
9777 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9784 /* Return minimum incoming stack alignment. */
9787 ix86_minimum_incoming_stack_boundary (bool sibcall)
9789 unsigned int incoming_stack_boundary;
9791 /* Prefer the one specified at command line. */
9792 if (ix86_user_incoming_stack_boundary)
9793 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9794 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9795 if -mstackrealign is used, this isn't a sibcall check, and the
9796 estimated stack alignment is 128 bits. */
9799 && ix86_force_align_arg_pointer
9800 && crtl->stack_alignment_estimated == 128)
9801 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9803 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9805 /* Incoming stack alignment can be changed on individual functions
9806 via force_align_arg_pointer attribute. We use the smallest
9807 incoming stack boundary. */
9808 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9809 && lookup_attribute (ix86_force_align_arg_pointer_string,
9810 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9811 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9813 /* The incoming stack frame has to be aligned at least at
9814 parm_stack_boundary. */
9815 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9816 incoming_stack_boundary = crtl->parm_stack_boundary;
9818 /* The stack at the entrance of main is aligned by the runtime. We use
9819 the smallest incoming stack boundary. */
9820 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9821 && DECL_NAME (current_function_decl)
9822 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9823 && DECL_FILE_SCOPE_P (current_function_decl))
9824 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9826 return incoming_stack_boundary;
9829 /* Update incoming stack boundary and estimated stack alignment. */
9832 ix86_update_stack_boundary (void)
9834 ix86_incoming_stack_boundary
9835 = ix86_minimum_incoming_stack_boundary (false);
9837 /* x86_64 varargs need 16-byte stack alignment for the register save
9841 && crtl->stack_alignment_estimated < 128)
9842 crtl->stack_alignment_estimated = 128;
9845 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9846 needed or an rtx for DRAP otherwise. */
9849 ix86_get_drap_rtx (void)
9851 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9852 crtl->need_drap = true;
9854 if (stack_realign_drap)
9856 /* Assign DRAP to vDRAP and return vDRAP. */
9857 unsigned int regno = find_drap_reg ();
9862 arg_ptr = gen_rtx_REG (Pmode, regno);
9863 crtl->drap_reg = arg_ptr;
9866 drap_vreg = copy_to_reg (arg_ptr);
9870 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9873 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9874 RTX_FRAME_RELATED_P (insn) = 1;
9882 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9885 ix86_internal_arg_pointer (void)
9887 return virtual_incoming_args_rtx;
9890 struct scratch_reg {
9895 /* Return a short-lived scratch register for use on function entry.
9896 In 32-bit mode, it is valid only after the registers are saved
9897 in the prologue. This register must be released by means of
9898 release_scratch_register_on_entry once it is dead. */
9901 get_scratch_register_on_entry (struct scratch_reg *sr)
9909 /* We always use R11 in 64-bit mode. */
9914 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9916 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9917 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9918 int regparm = ix86_function_regparm (fntype, decl);
9920 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9922 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9923 for the static chain register. */
9924 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9925 && drap_regno != AX_REG)
9927 else if (regparm < 2 && drap_regno != DX_REG)
9929 /* ecx is the static chain register. */
9930 else if (regparm < 3 && !fastcall_p && !static_chain_p
9931 && drap_regno != CX_REG)
9933 else if (ix86_save_reg (BX_REG, true))
9935 /* esi is the static chain register. */
9936 else if (!(regparm == 3 && static_chain_p)
9937 && ix86_save_reg (SI_REG, true))
9939 else if (ix86_save_reg (DI_REG, true))
9943 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9948 sr->reg = gen_rtx_REG (Pmode, regno);
9951 rtx insn = emit_insn (gen_push (sr->reg));
9952 RTX_FRAME_RELATED_P (insn) = 1;
9956 /* Release a scratch register obtained from the preceding function. */
9959 release_scratch_register_on_entry (struct scratch_reg *sr)
9963 rtx x, insn = emit_insn (gen_pop (sr->reg));
9965 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9966 RTX_FRAME_RELATED_P (insn) = 1;
9967 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9968 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9969 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9973 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9975 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9978 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9980 /* We skip the probe for the first interval + a small dope of 4 words and
9981 probe that many bytes past the specified size to maintain a protection
9982 area at the bottom of the stack. */
9983 const int dope = 4 * UNITS_PER_WORD;
9984 rtx size_rtx = GEN_INT (size);
9986 /* See if we have a constant small number of probes to generate. If so,
9987 that's the easy case. The run-time loop is made up of 11 insns in the
9988 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9989 for n # of intervals. */
9990 if (size <= 5 * PROBE_INTERVAL)
9992 HOST_WIDE_INT i, adjust;
9993 bool first_probe = true;
9995 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9996 values of N from 1 until it exceeds SIZE. If only one probe is
9997 needed, this will not generate any code. Then adjust and probe
9998 to PROBE_INTERVAL + SIZE. */
9999 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10003 adjust = 2 * PROBE_INTERVAL + dope;
10004 first_probe = false;
10007 adjust = PROBE_INTERVAL;
10009 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10010 plus_constant (stack_pointer_rtx, -adjust)));
10011 emit_stack_probe (stack_pointer_rtx);
10015 adjust = size + PROBE_INTERVAL + dope;
10017 adjust = size + PROBE_INTERVAL - i;
10019 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10020 plus_constant (stack_pointer_rtx, -adjust)));
10021 emit_stack_probe (stack_pointer_rtx);
10023 /* Adjust back to account for the additional first interval. */
10024 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10025 plus_constant (stack_pointer_rtx,
10026 PROBE_INTERVAL + dope)));
10029 /* Otherwise, do the same as above, but in a loop. Note that we must be
10030 extra careful with variables wrapping around because we might be at
10031 the very top (or the very bottom) of the address space and we have
10032 to be able to handle this case properly; in particular, we use an
10033 equality test for the loop condition. */
10036 HOST_WIDE_INT rounded_size;
10037 struct scratch_reg sr;
10039 get_scratch_register_on_entry (&sr);
10042 /* Step 1: round SIZE to the previous multiple of the interval. */
10044 rounded_size = size & -PROBE_INTERVAL;
10047 /* Step 2: compute initial and final value of the loop counter. */
10049 /* SP = SP_0 + PROBE_INTERVAL. */
10050 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10051 plus_constant (stack_pointer_rtx,
10052 - (PROBE_INTERVAL + dope))));
10054 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10055 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10056 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10057 gen_rtx_PLUS (Pmode, sr.reg,
10058 stack_pointer_rtx)));
10061 /* Step 3: the loop
10063 while (SP != LAST_ADDR)
10065 SP = SP + PROBE_INTERVAL
10069 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10070 values of N from 1 until it is equal to ROUNDED_SIZE. */
10072 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10075 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10076 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10078 if (size != rounded_size)
10080 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10081 plus_constant (stack_pointer_rtx,
10082 rounded_size - size)));
10083 emit_stack_probe (stack_pointer_rtx);
10086 /* Adjust back to account for the additional first interval. */
10087 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10088 plus_constant (stack_pointer_rtx,
10089 PROBE_INTERVAL + dope)));
10091 release_scratch_register_on_entry (&sr);
10094 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10095 cfun->machine->fs.sp_offset += size;
10097 /* Make sure nothing is scheduled before we are done. */
10098 emit_insn (gen_blockage ());
10101 /* Adjust the stack pointer up to REG while probing it. */
10104 output_adjust_stack_and_probe (rtx reg)
10106 static int labelno = 0;
10107 char loop_lab[32], end_lab[32];
10110 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10111 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10113 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10115 /* Jump to END_LAB if SP == LAST_ADDR. */
10116 xops[0] = stack_pointer_rtx;
10118 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10119 fputs ("\tje\t", asm_out_file);
10120 assemble_name_raw (asm_out_file, end_lab);
10121 fputc ('\n', asm_out_file);
10123 /* SP = SP + PROBE_INTERVAL. */
10124 xops[1] = GEN_INT (PROBE_INTERVAL);
10125 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10128 xops[1] = const0_rtx;
10129 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10131 fprintf (asm_out_file, "\tjmp\t");
10132 assemble_name_raw (asm_out_file, loop_lab);
10133 fputc ('\n', asm_out_file);
10135 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
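/* With the default 4096-byte probe interval and, say, %eax as the
   scratch register, the loop printed above is roughly

	.LPSRL0:	cmpl	%eax, %esp
			je	.LPSRE0
			subl	$4096, %esp
			orl	$0, (%esp)
			jmp	.LPSRL0
	.LPSRE0:

   where the "orl $0" touches the freshly exposed page without
   changing the stack contents.  */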
10140 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10141 inclusive. These are offsets from the current stack pointer. */
10144 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10146 /* See if we have a constant small number of probes to generate. If so,
10147 that's the easy case. The run-time loop is made up of 7 insns in the
10148 generic case while the compile-time loop is made up of n insns for n #
10149 of intervals. */
10150 if (size <= 7 * PROBE_INTERVAL)
10154 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10155 it exceeds SIZE. If only one probe is needed, this will not
10156 generate any code. Then probe at FIRST + SIZE. */
10157 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10158 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10160 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10163 /* Otherwise, do the same as above, but in a loop. Note that we must be
10164 extra careful with variables wrapping around because we might be at
10165 the very top (or the very bottom) of the address space and we have
10166 to be able to handle this case properly; in particular, we use an
10167 equality test for the loop condition. */
10170 HOST_WIDE_INT rounded_size, last;
10171 struct scratch_reg sr;
10173 get_scratch_register_on_entry (&sr);
10176 /* Step 1: round SIZE to the previous multiple of the interval. */
10178 rounded_size = size & -PROBE_INTERVAL;
10181 /* Step 2: compute initial and final value of the loop counter. */
10183 /* TEST_OFFSET = FIRST. */
10184 emit_move_insn (sr.reg, GEN_INT (-first));
10186 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10187 last = first + rounded_size;
10190 /* Step 3: the loop
10192 while (TEST_ADDR != LAST_ADDR)
10194 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10198 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10199 until it is equal to ROUNDED_SIZE. */
10201 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10204 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10205 that SIZE is equal to ROUNDED_SIZE. */
10207 if (size != rounded_size)
10208 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10211 rounded_size - size));
10213 release_scratch_register_on_entry (&sr);
10216 /* Make sure nothing is scheduled before we are done. */
10217 emit_insn (gen_blockage ());
10220 /* Probe a range of stack addresses from REG to END, inclusive. These are
10221 offsets from the current stack pointer. */
10224 output_probe_stack_range (rtx reg, rtx end)
10226 static int labelno = 0;
10227 char loop_lab[32], end_lab[32];
10230 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10231 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10233 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10235 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10238 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10239 fputs ("\tje\t", asm_out_file);
10240 assemble_name_raw (asm_out_file, end_lab);
10241 fputc ('\n', asm_out_file);
10243 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10244 xops[1] = GEN_INT (PROBE_INTERVAL);
10245 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10247 /* Probe at TEST_ADDR. */
10248 xops[0] = stack_pointer_rtx;
10250 xops[2] = const0_rtx;
10251 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10253 fprintf (asm_out_file, "\tjmp\t");
10254 assemble_name_raw (asm_out_file, loop_lab);
10255 fputc ('\n', asm_out_file);
10257 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10262 /* Finalize the stack_realign_needed flag, which will guide the
10263 prologue/epilogue to be generated in the correct form. */
10265 ix86_finalize_stack_realign_flags (void)
10267 /* Check if stack realignment is really needed after reload, and
10268 store the result in cfun. */
10269 unsigned int incoming_stack_boundary
10270 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10271 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10272 unsigned int stack_realign = (incoming_stack_boundary
10273 < (current_function_is_leaf
10274 ? crtl->max_used_stack_slot_alignment
10275 : crtl->stack_alignment_needed));
10277 if (crtl->stack_realign_finalized)
10279 /* After stack_realign_needed is finalized, we can no longer
10280 update it. */
10281 gcc_assert (crtl->stack_realign_needed == stack_realign);
10285 crtl->stack_realign_needed = stack_realign;
10286 crtl->stack_realign_finalized = true;
10290 /* Expand the prologue into a bunch of separate insns. */
10293 ix86_expand_prologue (void)
10295 struct machine_function *m = cfun->machine;
10298 struct ix86_frame frame;
10299 HOST_WIDE_INT allocate;
10300 bool int_registers_saved;
10302 ix86_finalize_stack_realign_flags ();
10304 /* DRAP should not coexist with stack_realign_fp. */
10305 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10307 memset (&m->fs, 0, sizeof (m->fs));
10309 /* Initialize CFA state for before the prologue. */
10310 m->fs.cfa_reg = stack_pointer_rtx;
10311 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10313 /* Track SP offset to the CFA. We continue tracking this after we've
10314 swapped the CFA register away from SP. In the case of re-alignment
10315      this is fudged; we're interested in offsets within the local frame.  */
10316 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10317 m->fs.sp_valid = true;
10319 ix86_compute_frame_layout (&frame);
10321 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10323 /* We should have already generated an error for any use of
10324 ms_hook on a nested function. */
10325 gcc_checking_assert (!ix86_static_chain_on_stack);
10327       /* Check if profiling is active and whether we shall use the
10328	  profiling-before-prologue variant.  If so, issue a sorry.  */
10329 if (crtl->profile && flag_fentry != 0)
10330 sorry ("ms_hook_prologue attribute isn%'t compatible "
10331 "with -mfentry for 32-bit");
10333 /* In ix86_asm_output_function_label we emitted:
10334	 8b ff	movl.s %edi,%edi
10335	 55	push   %ebp
10336	 8b ec	movl.s %esp,%ebp
10338 This matches the hookable function prologue in Win32 API
10339 functions in Microsoft Windows XP Service Pack 2 and newer.
10340 Wine uses this to enable Windows apps to hook the Win32 API
10341 functions provided by Wine.
10343 What that means is that we've already set up the frame pointer. */
10345 if (frame_pointer_needed
10346 && !(crtl->drap_reg && crtl->stack_realign_needed))
10350 /* We've decided to use the frame pointer already set up.
10351 Describe this to the unwinder by pretending that both
10352 push and mov insns happen right here.
10354 Putting the unwind info here at the end of the ms_hook
10355 is done so that we can make absolutely certain we get
10356 the required byte sequence at the start of the function,
10357 rather than relying on an assembler that can produce
10358 the exact encoding required.
10360 However it does mean (in the unpatched case) that we have
10361 a 1 insn window where the asynchronous unwind info is
10362 incorrect. However, if we placed the unwind info at
10363 its correct location we would have incorrect unwind info
10364 in the patched case. Which is probably all moot since
10365 I don't expect Wine generates dwarf2 unwind info for the
10366 system libraries that use this feature. */
10368 insn = emit_insn (gen_blockage ());
10370 push = gen_push (hard_frame_pointer_rtx);
10371 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10372 stack_pointer_rtx);
10373 RTX_FRAME_RELATED_P (push) = 1;
10374 RTX_FRAME_RELATED_P (mov) = 1;
10376 RTX_FRAME_RELATED_P (insn) = 1;
10377 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10378 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10380 /* Note that gen_push incremented m->fs.cfa_offset, even
10381 though we didn't emit the push insn here. */
10382 m->fs.cfa_reg = hard_frame_pointer_rtx;
10383 m->fs.fp_offset = m->fs.cfa_offset;
10384 m->fs.fp_valid = true;
10388 /* The frame pointer is not needed so pop %ebp again.
10389 This leaves us with a pristine state. */
10390 emit_insn (gen_pop (hard_frame_pointer_rtx));
10394 /* The first insn of a function that accepts its static chain on the
10395 stack is to push the register that would be filled in by a direct
10396 call. This insn will be skipped by the trampoline. */
10397 else if (ix86_static_chain_on_stack)
10399 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10400 emit_insn (gen_blockage ());
10402 /* We don't want to interpret this push insn as a register save,
10403 only as a stack adjustment. The real copy of the register as
10404 a save will be done later, if needed. */
10405 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10406 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10407 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10408 RTX_FRAME_RELATED_P (insn) = 1;
10411   /* Emit prologue code to adjust stack alignment and set up the DRAP,
10412      in case DRAP is needed and stack realignment is really needed after reload.  */
10413 if (stack_realign_drap)
10415 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10417 /* Only need to push parameter pointer reg if it is caller saved. */
10418 if (!call_used_regs[REGNO (crtl->drap_reg)])
10420	  /* Push the arg pointer reg.  */
10421 insn = emit_insn (gen_push (crtl->drap_reg));
10422 RTX_FRAME_RELATED_P (insn) = 1;
10425 /* Grab the argument pointer. */
10426 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10427 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10428 RTX_FRAME_RELATED_P (insn) = 1;
10429 m->fs.cfa_reg = crtl->drap_reg;
10430 m->fs.cfa_offset = 0;
10432 /* Align the stack. */
10433 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10435 GEN_INT (-align_bytes)));
10436 RTX_FRAME_RELATED_P (insn) = 1;
10438       /* Replicate the return address on the stack so that the return
10439	 address can be reached via the (argp - 1) slot.  This is needed
10440	 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10441	 expand_builtin_return_addr, etc.  */
10442 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10443 t = gen_frame_mem (Pmode, t);
10444 insn = emit_insn (gen_push (t));
10445 RTX_FRAME_RELATED_P (insn) = 1;
10447 /* For the purposes of frame and register save area addressing,
10448 we've started over with a new frame. */
10449 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10450       m->fs.realigned = true;
10451     }
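  /* A sketch of the insns emitted by the block above, assuming %ecx as
     the DRAP register and 16-byte alignment (both vary with the
     function being compiled):

	 leal	4(%esp), %ecx	# grab the argument pointer
	 andl	$-16, %esp	# align the stack
	 pushl	-4(%ecx)	# replicate the return address

     The lea displacement is really m->fs.sp_offset, so it grows if the
     DRAP register or the static chain was pushed first.  */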
10453 if (frame_pointer_needed && !m->fs.fp_valid)
10455 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10456 slower on all targets. Also sdb doesn't like it. */
10457 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10458 RTX_FRAME_RELATED_P (insn) = 1;
10460 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10462 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10463 RTX_FRAME_RELATED_P (insn) = 1;
10465 if (m->fs.cfa_reg == stack_pointer_rtx)
10466 m->fs.cfa_reg = hard_frame_pointer_rtx;
10467 m->fs.fp_offset = m->fs.sp_offset;
10468 m->fs.fp_valid = true;
10472 int_registers_saved = (frame.nregs == 0);
10474 if (!int_registers_saved)
10476 /* If saving registers via PUSH, do so now. */
10477 if (!frame.save_regs_using_mov)
10479 ix86_emit_save_regs ();
10480 int_registers_saved = true;
10481 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10484       /* When using the red zone we may start register saving before allocating
10485	 the stack frame, saving one cycle of the prologue.  However, avoid
10486 doing this if we have to probe the stack; at least on x86_64 the
10487 stack probe can turn into a call that clobbers a red zone location. */
10488 else if (ix86_using_red_zone ()
10489 && (! TARGET_STACK_PROBE
10490 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10492 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10493 int_registers_saved = true;
10497 if (stack_realign_fp)
10499 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10500 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10502 /* The computation of the size of the re-aligned stack frame means
10503 that we must allocate the size of the register save area before
10504 performing the actual alignment. Otherwise we cannot guarantee
10505 that there's enough storage above the realignment point. */
10506 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10507 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10508 GEN_INT (m->fs.sp_offset
10509					    - frame.sse_reg_save_offset),
10510				   -1, false);
10512 /* Align the stack. */
10513 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10515 GEN_INT (-align_bytes)));
10517 /* For the purposes of register save area addressing, the stack
10518 pointer is no longer valid. As for the value of sp_offset,
10519 see ix86_compute_frame_layout, which we need to match in order
10520 to pass verification of stack_pointer_offset at the end. */
10521 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10522 m->fs.sp_valid = false;
10525 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10527 if (flag_stack_usage)
10529 /* We start to count from ARG_POINTER. */
10530 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10532 /* If it was realigned, take into account the fake frame. */
10533 if (stack_realign_drap)
10535 if (ix86_static_chain_on_stack)
10536 stack_size += UNITS_PER_WORD;
10538 if (!call_used_regs[REGNO (crtl->drap_reg)])
10539 stack_size += UNITS_PER_WORD;
10541 /* This over-estimates by 1 minimal-stack-alignment-unit but
10542 mitigates that by counting in the new return address slot. */
10543 current_function_dynamic_stack_size
10544 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10547 current_function_static_stack_size = stack_size;
10550 /* The stack has already been decremented by the instruction calling us
10551 so we need to probe unconditionally to preserve the protection area. */
10552 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10554 /* We expect the registers to be saved when probes are used. */
10555 gcc_assert (int_registers_saved);
10557 if (STACK_CHECK_MOVING_SP)
10558	{
10559	  ix86_adjust_stack_and_probe (allocate);
10560	  allocate = 0;
10561	}
10562      else
10563	{
10564 HOST_WIDE_INT size = allocate;
10566 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10567 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10569 if (TARGET_STACK_PROBE)
10570	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10571	  else
10572	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10573	}
10574     }
10575
10576   if (allocate == 0)
10577     ;
10578 else if (!ix86_target_stack_probe ()
10579 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10581 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10582 GEN_INT (-allocate), -1,
10583 m->fs.cfa_reg == stack_pointer_rtx);
10587 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10588	  rtx r10 = NULL;
10589	  rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10591 bool eax_live = false;
10592 bool r10_live = false;
10594	  if (TARGET_64BIT)
10595	    r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10596 if (!TARGET_64BIT_MS_ABI)
10597 eax_live = ix86_eax_live_at_start_p ();
10599	  if (eax_live)
10600	    {
10601	      emit_insn (gen_push (eax));
10602 allocate -= UNITS_PER_WORD;
10603	    }
10604	  if (r10_live)
10605	    {
10606	      r10 = gen_rtx_REG (Pmode, R10_REG);
10607 emit_insn (gen_push (r10));
10608	      allocate -= UNITS_PER_WORD;
10609	    }
10611 emit_move_insn (eax, GEN_INT (allocate));
10612 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10614 /* Use the fact that AX still contains ALLOCATE. */
10615 adjust_stack_insn = (TARGET_64BIT
10616 ? gen_pro_epilogue_adjust_stack_di_sub
10617 : gen_pro_epilogue_adjust_stack_si_sub);
10619 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10620 stack_pointer_rtx, eax));
10622 /* Note that SEH directives need to continue tracking the stack
10623 pointer even after the frame pointer has been set up. */
10624 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10626 if (m->fs.cfa_reg == stack_pointer_rtx)
10627 m->fs.cfa_offset += allocate;
10629 RTX_FRAME_RELATED_P (insn) = 1;
10630 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10631 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10632						plus_constant (stack_pointer_rtx,
10633							       -allocate)));
10634	    }
10635 m->fs.sp_offset += allocate;
10637 if (r10_live && eax_live)
10639 t = choose_baseaddr (m->fs.sp_offset - allocate);
10640 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10641 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10642 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10644 else if (eax_live || r10_live)
10646 t = choose_baseaddr (m->fs.sp_offset - allocate);
10647 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10650 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10652   /* If we haven't already set up the frame pointer, do so now.  */
10653 if (frame_pointer_needed && !m->fs.fp_valid)
10655 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10656 GEN_INT (frame.stack_pointer_offset
10657 - frame.hard_frame_pointer_offset));
10658 insn = emit_insn (insn);
10659 RTX_FRAME_RELATED_P (insn) = 1;
10660 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10662 if (m->fs.cfa_reg == stack_pointer_rtx)
10663 m->fs.cfa_reg = hard_frame_pointer_rtx;
10664 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10665 m->fs.fp_valid = true;
10668 if (!int_registers_saved)
10669 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10670 if (frame.nsseregs)
10671 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10673 pic_reg_used = false;
10674 if (pic_offset_table_rtx
10675 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10678 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10680 if (alt_pic_reg_used != INVALID_REGNUM)
10681 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10683 pic_reg_used = true;
10690 if (ix86_cmodel == CM_LARGE_PIC)
10692 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10693 rtx label = gen_label_rtx ();
10694 emit_label (label);
10695 LABEL_PRESERVE_P (label) = 1;
10696 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10697 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10698 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10699 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10700 pic_offset_table_rtx, tmp_reg));
10703 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10706 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10709 /* In the pic_reg_used case, make sure that the got load isn't deleted
10710 when mcount needs it. Blockage to avoid call movement across mcount
10711      call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10712      note.  */
10713 if (crtl->profile && !flag_fentry && pic_reg_used)
10714 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10716 if (crtl->drap_reg && !crtl->stack_realign_needed)
10718       /* The vDRAP register is set up, but after reload it turns out that
10719	 stack realignment isn't necessary; here we emit the prologue to
10720	 set up the DRAP without the stack-realignment adjustment.  */
10721 t = choose_baseaddr (0);
10722 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10725 /* Prevent instructions from being scheduled into register save push
10726 sequence when access to the redzone area is done through frame pointer.
10727 The offset between the frame pointer and the stack pointer is calculated
10728 relative to the value of the stack pointer at the end of the function
10729      prologue, and moving instructions that access the redzone area via the
10730      frame pointer inside the push sequence violates this assumption.  */
10731 if (frame_pointer_needed && frame.red_zone_size)
10732 emit_insn (gen_memory_blockage ());
10734 /* Emit cld instruction if stringops are used in the function. */
10735 if (TARGET_CLD && ix86_current_function_needs_cld)
10736 emit_insn (gen_cld ());
10738 /* SEH requires that the prologue end within 256 bytes of the start of
10739 the function. Prevent instruction schedules that would extend that. */
10740   if (TARGET_SEH)
10741     emit_insn (gen_blockage ());
10742 }
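/* Taken together, a typical ia32 frame-pointer prologue produced by the
   code above looks like this (a sketch only; the exact insns depend on
   the frame layout and target flags):

       pushl	%ebp
       movl	%esp, %ebp
       pushl	%ebx		# one saved register
       subl	$N, %esp	# N is the ALLOCATE amount computed above  */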
10744 /* Emit code to restore REG using a POP insn. */
10746 static void
10747 ix86_emit_restore_reg_using_pop (rtx reg)
10748 {
10749 struct machine_function *m = cfun->machine;
10750 rtx insn = emit_insn (gen_pop (reg));
10752 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10753 m->fs.sp_offset -= UNITS_PER_WORD;
10755 if (m->fs.cfa_reg == crtl->drap_reg
10756 && REGNO (reg) == REGNO (crtl->drap_reg))
10758 /* Previously we'd represented the CFA as an expression
10759 like *(%ebp - 8). We've just popped that value from
10760 the stack, which means we need to reset the CFA to
10761 the drap register. This will remain until we restore
10762 the stack pointer. */
10763 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10764 RTX_FRAME_RELATED_P (insn) = 1;
10766 /* This means that the DRAP register is valid for addressing too. */
10767 m->fs.drap_valid = true;
10771 if (m->fs.cfa_reg == stack_pointer_rtx)
10773 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10774 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10775 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10776 RTX_FRAME_RELATED_P (insn) = 1;
10778 m->fs.cfa_offset -= UNITS_PER_WORD;
10781 /* When the frame pointer is the CFA, and we pop it, we are
10782 swapping back to the stack pointer as the CFA. This happens
10783 for stack frames that don't allocate other data, so we assume
10784 the stack pointer is now pointing at the return address, i.e.
10785 the function entry state, which makes the offset be 1 word. */
10786 if (reg == hard_frame_pointer_rtx)
10788 m->fs.fp_valid = false;
10789 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10791 m->fs.cfa_reg = stack_pointer_rtx;
10792 m->fs.cfa_offset -= UNITS_PER_WORD;
10794 add_reg_note (insn, REG_CFA_DEF_CFA,
10795 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10796 GEN_INT (m->fs.cfa_offset)));
10797 RTX_FRAME_RELATED_P (insn) = 1;
10802 /* Emit code to restore saved registers using POP insns. */
10804 static void
10805 ix86_emit_restore_regs_using_pop (void)
10806 {
10807 unsigned int regno;
10809 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10810 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10811 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10814 /* Emit code and notes for the LEAVE instruction. */
10816 static void
10817 ix86_emit_leave (void)
10818 {
10819 struct machine_function *m = cfun->machine;
10820 rtx insn = emit_insn (ix86_gen_leave ());
10822 ix86_add_queued_cfa_restore_notes (insn);
10824 gcc_assert (m->fs.fp_valid);
10825 m->fs.sp_valid = true;
10826 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10827 m->fs.fp_valid = false;
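  /* leave is equivalent to "movl %ebp, %esp; popl %ebp": the pop leaves
     %esp one word above the slot the old frame pointer was saved in,
     which is why sp_offset becomes fp_offset - UNITS_PER_WORD above.  */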
10829 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10831 m->fs.cfa_reg = stack_pointer_rtx;
10832 m->fs.cfa_offset = m->fs.sp_offset;
10834 add_reg_note (insn, REG_CFA_DEF_CFA,
10835 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10836 RTX_FRAME_RELATED_P (insn) = 1;
10837       ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10838				 m->fs.fp_offset);
10839     }
10840 }
10842 /* Emit code to restore saved registers using MOV insns.
10843 First register is restored from CFA - CFA_OFFSET. */
10844 static void
10845 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10846				  int maybe_eh_return)
10847 {
10848 struct machine_function *m = cfun->machine;
10849 unsigned int regno;
10851 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10852 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10854 rtx reg = gen_rtx_REG (Pmode, regno);
10857 mem = choose_baseaddr (cfa_offset);
10858 mem = gen_frame_mem (Pmode, mem);
10859 insn = emit_move_insn (reg, mem);
10861 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10863 /* Previously we'd represented the CFA as an expression
10864	    like *(%ebp - 8).  We've just reloaded that value from
10865	    the stack, which means we need to reset the CFA to
10866 the drap register. This will remain until we restore
10867 the stack pointer. */
10868 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10869 RTX_FRAME_RELATED_P (insn) = 1;
10871 /* This means that the DRAP register is valid for addressing. */
10872 m->fs.drap_valid = true;
10875 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10877 cfa_offset -= UNITS_PER_WORD;
10881 /* Emit code to restore saved registers using MOV insns.
10882 First register is restored from CFA - CFA_OFFSET. */
10883 static void
10884 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10885				      int maybe_eh_return)
10886 {
10887 unsigned int regno;
10889 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10890 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10892 rtx reg = gen_rtx_REG (V4SFmode, regno);
10895 mem = choose_baseaddr (cfa_offset);
10896 mem = gen_rtx_MEM (V4SFmode, mem);
10897 set_mem_align (mem, 128);
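	/* The 128-bit alignment recorded on the MEM lets the move be
	   emitted as an aligned load (movaps) instead of an unaligned
	   one (movups); V4SFmode is used for the restore regardless of
	   the mode the register was saved in, mirroring the save path.  */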
10898 emit_move_insn (reg, mem);
10900	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10901
10902	cfa_offset -= 16;
10903       }
10904 }
10906 /* Restore function stack, frame, and registers. */
10908 void
10909 ix86_expand_epilogue (int style)
10910 {
10911 struct machine_function *m = cfun->machine;
10912 struct machine_frame_state frame_state_save = m->fs;
10913 struct ix86_frame frame;
10914   bool restore_regs_via_mov;
10915   bool using_drap;
10916
10917 ix86_finalize_stack_realign_flags ();
10918 ix86_compute_frame_layout (&frame);
10920 m->fs.sp_valid = (!frame_pointer_needed
10921 || (current_function_sp_is_unchanging
10922 && !stack_realign_fp));
10923 gcc_assert (!m->fs.sp_valid
10924 || m->fs.sp_offset == frame.stack_pointer_offset);
10926 /* The FP must be valid if the frame pointer is present. */
10927 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10928 gcc_assert (!m->fs.fp_valid
10929 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10931 /* We must have *some* valid pointer to the stack frame. */
10932 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10934 /* The DRAP is never valid at this point. */
10935 gcc_assert (!m->fs.drap_valid);
10937 /* See the comment about red zone and frame
10938 pointer usage in ix86_expand_prologue. */
10939 if (frame_pointer_needed && frame.red_zone_size)
10940 emit_insn (gen_memory_blockage ());
10942 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10943 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10945 /* Determine the CFA offset of the end of the red-zone. */
10946 m->fs.red_zone_offset = 0;
10947 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10949 /* The red-zone begins below the return address. */
10950 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10952 /* When the register save area is in the aligned portion of
10953 the stack, determine the maximum runtime displacement that
10954 matches up with the aligned frame. */
10955 if (stack_realign_drap)
10956 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10960 /* Special care must be taken for the normal return case of a function
10961 using eh_return: the eax and edx registers are marked as saved, but
10962 not restored along this path. Adjust the save location to match. */
10963 if (crtl->calls_eh_return && style != 2)
10964 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10966 /* EH_RETURN requires the use of moves to function properly. */
10967 if (crtl->calls_eh_return)
10968 restore_regs_via_mov = true;
10969 /* SEH requires the use of pops to identify the epilogue. */
10970 else if (TARGET_SEH)
10971 restore_regs_via_mov = false;
10972   /* If we're only restoring one register and sp is not valid, then
10973      use a move instruction to restore the register, since it's
10974      less work than reloading sp and popping the register.  */
10975 else if (!m->fs.sp_valid && frame.nregs <= 1)
10976 restore_regs_via_mov = true;
10977 else if (TARGET_EPILOGUE_USING_MOVE
10978 && cfun->machine->use_fast_prologue_epilogue
10979 && (frame.nregs > 1
10980 || m->fs.sp_offset != frame.reg_save_offset))
10981 restore_regs_via_mov = true;
10982 else if (frame_pointer_needed
10984 && m->fs.sp_offset != frame.reg_save_offset)
10985 restore_regs_via_mov = true;
10986 else if (frame_pointer_needed
10987 && TARGET_USE_LEAVE
10988 && cfun->machine->use_fast_prologue_epilogue
10989 && frame.nregs == 1)
10990 restore_regs_via_mov = true;
10992 restore_regs_via_mov = false;
10994 if (restore_regs_via_mov || frame.nsseregs)
10996 /* Ensure that the entire register save area is addressable via
10997 the stack pointer, if we will restore via sp. */
10999 && m->fs.sp_offset > 0x7fffffff
11000 && !(m->fs.fp_valid || m->fs.drap_valid)
11001 && (frame.nsseregs + frame.nregs) != 0)
11003 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11004 GEN_INT (m->fs.sp_offset
11005					  - frame.sse_reg_save_offset),
11006				 style,
11007 m->fs.cfa_reg == stack_pointer_rtx);
11011 /* If there are any SSE registers to restore, then we have to do it
11012 via moves, since there's obviously no pop for SSE regs. */
11013 if (frame.nsseregs)
11014 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11017 if (restore_regs_via_mov)
11022 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11024 /* eh_return epilogues need %ecx added to the stack pointer. */
11027 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11029 /* Stack align doesn't work with eh_return. */
11030 gcc_assert (!stack_realign_drap);
11031       /* Neither do regparm nested functions.  */
11032 gcc_assert (!ix86_static_chain_on_stack);
11034 if (frame_pointer_needed)
11036 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11037 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
11038 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11040 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11041 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11043 /* Note that we use SA as a temporary CFA, as the return
11044 address is at the proper place relative to it. We
11045 pretend this happens at the FP restore insn because
11046 prior to this insn the FP would be stored at the wrong
11047 offset relative to SA, and after this insn we have no
11048 other reasonable register to use for the CFA. We don't
11049 bother resetting the CFA to the SP for the duration of
11050 the return insn. */
11051 add_reg_note (insn, REG_CFA_DEF_CFA,
11052 plus_constant (sa, UNITS_PER_WORD));
11053 ix86_add_queued_cfa_restore_notes (insn);
11054 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11055 RTX_FRAME_RELATED_P (insn) = 1;
11057 m->fs.cfa_reg = sa;
11058 m->fs.cfa_offset = UNITS_PER_WORD;
11059 m->fs.fp_valid = false;
11061 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11062 const0_rtx, style, false);
11066 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11067 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
11068 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11069 ix86_add_queued_cfa_restore_notes (insn);
11071 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11072 if (m->fs.cfa_offset != UNITS_PER_WORD)
11074 m->fs.cfa_offset = UNITS_PER_WORD;
11075 add_reg_note (insn, REG_CFA_DEF_CFA,
11076 plus_constant (stack_pointer_rtx,
11078 RTX_FRAME_RELATED_P (insn) = 1;
11081 m->fs.sp_offset = UNITS_PER_WORD;
11082 m->fs.sp_valid = true;
11087 /* SEH requires that the function end with (1) a stack adjustment
11088 if necessary, (2) a sequence of pops, and (3) a return or
11089 jump instruction. Prevent insns from the function body from
11090 being scheduled into this sequence. */
11093 /* Prevent a catch region from being adjacent to the standard
11094	 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
11095	 several other flags that would be interesting to test are
11096	 set up yet.  */
11097 if (flag_non_call_exceptions)
11098 emit_insn (gen_nops (const1_rtx));
11100 emit_insn (gen_blockage ());
11103 /* First step is to deallocate the stack frame so that we can
11104 pop the registers. */
11105 if (!m->fs.sp_valid)
11106     {
11107       pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11108 GEN_INT (m->fs.fp_offset
11109					  - frame.reg_save_offset),
11110				 style, !using_drap);
11111     }
11112 else if (m->fs.sp_offset != frame.reg_save_offset)
11113     {
11114       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11115 GEN_INT (m->fs.sp_offset
11116					  - frame.reg_save_offset),
11117				 style,
11118				 m->fs.cfa_reg == stack_pointer_rtx);
11119     }
11121 ix86_emit_restore_regs_using_pop ();
11124   /* If we used a frame pointer and haven't already got rid of it,
11125      then pop it.  */
11126 if (m->fs.fp_valid)
11128 /* If the stack pointer is valid and pointing at the frame
11129 pointer store address, then we only need a pop. */
11130 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11131 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11132 /* Leave results in shorter dependency chains on CPUs that are
11133 able to grok it fast. */
11134 else if (TARGET_USE_LEAVE
11135 || optimize_function_for_size_p (cfun)
11136 || !cfun->machine->use_fast_prologue_epilogue)
11137 ix86_emit_leave ();
11140 pro_epilogue_adjust_stack (stack_pointer_rtx,
11141 hard_frame_pointer_rtx,
11142 const0_rtx, style, !using_drap);
11143 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11149 int param_ptr_offset = UNITS_PER_WORD;
11152 gcc_assert (stack_realign_drap);
11154 if (ix86_static_chain_on_stack)
11155 param_ptr_offset += UNITS_PER_WORD;
11156 if (!call_used_regs[REGNO (crtl->drap_reg)])
11157 param_ptr_offset += UNITS_PER_WORD;
11159 insn = emit_insn (gen_rtx_SET
11160 (VOIDmode, stack_pointer_rtx,
11161 gen_rtx_PLUS (Pmode,
11163 GEN_INT (-param_ptr_offset))));
11164 m->fs.cfa_reg = stack_pointer_rtx;
11165 m->fs.cfa_offset = param_ptr_offset;
11166 m->fs.sp_offset = param_ptr_offset;
11167 m->fs.realigned = false;
11169 add_reg_note (insn, REG_CFA_DEF_CFA,
11170 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11171 GEN_INT (param_ptr_offset)));
11172 RTX_FRAME_RELATED_P (insn) = 1;
11174 if (!call_used_regs[REGNO (crtl->drap_reg)])
11175 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11178 /* At this point the stack pointer must be valid, and we must have
11179 restored all of the registers. We may not have deallocated the
11180 entire stack frame. We've delayed this until now because it may
11181 be possible to merge the local stack deallocation with the
11182 deallocation forced by ix86_static_chain_on_stack. */
11183 gcc_assert (m->fs.sp_valid);
11184 gcc_assert (!m->fs.fp_valid);
11185 gcc_assert (!m->fs.realigned);
11186 if (m->fs.sp_offset != UNITS_PER_WORD)
11188 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11189			       GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11190			       style, true);
11193   /* Sibcall epilogues don't want a return instruction.  */
11194   if (style == 0)
11195     {
11196       m->fs = frame_state_save;
11197       return;
11198     }
11200 /* Emit vzeroupper if needed. */
11201 if (TARGET_VZEROUPPER
11202 && !TREE_THIS_VOLATILE (cfun->decl)
11203 && !cfun->machine->caller_return_avx256_p)
11204 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11206 if (crtl->args.pops_args && crtl->args.size)
11208 rtx popc = GEN_INT (crtl->args.pops_args);
11210 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11211 address, do explicit add, and jump indirectly to the caller. */
11213 if (crtl->args.pops_args >= 65536)
11215 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11218 /* There is no "pascal" calling convention in any 64bit ABI. */
11219 gcc_assert (!TARGET_64BIT);
11221 insn = emit_insn (gen_pop (ecx));
11222 m->fs.cfa_offset -= UNITS_PER_WORD;
11223 m->fs.sp_offset -= UNITS_PER_WORD;
11225 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11226 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11227 add_reg_note (insn, REG_CFA_REGISTER,
11228 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11229 RTX_FRAME_RELATED_P (insn) = 1;
11231	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11232				     popc, -1, true);
11233 emit_jump_insn (gen_return_indirect_internal (ecx));
11236 emit_jump_insn (gen_return_pop_internal (popc));
11239 emit_jump_insn (gen_return_internal ());
11241   /* Restore the frame state to the state left by the prologue,
11242      so that it's correct for the next epilogue.  */
11243   m->fs = frame_state_save;
11244 }
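/* In the common frame-pointer case with TARGET_USE_LEAVE, everything
   above boils down to "leave; ret"; in the frameless case, to
   "addl $N, %esp; ret" (a sketch, assuming no popped args and no
   eh_return).  */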
11246 /* Reset from the function's potential modifications. */
11249 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11250 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11252 if (pic_offset_table_rtx)
11253 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11255 /* Mach-O doesn't support labels at the end of objects, so if
11256 it looks like we might want one, insert a NOP. */
11258 rtx insn = get_last_insn ();
11261 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11262 insn = PREV_INSN (insn);
11266 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11267 fputs ("\tnop\n", file);
11273 /* Return a scratch register to use in the split stack prologue. The
11274 split stack prologue is used for -fsplit-stack. It is the first
11275 instructions in the function, even before the regular prologue.
11276 The scratch register can be any caller-saved register which is not
11277 used for parameters or for the static chain. */
11279 static unsigned int
11280 split_stack_prologue_scratch_regno (void)
11289 is_fastcall = (lookup_attribute ("fastcall",
11290 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11292 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11296 if (DECL_STATIC_CHAIN (cfun->decl))
11298	  sorry ("-fsplit-stack does not support fastcall with "
11299		 "a nested function");
11300 return INVALID_REGNUM;
11304 else if (regparm < 3)
11306 if (!DECL_STATIC_CHAIN (cfun->decl))
11312	      sorry ("-fsplit-stack does not support 2 register "
11313		     "parameters for a nested function");
11314 return INVALID_REGNUM;
11321 /* FIXME: We could make this work by pushing a register
11322 around the addition and comparison. */
11323 sorry ("-fsplit-stack does not support 3 register parameters");
11324 return INVALID_REGNUM;
11329 /* A SYMBOL_REF for the function which allocates new stack space for
11330    split stack.  */
11331
11332 static GTY(()) rtx split_stack_fn;
11334 /* A SYMBOL_REF for the more-stack function when using the large
11335    model.  */
11336
11337 static GTY(()) rtx split_stack_fn_large;
11339 /* Handle -fsplit-stack. These are the first instructions in the
11340 function, even before the regular prologue. */
11343 ix86_expand_split_stack_prologue (void)
11345 struct ix86_frame frame;
11346 HOST_WIDE_INT allocate;
11347 unsigned HOST_WIDE_INT args_size;
11348 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11349 rtx scratch_reg = NULL_RTX;
11350 rtx varargs_label = NULL_RTX;
11353 gcc_assert (flag_split_stack && reload_completed);
11355 ix86_finalize_stack_realign_flags ();
11356 ix86_compute_frame_layout (&frame);
11357 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11359 /* This is the label we will branch to if we have enough stack
11360 space. We expect the basic block reordering pass to reverse this
11361 branch if optimizing, so that we branch in the unlikely case. */
11362 label = gen_label_rtx ();
11364 /* We need to compare the stack pointer minus the frame size with
11365 the stack boundary in the TCB. The stack boundary always gives
11366 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11367 can compare directly. Otherwise we need to do an addition. */
11369 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11370 UNSPEC_STACK_CHECK);
11371 limit = gen_rtx_CONST (Pmode, limit);
11372 limit = gen_rtx_MEM (Pmode, limit);
11373 if (allocate < SPLIT_STACK_AVAILABLE)
11374 current = stack_pointer_rtx;
11377 unsigned int scratch_regno;
11380 /* We need a scratch register to hold the stack pointer minus
11381 the required frame size. Since this is the very start of the
11382 function, the scratch register can be any caller-saved
11383 register which is not used for parameters. */
11384 offset = GEN_INT (- allocate);
11385 scratch_regno = split_stack_prologue_scratch_regno ();
11386 if (scratch_regno == INVALID_REGNUM)
11388 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11389 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11391 /* We don't use ix86_gen_add3 in this case because it will
11392 want to split to lea, but when not optimizing the insn
11393 will not be split after this point. */
11394 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11395 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11400 emit_move_insn (scratch_reg, offset);
11401 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11402 stack_pointer_rtx));
11404 current = scratch_reg;
11407 ix86_expand_branch (GEU, current, limit, label);
11408 jump_insn = get_last_insn ();
11409 JUMP_LABEL (jump_insn) = label;
11411 /* Mark the jump as very likely to be taken. */
11412 add_reg_note (jump_insn, REG_BR_PROB,
11413 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
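  /* REG_BR_PROB_BASE is 10000, so the note above records a probability
     of 9900/10000: the branch to LABEL (enough stack available) is
     predicted taken 99% of the time.  */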
11415 if (split_stack_fn == NULL_RTX)
11416 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11417 fn = split_stack_fn;
11419 /* Get more stack space. We pass in the desired stack space and the
11420 size of the arguments to copy to the new stack. In 32-bit mode
11421 we push the parameters; __morestack will return on a new stack
11422     anyhow.  In 64-bit mode we pass the parameters in r10 and
11423     r11.  */
11424 allocate_rtx = GEN_INT (allocate);
11425 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11426 call_fusage = NULL_RTX;
11431 reg10 = gen_rtx_REG (Pmode, R10_REG);
11432 reg11 = gen_rtx_REG (Pmode, R11_REG);
11434 /* If this function uses a static chain, it will be in %r10.
11435 Preserve it across the call to __morestack. */
11436 if (DECL_STATIC_CHAIN (cfun->decl))
11440 rax = gen_rtx_REG (Pmode, AX_REG);
11441 emit_move_insn (rax, reg10);
11442 use_reg (&call_fusage, rax);
11445 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11447 HOST_WIDE_INT argval;
11449 /* When using the large model we need to load the address
11450 into a register, and we've run out of registers. So we
11451 switch to a different calling convention, and we call a
11452 different function: __morestack_large. We pass the
11453 argument size in the upper 32 bits of r10 and pass the
11454 frame size in the lower 32 bits. */
11455 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11456 gcc_assert ((args_size & 0xffffffff) == args_size);
11458 if (split_stack_fn_large == NULL_RTX)
11459 split_stack_fn_large =
11460 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11462 if (ix86_cmodel == CM_LARGE_PIC)
11466 label = gen_label_rtx ();
11467 emit_label (label);
11468 LABEL_PRESERVE_P (label) = 1;
11469 emit_insn (gen_set_rip_rex64 (reg10, label));
11470 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11471 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11472 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11474 x = gen_rtx_CONST (Pmode, x);
11475 emit_move_insn (reg11, x);
11476 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11477 x = gen_const_mem (Pmode, x);
11478 emit_move_insn (reg11, x);
11481 emit_move_insn (reg11, split_stack_fn_large);
11485 argval = ((args_size << 16) << 16) + allocate;
11486 emit_move_insn (reg10, GEN_INT (argval));
11490 emit_move_insn (reg10, allocate_rtx);
11491 emit_move_insn (reg11, GEN_INT (args_size));
11492 use_reg (&call_fusage, reg11);
11495 use_reg (&call_fusage, reg10);
11499 emit_insn (gen_push (GEN_INT (args_size)));
11500 emit_insn (gen_push (allocate_rtx));
11502 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11503 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11505 add_function_usage_to (call_insn, call_fusage);
11507 /* In order to make call/return prediction work right, we now need
11508 to execute a return instruction. See
11509 libgcc/config/i386/morestack.S for the details on how this works.
11511 For flow purposes gcc must not see this as a return
11512 instruction--we need control flow to continue at the subsequent
11513 label. Therefore, we use an unspec. */
11514 gcc_assert (crtl->args.pops_args < 65536);
11515 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11517 /* If we are in 64-bit mode and this function uses a static chain,
11518      we saved %r10 in %rax before calling __morestack.  */
11519 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11520 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11521 gen_rtx_REG (Pmode, AX_REG));
11523 /* If this function calls va_start, we need to store a pointer to
11524 the arguments on the old stack, because they may not have been
11525 all copied to the new stack. At this point the old stack can be
11526 found at the frame pointer value used by __morestack, because
11527 __morestack has set that up before calling back to us. Here we
11528 store that pointer in a scratch register, and in
11529 ix86_expand_prologue we store the scratch register in a stack
11531 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11533 unsigned int scratch_regno;
11537 scratch_regno = split_stack_prologue_scratch_regno ();
11538 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11539 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11541	  /* 64-bit:
11542	     fp -> old fp
11543		   return address within this function
11544		   return address of caller of this function
11545		   stack arguments
11546	     So we add three words to get to the stack arguments.
11547
11548	     32-bit:
11549	     fp -> old fp
11550		   return address within this function
11551		   first argument to __morestack
11552		   second argument to __morestack
11553		   return address of caller of this function
11554		   stack arguments
11555	     So we add five words to get to the stack arguments.  */
11557 words = TARGET_64BIT ? 3 : 5;
11558 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11559 gen_rtx_PLUS (Pmode, frame_reg,
11560 GEN_INT (words * UNITS_PER_WORD))));
11562 varargs_label = gen_label_rtx ();
11563 emit_jump_insn (gen_jump (varargs_label));
11564 JUMP_LABEL (get_last_insn ()) = varargs_label;
11569 emit_label (label);
11570 LABEL_NUSES (label) = 1;
11572 /* If this function calls va_start, we now have to set the scratch
11573 register for the case where we do not call __morestack. In this
11574 case we need to set it based on the stack pointer. */
11575 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11577 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11578 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11579 GEN_INT (UNITS_PER_WORD))));
11581 emit_label (varargs_label);
11582 LABEL_NUSES (varargs_label) = 1;
11586 /* We may have to tell the dataflow pass that the split stack prologue
11587 is initializing a scratch register. */
11590 ix86_live_on_entry (bitmap regs)
11592 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11594 gcc_assert (flag_split_stack);
11595 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11599 /* Extract the parts of an RTL expression that is a valid memory address
11600 for an instruction. Return 0 if the structure of the address is
11601 grossly off. Return -1 if the address contains ASHIFT, so it is not
11602    strictly valid, but is still used for computing the length of the lea instruction.  */
11604 int
11605 ix86_decompose_address (rtx addr, struct ix86_address *out)
11606 {
11607 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11608 rtx base_reg, index_reg;
11609 HOST_WIDE_INT scale = 1;
11610   rtx scale_rtx = NULL_RTX;
11611   rtx tmp;
11612   int retval = 1;
11613 enum ix86_address_seg seg = SEG_DEFAULT;
11615 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11617 else if (GET_CODE (addr) == PLUS)
11619 rtx addends[4], op;
11627 addends[n++] = XEXP (op, 1);
11630 while (GET_CODE (op) == PLUS);
11635      for (i = n - 1; i >= 0; --i)
11638 switch (GET_CODE (op))
11643 index = XEXP (op, 0);
11644 scale_rtx = XEXP (op, 1);
11650 index = XEXP (op, 0);
11651 tmp = XEXP (op, 1);
11652	      if (!CONST_INT_P (tmp))
11653		return 0;
11654 scale = INTVAL (tmp);
11655 if ((unsigned HOST_WIDE_INT) scale > 3)
11657 scale = 1 << scale;
11661 if (XINT (op, 1) == UNSPEC_TP
11662 && TARGET_TLS_DIRECT_SEG_REFS
11663 && seg == SEG_DEFAULT)
11664 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11693 else if (GET_CODE (addr) == MULT)
11695 index = XEXP (addr, 0); /* index*scale */
11696 scale_rtx = XEXP (addr, 1);
11698 else if (GET_CODE (addr) == ASHIFT)
11700 /* We're called for lea too, which implements ashift on occasion. */
11701 index = XEXP (addr, 0);
11702 tmp = XEXP (addr, 1);
11703       if (!CONST_INT_P (tmp))
11704	return 0;
11705 scale = INTVAL (tmp);
11706 if ((unsigned HOST_WIDE_INT) scale > 3)
11708 scale = 1 << scale;
11712 disp = addr; /* displacement */
11714 /* Extract the integral value of scale. */
11717       if (!CONST_INT_P (scale_rtx))
11718	return 0;
11719 scale = INTVAL (scale_rtx);
11722 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11723 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11725 /* Avoid useless 0 displacement. */
11726 if (disp == const0_rtx && (base || index))
11729   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
11730 if (base_reg && index_reg && scale == 1
11731 && (index_reg == arg_pointer_rtx
11732 || index_reg == frame_pointer_rtx
11733 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11736 tmp = base, base = index, index = tmp;
11737 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11740   /* Special case: %ebp cannot be encoded as a base without a displacement.
11741      Similarly %r13.  */
11742   if (!disp
11743       && base_reg
11744 && (base_reg == hard_frame_pointer_rtx
11745 || base_reg == frame_pointer_rtx
11746 || base_reg == arg_pointer_rtx
11747 || (REG_P (base_reg)
11748 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11749	       || REGNO (base_reg) == R13_REG))))
11750     disp = const0_rtx;
11752 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11753 Avoid this by transforming to [%esi+0].
11754 Reload calls address legitimization without cfun defined, so we need
11755 to test cfun for being non-NULL. */
11756 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11757 && base_reg && !index_reg && !disp
11758       && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11759     disp = const0_rtx;
11761 /* Special case: encode reg+reg instead of reg*2. */
11762 if (!base && index && scale == 2)
11763 base = index, base_reg = index_reg, scale = 1;
11765 /* Special case: scaling cannot be encoded without base or displacement. */
11766   if (!base && !disp && index && scale != 1)
11767     disp = const0_rtx;
11768
11769   out->base = base;
11770   out->index = index;
11771   out->disp = disp;
11772   out->scale = scale;
11773   out->seg = seg;
11774
11775   return retval;
11776 }
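/* A worked example: the ia32 address 12(%ebx,%ecx,4) arrives here in
   canonical RTL form as

       (plus:SI (plus:SI (mult:SI (reg:SI cx) (const_int 4))
			 (reg:SI bx))
		(const_int 12))

   and decomposes into base = %ebx, index = %ecx, scale = 4 and
   disp = (const_int 12), with a return value of 1.  */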
11778 /* Return cost of the memory address x.
11779 For i386, it is better to use a complex address than let gcc copy
11780 the address into a reg and make a new pseudo. But not if the address
11781    requires two regs - that would mean more pseudos with longer
11782    lifetimes.  */
11783 static int
11784 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11785 {
11786   struct ix86_address parts;
11787   int cost = 1;
11788   int ok = ix86_decompose_address (x, &parts);
11789
11790   gcc_assert (ok);
11791
11792 if (parts.base && GET_CODE (parts.base) == SUBREG)
11793 parts.base = SUBREG_REG (parts.base);
11794 if (parts.index && GET_CODE (parts.index) == SUBREG)
11795 parts.index = SUBREG_REG (parts.index);
11797   /* Attempt to minimize number of registers in the address.  */
11798   if ((parts.base
11799        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11800       || (parts.index
11801	   && (!REG_P (parts.index)
11802	       || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11803     cost++;
11804
11805   if (parts.base
11806       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11807       && parts.index
11808       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11809       && parts.base != parts.index)
11810     cost++;
11812   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11813      since its predecode logic can't detect the length of instructions
11814      and it degenerates to vector decoded.  Increase the cost of such
11815      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
11816      to split such addresses or even refuse such addresses at all.
11817
11818      Following addressing modes are affected:
11819       [base+scale*index]
11820       [scale*index+disp]
11821       [base+index]
11822
11823      The first and last case may be avoidable by explicitly coding the zero in
11824      the memory address, but I don't have an AMD-K6 machine handy to check this
11825      theory.  */
11826
11827   if (TARGET_K6
11828 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11829 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11830	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11831     cost += 10;
11832
11833   return cost;
11834 }
11836 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11837    this is used to form addresses to local data when -fPIC is in
11838    effect.  */
11839
11840 static bool
11841 darwin_local_data_pic (rtx disp)
11842 {
11843 return (GET_CODE (disp) == UNSPEC
11844 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11847 /* Determine if a given RTX is a valid constant. We already know this
11848 satisfies CONSTANT_P. */
11850 bool
11851 legitimate_constant_p (rtx x)
11852 {
11853 switch (GET_CODE (x))
11858       if (GET_CODE (x) == PLUS)
11859	{
11860	  if (!CONST_INT_P (XEXP (x, 1)))
11861	    return false;
11862	  x = XEXP (x, 0);
11863	}
11865       if (TARGET_MACHO && darwin_local_data_pic (x))
11866	return true;
11868 /* Only some unspecs are valid as "constants". */
11869 if (GET_CODE (x) == UNSPEC)
11870 switch (XINT (x, 1))
11873 case UNSPEC_GOTOFF:
11874 case UNSPEC_PLTOFF:
11875 return TARGET_64BIT;
11877 case UNSPEC_NTPOFF:
11878 x = XVECEXP (x, 0, 0);
11879 return (GET_CODE (x) == SYMBOL_REF
11880 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11881 case UNSPEC_DTPOFF:
11882 x = XVECEXP (x, 0, 0);
11883 return (GET_CODE (x) == SYMBOL_REF
11884 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11889 /* We must have drilled down to a symbol. */
11890 if (GET_CODE (x) == LABEL_REF)
11892 if (GET_CODE (x) != SYMBOL_REF)
11897 /* TLS symbols are never valid. */
11898 if (SYMBOL_REF_TLS_MODEL (x))
11901 /* DLLIMPORT symbols are never valid. */
11902 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11903 && SYMBOL_REF_DLLIMPORT_P (x))
11907 /* mdynamic-no-pic */
11908 if (MACHO_DYNAMIC_NO_PIC_P)
11909 return machopic_symbol_defined_p (x);
11914 if (GET_MODE (x) == TImode
11915 && x != CONST0_RTX (TImode)
11921 if (!standard_sse_constant_p (x))
11928 /* Otherwise we handle everything else in the move patterns. */
11932 /* Determine if it's legal to put X into the constant pool. This
11933 is not possible for the address of thread-local symbols, which
11934 is checked above. */
11936 static bool
11937 ix86_cannot_force_const_mem (rtx x)
11938 {
11939 /* We can always put integral constants and vectors in memory. */
11940 switch (GET_CODE (x))
11950   return !legitimate_constant_p (x);
11951 }
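/* For example, the address of a TLS symbol fails legitimate_constant_p
   (see above), so this function returns true for it: forcing such an
   address into the constant pool would lose the special TLS relocation
   handling.  */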
11954 /* Nonzero if the constant value X is a legitimate general operand
11955 when generating PIC code. It is given that flag_pic is on and
11956 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11958 bool
11959 legitimate_pic_operand_p (rtx x)
11960 {
11961   rtx inner;
11962
11963 switch (GET_CODE (x))
11966 inner = XEXP (x, 0);
11967 if (GET_CODE (inner) == PLUS
11968 && CONST_INT_P (XEXP (inner, 1)))
11969 inner = XEXP (inner, 0);
11971 /* Only some unspecs are valid as "constants". */
11972 if (GET_CODE (inner) == UNSPEC)
11973 switch (XINT (inner, 1))
11976 case UNSPEC_GOTOFF:
11977 case UNSPEC_PLTOFF:
11978 return TARGET_64BIT;
11980 x = XVECEXP (inner, 0, 0);
11981 return (GET_CODE (x) == SYMBOL_REF
11982 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11983 case UNSPEC_MACHOPIC_OFFSET:
11984 return legitimate_pic_address_disp_p (x);
11992 return legitimate_pic_address_disp_p (x);
11999 /* Determine if a given CONST RTX is a valid memory displacement
12000    in PIC mode.  */
12001
12002 bool
12003 legitimate_pic_address_disp_p (rtx disp)
12004 {
12007 /* In 64bit mode we can allow direct addresses of symbols and labels
12008 when they are not dynamic symbols. */
12011 rtx op0 = disp, op1;
12013 switch (GET_CODE (disp))
12019 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12021 op0 = XEXP (XEXP (disp, 0), 0);
12022 op1 = XEXP (XEXP (disp, 0), 1);
12023 if (!CONST_INT_P (op1)
12024 || INTVAL (op1) >= 16*1024*1024
12025 || INTVAL (op1) < -16*1024*1024)
12027 if (GET_CODE (op0) == LABEL_REF)
12029 if (GET_CODE (op0) != SYMBOL_REF)
12034 /* TLS references should always be enclosed in UNSPEC. */
12035 if (SYMBOL_REF_TLS_MODEL (op0))
12037 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
12038 && ix86_cmodel != CM_LARGE_PIC)
12046   if (GET_CODE (disp) != CONST)
12047     return 0;
12048 disp = XEXP (disp, 0);
12052       /* It is unsafe to allow PLUS expressions.  This limits the allowed
12053	 distance of GOT table references.  We should not need these anyway.  */
12054 if (GET_CODE (disp) != UNSPEC
12055 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12056 && XINT (disp, 1) != UNSPEC_GOTOFF
12057 && XINT (disp, 1) != UNSPEC_PCREL
12058 && XINT (disp, 1) != UNSPEC_PLTOFF))
12061 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12062 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12068 if (GET_CODE (disp) == PLUS)
12070       if (!CONST_INT_P (XEXP (disp, 1)))
12071	return 0;
12072 disp = XEXP (disp, 0);
12076 if (TARGET_MACHO && darwin_local_data_pic (disp))
12079 if (GET_CODE (disp) != UNSPEC)
12082 switch (XINT (disp, 1))
12087 /* We need to check for both symbols and labels because VxWorks loads
12088	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
12089	 details.  */
12090 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12091 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12092 case UNSPEC_GOTOFF:
12093       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12094	 While the ABI also specifies a 32bit relocation, we don't produce
12095	 it in the small PIC model at all.  */
12096 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12097 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12099 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12101 case UNSPEC_GOTTPOFF:
12102 case UNSPEC_GOTNTPOFF:
12103 case UNSPEC_INDNTPOFF:
12106 disp = XVECEXP (disp, 0, 0);
12107 return (GET_CODE (disp) == SYMBOL_REF
12108 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12109 case UNSPEC_NTPOFF:
12110 disp = XVECEXP (disp, 0, 0);
12111 return (GET_CODE (disp) == SYMBOL_REF
12112 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12113 case UNSPEC_DTPOFF:
12114 disp = XVECEXP (disp, 0, 0);
12115 return (GET_CODE (disp) == SYMBOL_REF
12116 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12122 /* Recognizes RTL expressions that are valid memory addresses for an
12123 instruction. The MODE argument is the machine mode for the MEM
12124 expression that wants to use this address.
12126    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12127    convert common non-canonical forms to canonical form so that they will
12128    be recognized.  */
12129
12130 static bool
12131 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12132			   rtx addr, bool strict)
12133 {
12134 struct ix86_address parts;
12135 rtx base, index, disp;
12136 HOST_WIDE_INT scale;
12138 if (ix86_decompose_address (addr, &parts) <= 0)
12139 /* Decomposition failed. */
12143 index = parts.index;
12145 scale = parts.scale;
12147 /* Validate base register.
12149 Don't allow SUBREG's that span more than a word here. It can lead to spill
12150 failures when the base is one word out of a two word structure, which is
12151 represented internally as a DImode int. */
12159 else if (GET_CODE (base) == SUBREG
12160 && REG_P (SUBREG_REG (base))
12161 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12163 reg = SUBREG_REG (base);
12165 /* Base is not a register. */
12168 if (GET_MODE (base) != Pmode)
12169 /* Base is not in Pmode. */
12172 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12173 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12174 /* Base is not valid. */
12178 /* Validate index register.
12180 Don't allow SUBREG's that span more than a word here -- same as above. */
12188 else if (GET_CODE (index) == SUBREG
12189 && REG_P (SUBREG_REG (index))
12190 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12192 reg = SUBREG_REG (index);
12194 /* Index is not a register. */
12197 if (GET_MODE (index) != Pmode)
12198 /* Index is not in Pmode. */
12201 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12202 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12203 /* Index is not valid. */
12207 /* Validate scale factor. */
12211 /* Scale without index. */
12214 if (scale != 2 && scale != 4 && scale != 8)
12215 /* Scale is not a valid multiplier. */
12219 /* Validate displacement. */
12222 if (GET_CODE (disp) == CONST
12223 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12224 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12225 switch (XINT (XEXP (disp, 0), 1))
12227	/* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
12228	   when used.  While the ABI also specifies 32bit relocations, we
12229	   don't produce them at all and use IP-relative addressing instead.  */
12231 case UNSPEC_GOTOFF:
12232 gcc_assert (flag_pic);
12234 goto is_legitimate_pic;
12236 /* 64bit address unspec. */
12239 case UNSPEC_GOTPCREL:
12241 gcc_assert (flag_pic);
12242 goto is_legitimate_pic;
12244 case UNSPEC_GOTTPOFF:
12245 case UNSPEC_GOTNTPOFF:
12246 case UNSPEC_INDNTPOFF:
12247 case UNSPEC_NTPOFF:
12248 case UNSPEC_DTPOFF:
12251 case UNSPEC_STACK_CHECK:
12252 gcc_assert (flag_split_stack);
12256 /* Invalid address unspec. */
12260 else if (SYMBOLIC_CONST (disp)
12264 && MACHOPIC_INDIRECT
12265 && !machopic_operand_p (disp)
12271 if (TARGET_64BIT && (index || base))
12273 /* foo@dtpoff(%rX) is ok. */
12274 if (GET_CODE (disp) != CONST
12275 || GET_CODE (XEXP (disp, 0)) != PLUS
12276 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12277 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12278 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12279 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12280 /* Non-constant pic memory reference. */
12283 else if ((!TARGET_MACHO || flag_pic)
12284 && ! legitimate_pic_address_disp_p (disp))
12285 /* Displacement is an invalid pic construct. */
12288 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
12289	/* Displacement must be referenced via non_lazy_pointer.  */
12293 /* This code used to verify that a symbolic pic displacement
12294 includes the pic_offset_table_rtx register.
12296	 While this is a good idea, unfortunately these constructs may
12297	 be created by the "adds using lea" optimization for incorrect
12298	 code like:
12299
12300	 int a;
12301	 int foo(int i)
12302	   {
12303	     return *(&a+i);
12304	   }
12305
12306	 This code is nonsensical, but results in addressing the
12307	 GOT table with a pic_offset_table_rtx base.  We can't
12308	 just refuse it easily, since it gets matched by the
12309	 "addsi3" pattern, which later gets split to lea when the
12310	 output register differs from the input.  While this
12311	 could be handled by a separate addsi pattern for this case
12312	 that never results in lea, it seems easier and correct to
12313	 fix the crash by disabling this test.  */
12315 else if (GET_CODE (disp) != LABEL_REF
12316 && !CONST_INT_P (disp)
12317 && (GET_CODE (disp) != CONST
12318 || !legitimate_constant_p (disp))
12319 && (GET_CODE (disp) != SYMBOL_REF
12320 || !legitimate_constant_p (disp)))
12321 /* Displacement is not constant. */
12323 else if (TARGET_64BIT
12324 && !x86_64_immediate_operand (disp, VOIDmode))
12325 /* Displacement is out of range. */
12329 /* Everything looks valid. */
12333 /* Determine if a given RTX is a valid constant address. */
12335 bool
12336 constant_address_p (rtx x)
12337 {
12338   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12339 }
12341 /* Return a unique alias set for the GOT. */
12343 static alias_set_type
12344 ix86_GOT_alias_set (void)
12346 static alias_set_type set = -1;
12348 set = new_alias_set ();
12352 /* Return a legitimate reference for ORIG (an address) using the
12353 register REG. If REG is 0, a new pseudo is generated.
12355 There are two types of references that must be handled:
12357 1. Global data references must load the address from the GOT, via
12358 the PIC reg. An insn is emitted to do this load, and the reg is
12361 2. Static data references, constant pool addresses, and code labels
12362 compute the address as an offset from the GOT, whose base is in
12363 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12364 differentiate them from global data objects. The returned
12365 address is the PIC reg + an unspec constant.
12367 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12368 reg also appears in the address. */
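/* As an illustration (not the exact output, which depends on target
   flags), the two kinds of references typically assemble to something
   like this on 32bit ELF, assuming %ebx holds the PIC base:

	movl	foo@GOT(%ebx), %eax	# global data: load address from GOT
	leal	bar@GOTOFF(%ebx), %eax	# local data: PIC base + offset  */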
12371 legitimize_pic_address (rtx orig, rtx reg)
12374 rtx new_rtx = orig;
12378 if (TARGET_MACHO && !TARGET_64BIT)
12381 reg = gen_reg_rtx (Pmode);
12382 /* Use the generic Mach-O PIC machinery. */
12383 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12387 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12389 else if (TARGET_64BIT
12390 && ix86_cmodel != CM_SMALL_PIC
12391 && gotoff_operand (addr, Pmode))
12394 /* This symbol may be referenced via a displacement from the PIC
12395 base address (@GOTOFF). */
12397 if (reload_in_progress)
12398 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12399 if (GET_CODE (addr) == CONST)
12400 addr = XEXP (addr, 0);
12401 if (GET_CODE (addr) == PLUS)
12403 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12405 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12408 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12409 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12411 tmpreg = gen_reg_rtx (Pmode);
12414 emit_move_insn (tmpreg, new_rtx);
12418 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12419 tmpreg, 1, OPTAB_DIRECT);
12422 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12424 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12426 /* This symbol may be referenced via a displacement from the PIC
12427 base address (@GOTOFF). */
12429 if (reload_in_progress)
12430 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12431 if (GET_CODE (addr) == CONST)
12432 addr = XEXP (addr, 0);
12433 if (GET_CODE (addr) == PLUS)
12435 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12437 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12440 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12441 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12442 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12446 emit_move_insn (reg, new_rtx);
12450 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12451 /* We can't use @GOTOFF for text labels on VxWorks;
12452 see gotoff_operand. */
12453 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12455 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12457 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12458 return legitimize_dllimport_symbol (addr, true);
12459 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12460 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12461 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12463 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12464 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
      /* For x64 PE-COFF there is no GOT table, so we use the address
	 directly.  */
12470 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12472 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12473 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12476 reg = gen_reg_rtx (Pmode);
12477 emit_move_insn (reg, new_rtx);
12480 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12482 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12483 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12484 new_rtx = gen_const_mem (Pmode, new_rtx);
12485 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12488 reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
12492 emit_insn (gen_movsi (reg, new_rtx));
12497 /* This symbol must be referenced via a load from the
12498 Global Offset Table (@GOT). */
12500 if (reload_in_progress)
12501 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12502 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12503 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12505 new_rtx = force_reg (Pmode, new_rtx);
12506 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12507 new_rtx = gen_const_mem (Pmode, new_rtx);
12508 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12511 reg = gen_reg_rtx (Pmode);
12512 emit_move_insn (reg, new_rtx);
12518 if (CONST_INT_P (addr)
12519 && !x86_64_immediate_operand (addr, VOIDmode))
12523 emit_move_insn (reg, addr);
12527 new_rtx = force_reg (Pmode, addr);
12529 else if (GET_CODE (addr) == CONST)
12531 addr = XEXP (addr, 0);
12533 /* We must match stuff we generate before. Assume the only
12534 unspecs that can get here are ours. Not that we could do
12535 anything with them anyway.... */
12536 if (GET_CODE (addr) == UNSPEC
12537 || (GET_CODE (addr) == PLUS
12538 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12540 gcc_assert (GET_CODE (addr) == PLUS);
12542 if (GET_CODE (addr) == PLUS)
12544 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12546 /* Check first to see if this is a constant offset from a @GOTOFF
12547 symbol reference. */
12548 if (gotoff_operand (op0, Pmode)
12549 && CONST_INT_P (op1))
12553 if (reload_in_progress)
12554 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12555 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12557 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12558 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12559 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12563 emit_move_insn (reg, new_rtx);
12569 if (INTVAL (op1) < -16*1024*1024
12570 || INTVAL (op1) >= 16*1024*1024)
12572 if (!x86_64_immediate_operand (op1, Pmode))
12573 op1 = force_reg (Pmode, op1);
12574 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12580 base = legitimize_pic_address (XEXP (addr, 0), reg);
12581 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12582 base == reg ? NULL_RTX : reg);
12584 if (CONST_INT_P (new_rtx))
12585 new_rtx = plus_constant (base, INTVAL (new_rtx));
12588 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12590 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12591 new_rtx = XEXP (new_rtx, 1);
12593 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12601 /* Load the thread pointer. If TO_REG is true, force it into a register. */
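/* On GNU/Linux the thread pointer lives at offset 0 in the TLS segment,
   so, as a rough sketch, the UNSPEC_TP load below corresponds to
   "movl %gs:0, %reg" on 32bit and "movq %fs:0, %reg" on 64bit.  */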
12604 get_thread_pointer (int to_reg)
12608 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12612 reg = gen_reg_rtx (Pmode);
12613 insn = gen_rtx_SET (VOIDmode, reg, tp);
12614 insn = emit_insn (insn);
12619 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12620 false if we expect this to be used for a memory address and true if
12621 we expect to load the address into a register. */
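/* For orientation only, the TLS models handled below roughly correspond
   to these well-known 64bit GNU sequences (illustrative, modulo the
   exact padding prefixes the real patterns emit):

     global dynamic:  leaq x@tlsgd(%rip), %rdi; call __tls_get_addr
     initial exec:    movq x@gottpoff(%rip), %rax; then access %fs:(%rax)
     local exec:      movq %fs:0, %rax; leaq x@tpoff(%rax), %rax  */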
12624 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12626 rtx dest, base, off, pic, tp;
12631 case TLS_MODEL_GLOBAL_DYNAMIC:
12632 dest = gen_reg_rtx (Pmode);
12633 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12635 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12637 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12640 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12641 insns = get_insns ();
12644 RTL_CONST_CALL_P (insns) = 1;
12645 emit_libcall_block (insns, dest, rax, x);
12647 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12648 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12650 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12652 if (TARGET_GNU2_TLS)
12654 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12656 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12660 case TLS_MODEL_LOCAL_DYNAMIC:
12661 base = gen_reg_rtx (Pmode);
12662 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12664 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12666 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12669 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12670 insns = get_insns ();
12673 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12674 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12675 RTL_CONST_CALL_P (insns) = 1;
12676 emit_libcall_block (insns, base, rax, note);
12678 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12679 emit_insn (gen_tls_local_dynamic_base_64 (base));
12681 emit_insn (gen_tls_local_dynamic_base_32 (base));
12683 if (TARGET_GNU2_TLS)
12685 rtx x = ix86_tls_module_base ();
12687 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12688 gen_rtx_MINUS (Pmode, x, tp));
12691 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12692 off = gen_rtx_CONST (Pmode, off);
12694 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12696 if (TARGET_GNU2_TLS)
12698 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12700 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12705 case TLS_MODEL_INITIAL_EXEC:
12708 if (TARGET_SUN_TLS)
12710 /* The Sun linker took the AMD64 TLS spec literally
12711 and can only handle %rax as destination of the
12712 initial executable code sequence. */
12714 dest = gen_reg_rtx (Pmode);
12715 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12720 type = UNSPEC_GOTNTPOFF;
12724 if (reload_in_progress)
12725 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12726 pic = pic_offset_table_rtx;
12727 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12729 else if (!TARGET_ANY_GNU_TLS)
12731 pic = gen_reg_rtx (Pmode);
12732 emit_insn (gen_set_got (pic));
12733 type = UNSPEC_GOTTPOFF;
12738 type = UNSPEC_INDNTPOFF;
12741 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12742 off = gen_rtx_CONST (Pmode, off);
12744 off = gen_rtx_PLUS (Pmode, pic, off);
12745 off = gen_const_mem (Pmode, off);
12746 set_mem_alias_set (off, ix86_GOT_alias_set ());
12748 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12750 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12751 off = force_reg (Pmode, off);
12752 return gen_rtx_PLUS (Pmode, base, off);
12756 base = get_thread_pointer (true);
12757 dest = gen_reg_rtx (Pmode);
12758 emit_insn (gen_subsi3 (dest, base, off));
12762 case TLS_MODEL_LOCAL_EXEC:
12763 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12764 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12765 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12766 off = gen_rtx_CONST (Pmode, off);
12768 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12770 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12771 return gen_rtx_PLUS (Pmode, base, off);
12775 base = get_thread_pointer (true);
12776 dest = gen_reg_rtx (Pmode);
12777 emit_insn (gen_subsi3 (dest, base, off));
12782 gcc_unreachable ();
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */
12791 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12792 htab_t dllimport_map;
12795 get_dllimport_decl (tree decl)
12797 struct tree_map *h, in;
12800 const char *prefix;
12801 size_t namelen, prefixlen;
12806 if (!dllimport_map)
12807 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12809 in.hash = htab_hash_pointer (decl);
12810 in.base.from = decl;
12811 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12812 h = (struct tree_map *) *loc;
12816 *loc = h = ggc_alloc_tree_map ();
12818 h->base.from = decl;
12819 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12820 VAR_DECL, NULL, ptr_type_node);
12821 DECL_ARTIFICIAL (to) = 1;
12822 DECL_IGNORED_P (to) = 1;
12823 DECL_EXTERNAL (to) = 1;
12824 TREE_READONLY (to) = 1;
12826 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12827 name = targetm.strip_name_encoding (name);
12828 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12829 ? "*__imp_" : "*__imp__";
12830 namelen = strlen (name);
12831 prefixlen = strlen (prefix);
12832 imp_name = (char *) alloca (namelen + prefixlen + 1);
12833 memcpy (imp_name, prefix, prefixlen);
12834 memcpy (imp_name + prefixlen, name, namelen + 1);
12836 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12837 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12838 SET_SYMBOL_REF_DECL (rtl, to);
12839 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12841 rtl = gen_const_mem (Pmode, rtl);
12842 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12844 SET_DECL_RTL (to, rtl);
12845 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12850 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12851 true if we require the result be a register. */
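/* Illustrative only: a reference to a dllimport'd variable "foo" is
   rewritten as a load through the import table cell, e.g. on 32bit:

	movl	__imp__foo, %eax	# address of foo
	movl	(%eax), %eax		# value of foo  */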
12854 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12859 gcc_assert (SYMBOL_REF_DECL (symbol));
12860 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12862 x = DECL_RTL (imp_decl);
12864 x = force_reg (Pmode, x);
12868 /* Try machine-dependent ways of modifying an illegitimate address
12869 to be legitimate. If we find one, return the new, valid address.
12870 This macro is used in only one place: `memory_address' in explow.c.
12872 OLDX is the address as it was before break_out_memory_refs was called.
12873 In some cases it is useful to look at this to decide what needs to be done.
12875 It is always safe for this macro to do nothing. It exists to recognize
12876 opportunities to optimize the output.
12878 For the 80386, we handle X+REG by loading X into a register R and
12879 using R+REG. R will go in a general reg and indexing will be used.
12880 However, if REG is a broken-out memory address or multiplication,
12881 nothing needs to be done because REG can certainly go in a general reg.
12883 When -fpic is used, special handling is needed for symbolic references.
12884 See comments by legitimize_pic_address in i386.c for details. */
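/* A minimal sketch of the X+REG case described above: given
   (plus (symbol_ref "x") (reg R)), we emit a move of the symbol into a
   fresh register R2 and return (plus (reg R2) (reg R)), which is a
   valid base+index address.  */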
12887 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12888 enum machine_mode mode)
12893 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12895 return legitimize_tls_address (x, (enum tls_model) log, false);
12896 if (GET_CODE (x) == CONST
12897 && GET_CODE (XEXP (x, 0)) == PLUS
12898 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12899 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12901 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12902 (enum tls_model) log, false);
12903 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12906 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12908 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12909 return legitimize_dllimport_symbol (x, true);
12910 if (GET_CODE (x) == CONST
12911 && GET_CODE (XEXP (x, 0)) == PLUS
12912 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12913 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12915 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12916 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12920 if (flag_pic && SYMBOLIC_CONST (x))
12921 return legitimize_pic_address (x, 0);
12924 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12925 return machopic_indirect_data_reference (x, 0);
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
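  /* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
     which matches the scaled-index part of an address and can then be
     emitted as something like "leal (,%reg,4), %dst".  */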
12929 if (GET_CODE (x) == ASHIFT
12930 && CONST_INT_P (XEXP (x, 1))
12931 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12934 log = INTVAL (XEXP (x, 1));
12935 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12936 GEN_INT (1 << log));
12939 if (GET_CODE (x) == PLUS)
12941 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12943 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12944 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12945 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12948 log = INTVAL (XEXP (XEXP (x, 0), 1));
12949 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12950 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12951 GEN_INT (1 << log));
12954 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12955 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12956 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12959 log = INTVAL (XEXP (XEXP (x, 1), 1));
12960 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12961 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12962 GEN_INT (1 << log));
12965 /* Put multiply first if it isn't already. */
12966 if (GET_CODE (XEXP (x, 1)) == MULT)
12968 rtx tmp = XEXP (x, 0);
12969 XEXP (x, 0) = XEXP (x, 1);
12974 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12975 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12976 created by virtual register instantiation, register elimination, and
12977 similar optimizations. */
12978 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12981 x = gen_rtx_PLUS (Pmode,
12982 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12983 XEXP (XEXP (x, 1), 0)),
12984 XEXP (XEXP (x, 1), 1));
12988 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12989 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12990 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12991 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12992 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12993 && CONSTANT_P (XEXP (x, 1)))
12996 rtx other = NULL_RTX;
12998 if (CONST_INT_P (XEXP (x, 1)))
13000 constant = XEXP (x, 1);
13001 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13003 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13005 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13006 other = XEXP (x, 1);
13014 x = gen_rtx_PLUS (Pmode,
13015 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13016 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13017 plus_constant (other, INTVAL (constant)));
13021 if (changed && ix86_legitimate_address_p (mode, x, false))
13024 if (GET_CODE (XEXP (x, 0)) == MULT)
13027 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13030 if (GET_CODE (XEXP (x, 1)) == MULT)
13033 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13037 && REG_P (XEXP (x, 1))
13038 && REG_P (XEXP (x, 0)))
13041 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13044 x = legitimize_pic_address (x, 0);
13047 if (changed && ix86_legitimate_address_p (mode, x, false))
13050 if (REG_P (XEXP (x, 0)))
13052 rtx temp = gen_reg_rtx (Pmode);
13053 rtx val = force_operand (XEXP (x, 1), temp);
13055 emit_move_insn (temp, val);
13057 XEXP (x, 1) = temp;
13061 else if (REG_P (XEXP (x, 1)))
13063 rtx temp = gen_reg_rtx (Pmode);
13064 rtx val = force_operand (XEXP (x, 0), temp);
13066 emit_move_insn (temp, val);
13068 XEXP (x, 0) = temp;
13076 /* Print an integer constant expression in assembler syntax. Addition
13077 and subtraction are the only arithmetic that may appear in these
13078 expressions. FILE is the stdio stream to write to, X is the rtx, and
13079 CODE is the operand print code from the output string. */
13082 output_pic_addr_const (FILE *file, rtx x, int code)
13086 switch (GET_CODE (x))
13089 gcc_assert (flag_pic);
13094 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13095 output_addr_const (file, x);
13098 const char *name = XSTR (x, 0);
13100 /* Mark the decl as referenced so that cgraph will
13101 output the function. */
13102 if (SYMBOL_REF_DECL (x))
13103 mark_decl_referenced (SYMBOL_REF_DECL (x));
13106 if (MACHOPIC_INDIRECT
13107 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13108 name = machopic_indirection_name (x, /*stub_p=*/true);
13110 assemble_name (file, name);
13112 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13113 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13114 fputs ("@PLT", file);
13121 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13122 assemble_name (asm_out_file, buf);
13126 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13130 /* This used to output parentheses around the expression,
13131 but that does not work on the 386 (either ATT or BSD assembler). */
13132 output_pic_addr_const (file, XEXP (x, 0), code);
13136 if (GET_MODE (x) == VOIDmode)
13138 /* We can use %d if the number is <32 bits and positive. */
13139 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13140 fprintf (file, "0x%lx%08lx",
13141 (unsigned long) CONST_DOUBLE_HIGH (x),
13142 (unsigned long) CONST_DOUBLE_LOW (x));
13144 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
13149 output_operand_lossage ("floating constant misused");
13153 /* Some assemblers need integer constants to appear first. */
13154 if (CONST_INT_P (XEXP (x, 0)))
13156 output_pic_addr_const (file, XEXP (x, 0), code);
13158 output_pic_addr_const (file, XEXP (x, 1), code);
13162 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13163 output_pic_addr_const (file, XEXP (x, 1), code);
13165 output_pic_addr_const (file, XEXP (x, 0), code);
13171 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13172 output_pic_addr_const (file, XEXP (x, 0), code);
13174 output_pic_addr_const (file, XEXP (x, 1), code);
13176 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13180 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13182 bool f = i386_asm_output_addr_const_extra (file, x);
13187 gcc_assert (XVECLEN (x, 0) == 1);
13188 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13189 switch (XINT (x, 1))
13192 fputs ("@GOT", file);
13194 case UNSPEC_GOTOFF:
13195 fputs ("@GOTOFF", file);
13197 case UNSPEC_PLTOFF:
13198 fputs ("@PLTOFF", file);
13201 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13202 "(%rip)" : "[rip]", file);
13204 case UNSPEC_GOTPCREL:
13205 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13206 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13208 case UNSPEC_GOTTPOFF:
13209 /* FIXME: This might be @TPOFF in Sun ld too. */
13210 fputs ("@gottpoff", file);
13213 fputs ("@tpoff", file);
13215 case UNSPEC_NTPOFF:
13217 fputs ("@tpoff", file);
13219 fputs ("@ntpoff", file);
13221 case UNSPEC_DTPOFF:
13222 fputs ("@dtpoff", file);
13224 case UNSPEC_GOTNTPOFF:
13226 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13227 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13229 fputs ("@gotntpoff", file);
13231 case UNSPEC_INDNTPOFF:
13232 fputs ("@indntpoff", file);
13235 case UNSPEC_MACHOPIC_OFFSET:
13237 machopic_output_function_base_name (file);
13241 output_operand_lossage ("invalid UNSPEC as operand");
13247 output_operand_lossage ("invalid expression as operand");
13251 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13252 We need to emit DTP-relative relocations. */
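/* E.g., assuming ASM_LONG expands to the .long directive, SIZE == 4
   emits ".long foo@dtpoff", while SIZE == 8 appends a zero word,
   giving ".long foo@dtpoff, 0".  */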
13254 static void ATTRIBUTE_UNUSED
13255 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13257 fputs (ASM_LONG, file);
13258 output_addr_const (file, x);
13259 fputs ("@dtpoff", file);
13265 fputs (", 0", file);
13268 gcc_unreachable ();
13272 /* Return true if X is a representation of the PIC register. This copes
13273 with calls from ix86_find_base_term, where the register might have
13274 been replaced by a cselib value. */
13277 ix86_pic_register_p (rtx x)
13279 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13280 return (pic_offset_table_rtx
13281 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13283 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13286 /* Helper function for ix86_delegitimize_address.
13287 Attempt to delegitimize TLS local-exec accesses. */
13290 ix86_delegitimize_tls_address (rtx orig_x)
13292 rtx x = orig_x, unspec;
13293 struct ix86_address addr;
13295 if (!TARGET_TLS_DIRECT_SEG_REFS)
13299 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13301 if (ix86_decompose_address (x, &addr) == 0
13302 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13303 || addr.disp == NULL_RTX
13304 || GET_CODE (addr.disp) != CONST)
13306 unspec = XEXP (addr.disp, 0);
13307 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13308 unspec = XEXP (unspec, 0);
13309 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13311 x = XVECEXP (unspec, 0, 0);
13312 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13313 if (unspec != XEXP (addr.disp, 0))
13314 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13317 rtx idx = addr.index;
13318 if (addr.scale != 1)
13319 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13320 x = gen_rtx_PLUS (Pmode, idx, x);
13323 x = gen_rtx_PLUS (Pmode, addr.base, x);
13324 if (MEM_P (orig_x))
13325 x = replace_equiv_address_nv (orig_x, x);
13329 /* In the name of slightly smaller debug output, and to cater to
13330 general assembler lossage, recognize PIC+GOTOFF and turn it back
13331 into a direct symbol reference.
13333 On Darwin, this is necessary to avoid a crash, because Darwin
13334 has a different PIC label for each routine but the DWARF debugging
13335 information is not associated with any particular routine, so it's
13336 necessary to remove references to the PIC label from RTL stored by
13337 the DWARF output code. */
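/* Sketch of the basic transformation: an address such as
   (plus (reg:SI ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "foo"), so the debug info can
   refer to the symbol directly.  */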
13340 ix86_delegitimize_address (rtx x)
13342 rtx orig_x = delegitimize_mem_from_attrs (x);
13343 /* addend is NULL or some rtx if x is something+GOTOFF where
13344 something doesn't include the PIC register. */
13345 rtx addend = NULL_RTX;
13346 /* reg_addend is NULL or a multiple of some register. */
13347 rtx reg_addend = NULL_RTX;
13348 /* const_addend is NULL or a const_int. */
13349 rtx const_addend = NULL_RTX;
13350 /* This is the result, or NULL. */
13351 rtx result = NULL_RTX;
13360 if (GET_CODE (x) != CONST
13361 || GET_CODE (XEXP (x, 0)) != UNSPEC
13362 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13363 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13364 || !MEM_P (orig_x))
13365 return ix86_delegitimize_tls_address (orig_x);
13366 x = XVECEXP (XEXP (x, 0), 0, 0);
13367 if (GET_MODE (orig_x) != Pmode)
13369 x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13376 if (GET_CODE (x) != PLUS
13377 || GET_CODE (XEXP (x, 1)) != CONST)
13378 return ix86_delegitimize_tls_address (orig_x);
13380 if (ix86_pic_register_p (XEXP (x, 0)))
13381 /* %ebx + GOT/GOTOFF */
13383 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13385 /* %ebx + %reg * scale + GOT/GOTOFF */
13386 reg_addend = XEXP (x, 0);
13387 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13388 reg_addend = XEXP (reg_addend, 1);
13389 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13390 reg_addend = XEXP (reg_addend, 0);
13393 reg_addend = NULL_RTX;
13394 addend = XEXP (x, 0);
13398 addend = XEXP (x, 0);
13400 x = XEXP (XEXP (x, 1), 0);
13401 if (GET_CODE (x) == PLUS
13402 && CONST_INT_P (XEXP (x, 1)))
13404 const_addend = XEXP (x, 1);
13408 if (GET_CODE (x) == UNSPEC
13409 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13410 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13411 result = XVECEXP (x, 0, 0);
13413 if (TARGET_MACHO && darwin_local_data_pic (x)
13414 && !MEM_P (orig_x))
13415 result = XVECEXP (x, 0, 0);
13418 return ix86_delegitimize_tls_address (orig_x);
13421 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13423 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13426 /* If the rest of original X doesn't involve the PIC register, add
13427 addend and subtract pic_offset_table_rtx. This can happen e.g.
13429 leal (%ebx, %ecx, 4), %ecx
13431 movl foo@GOTOFF(%ecx), %edx
13432 in which case we return (%ecx - %ebx) + foo. */
13433 if (pic_offset_table_rtx)
13434 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13435 pic_offset_table_rtx),
13440 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13442 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13443 if (result == NULL_RTX)
13449 /* If X is a machine specific address (i.e. a symbol or label being
13450 referenced as a displacement from the GOT implemented using an
13451 UNSPEC), then return the base term. Otherwise return X. */
13454 ix86_find_base_term (rtx x)
13460 if (GET_CODE (x) != CONST)
13462 term = XEXP (x, 0);
13463 if (GET_CODE (term) == PLUS
13464 && (CONST_INT_P (XEXP (term, 1))
13465 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13466 term = XEXP (term, 0);
13467 if (GET_CODE (term) != UNSPEC
13468 || (XINT (term, 1) != UNSPEC_GOTPCREL
13469 && XINT (term, 1) != UNSPEC_PCREL))
13472 return XVECEXP (term, 0, 0);
13475 return ix86_delegitimize_address (x);
13479 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13480 int fp, FILE *file)
13482 const char *suffix;
13484 if (mode == CCFPmode || mode == CCFPUmode)
13486 code = ix86_fp_compare_code_to_integer (code);
13490 code = reverse_condition (code);
13541 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13545 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13546 Those same assemblers have the same but opposite lossage on cmov. */
13547 if (mode == CCmode)
13548 suffix = fp ? "nbe" : "a";
13549 else if (mode == CCCmode)
13552 gcc_unreachable ();
13568 gcc_unreachable ();
13572 gcc_assert (mode == CCmode || mode == CCCmode);
13589 gcc_unreachable ();
13593 /* ??? As above. */
13594 gcc_assert (mode == CCmode || mode == CCCmode);
13595 suffix = fp ? "nb" : "ae";
13598 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13602 /* ??? As above. */
13603 if (mode == CCmode)
13605 else if (mode == CCCmode)
13606 suffix = fp ? "nb" : "ae";
13608 gcc_unreachable ();
13611 suffix = fp ? "u" : "p";
13614 suffix = fp ? "nu" : "np";
13617 gcc_unreachable ();
13619 fputs (suffix, file);
13622 /* Print the name of register X to FILE based on its machine mode and number.
13623 If CODE is 'w', pretend the mode is HImode.
13624 If CODE is 'b', pretend the mode is QImode.
13625 If CODE is 'k', pretend the mode is SImode.
13626 If CODE is 'q', pretend the mode is DImode.
13627 If CODE is 'x', pretend the mode is V4SFmode.
13628 If CODE is 't', pretend the mode is V8SFmode.
13629 If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
   If CODE is 'd', duplicate the operand for an AVX instruction.  */
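/* For example, with X being hard reg 0 (the a register), code 'b'
   prints "al", 'w' prints "ax", 'k' prints "eax", 'q' prints "rax"
   and 'h' prints "ah" (modulo the assembler dialect's '%' prefix).  */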
13635 print_reg (rtx x, int code, FILE *file)
13638 bool duplicated = code == 'd' && TARGET_AVX;
13640 gcc_assert (x == pc_rtx
13641 || (REGNO (x) != ARG_POINTER_REGNUM
13642 && REGNO (x) != FRAME_POINTER_REGNUM
13643 && REGNO (x) != FLAGS_REG
13644 && REGNO (x) != FPSR_REG
13645 && REGNO (x) != FPCR_REG));
13647 if (ASSEMBLER_DIALECT == ASM_ATT)
13652 gcc_assert (TARGET_64BIT);
13653 fputs ("rip", file);
13657 if (code == 'w' || MMX_REG_P (x))
13659 else if (code == 'b')
13661 else if (code == 'k')
13663 else if (code == 'q')
13665 else if (code == 'y')
13667 else if (code == 'h')
13669 else if (code == 'x')
13671 else if (code == 't')
13674 code = GET_MODE_SIZE (GET_MODE (x));
  /* Irritatingly, the AMD extended registers use a different naming
     convention from the normal registers.  */
13678 if (REX_INT_REG_P (x))
13680 gcc_assert (TARGET_64BIT);
13684 error ("extended registers have no high halves");
13687 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13690 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13693 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13696 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13699 error ("unsupported operand size for extended register");
13709 if (STACK_TOP_P (x))
13718 if (! ANY_FP_REG_P (x))
13719 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13724 reg = hi_reg_name[REGNO (x)];
13727 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13729 reg = qi_reg_name[REGNO (x)];
13732 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13734 reg = qi_high_reg_name[REGNO (x)];
13739 gcc_assert (!duplicated);
13741 fputs (hi_reg_name[REGNO (x)] + 1, file);
13746 gcc_unreachable ();
13752 if (ASSEMBLER_DIALECT == ASM_ATT)
13753 fprintf (file, ", %%%s", reg);
13755 fprintf (file, ", %s", reg);
13759 /* Locate some local-dynamic symbol still in use by this function
13760 so that we can print its name in some tls_local_dynamic_base
13764 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13768 if (GET_CODE (x) == SYMBOL_REF
13769 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13771 cfun->machine->some_ld_name = XSTR (x, 0);
13778 static const char *
13779 get_some_local_dynamic_name (void)
13783 if (cfun->machine->some_ld_name)
13784 return cfun->machine->some_ld_name;
13786 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13787 if (NONDEBUG_INSN_P (insn)
13788 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13789 return cfun->machine->some_ld_name;
13794 /* Meaning of CODE:
13795 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13796 C -- print opcode suffix for set/cmov insn.
13797 c -- like C, but print reversed condition
13798 F,f -- likewise, but for floating-point.
13799 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13801 R -- print the prefix for register names.
13802 z -- print the opcode suffix for the size of the current operand.
13803 Z -- likewise, with special suffixes for x87 instructions.
13804 * -- print a star (in certain assembler syntax)
13805 A -- print an absolute memory reference.
13806 w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
13809 b -- print the QImode name of the register for the indicated operand.
13810 %b0 would print %al if operands[0] is reg 0.
13811 w -- likewise, print the HImode name of the register.
13812 k -- likewise, print the SImode name of the register.
13813 q -- likewise, print the DImode name of the register.
13814 x -- likewise, print the V4SFmode name of the register.
13815 t -- likewise, print the V8SFmode name of the register.
13816 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13817 y -- print "st(0)" instead of "st" as a register.
13818 d -- print duplicated register operand for AVX instruction.
13819 D -- print condition for SSE cmp instruction.
13820 P -- if PIC, print an @PLT suffix.
13821 X -- don't print any sort of PIC '@' suffix for a symbol.
13822 & -- print some in-use local-dynamic symbol name.
13823 H -- print a memory address offset by 8; used for sse high-parts
13824 Y -- print condition for XOP pcom* instruction.
13825 + -- print a branch hint as 'cs' or 'ds' prefix
13826 ; -- print a semicolon (after prefixes due to bug in older gas).
   @ -- print a segment register of thread base pointer load.  */
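/* A concrete example, assuming operands[0] is an SImode register:
   "%z0" in an insn template prints the size suffix "l", so
   "add%z0\t{%2, %0|%0, %2}" assembles as "addl" in AT&T syntax.  */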
13831 ix86_print_operand (FILE *file, rtx x, int code)
13838 if (ASSEMBLER_DIALECT == ASM_ATT)
13844 const char *name = get_some_local_dynamic_name ();
13846 output_operand_lossage ("'%%&' used without any "
13847 "local dynamic TLS references");
13849 assemble_name (file, name);
13854 switch (ASSEMBLER_DIALECT)
13861 /* Intel syntax. For absolute addresses, registers should not
13862 be surrounded by braces. */
13866 ix86_print_operand (file, x, 0);
13873 gcc_unreachable ();
13876 ix86_print_operand (file, x, 0);
13881 if (ASSEMBLER_DIALECT == ASM_ATT)
13886 if (ASSEMBLER_DIALECT == ASM_ATT)
13891 if (ASSEMBLER_DIALECT == ASM_ATT)
13896 if (ASSEMBLER_DIALECT == ASM_ATT)
13901 if (ASSEMBLER_DIALECT == ASM_ATT)
13906 if (ASSEMBLER_DIALECT == ASM_ATT)
13911 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13913 /* Opcodes don't get size suffixes if using Intel opcodes. */
13914 if (ASSEMBLER_DIALECT == ASM_INTEL)
13917 switch (GET_MODE_SIZE (GET_MODE (x)))
13936 output_operand_lossage
13937 ("invalid operand size for operand code '%c'", code);
13942 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13944 (0, "non-integer operand used with operand code '%c'", code);
13948 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13949 if (ASSEMBLER_DIALECT == ASM_INTEL)
13952 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13954 switch (GET_MODE_SIZE (GET_MODE (x)))
13957 #ifdef HAVE_AS_IX86_FILDS
13967 #ifdef HAVE_AS_IX86_FILDQ
13970 fputs ("ll", file);
13978 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13980 /* 387 opcodes don't get size suffixes
13981 if the operands are registers. */
13982 if (STACK_REG_P (x))
13985 switch (GET_MODE_SIZE (GET_MODE (x)))
14006 output_operand_lossage
14007 ("invalid operand type used with operand code '%c'", code);
14011 output_operand_lossage
14012 ("invalid operand size for operand code '%c'", code);
14029 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14031 ix86_print_operand (file, x, 0);
14032 fputs (", ", file);
      /* A little bit of braindamage here.  The SSE compare instructions
	 use completely different names for the comparisons than the fp
	 conditional moves do.  */
14042 switch (GET_CODE (x))
14045 fputs ("eq", file);
14048 fputs ("eq_us", file);
14051 fputs ("lt", file);
14054 fputs ("nge", file);
14057 fputs ("le", file);
14060 fputs ("ngt", file);
14063 fputs ("unord", file);
14066 fputs ("neq", file);
14069 fputs ("neq_oq", file);
14072 fputs ("ge", file);
14075 fputs ("nlt", file);
14078 fputs ("gt", file);
14081 fputs ("nle", file);
14084 fputs ("ord", file);
14087 output_operand_lossage ("operand is not a condition code, "
14088 "invalid operand code 'D'");
14094 switch (GET_CODE (x))
14098 fputs ("eq", file);
14102 fputs ("lt", file);
14106 fputs ("le", file);
14109 fputs ("unord", file);
14113 fputs ("neq", file);
14117 fputs ("nlt", file);
14121 fputs ("nle", file);
14124 fputs ("ord", file);
14127 output_operand_lossage ("operand is not a condition code, "
14128 "invalid operand code 'D'");
14134 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14135 if (ASSEMBLER_DIALECT == ASM_ATT)
14137 switch (GET_MODE (x))
14139 case HImode: putc ('w', file); break;
14141 case SFmode: putc ('l', file); break;
14143 case DFmode: putc ('q', file); break;
14144 default: gcc_unreachable ();
14151 if (!COMPARISON_P (x))
14153 output_operand_lossage ("operand is neither a constant nor a "
14154 "condition code, invalid operand code "
14158 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14161 if (!COMPARISON_P (x))
14163 output_operand_lossage ("operand is neither a constant nor a "
14164 "condition code, invalid operand code "
14168 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14169 if (ASSEMBLER_DIALECT == ASM_ATT)
14172 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14175 /* Like above, but reverse condition */
14177 /* Check to see if argument to %c is really a constant
14178 and not a condition code which needs to be reversed. */
14179 if (!COMPARISON_P (x))
14181 output_operand_lossage ("operand is neither a constant nor a "
14182 "condition code, invalid operand "
14186 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14189 if (!COMPARISON_P (x))
14191 output_operand_lossage ("operand is neither a constant nor a "
14192 "condition code, invalid operand "
14196 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14197 if (ASSEMBLER_DIALECT == ASM_ATT)
14200 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14204 /* It doesn't actually matter what mode we use here, as we're
14205 only going to use this for printing. */
14206 x = adjust_address_nv (x, DImode, 8);
14214 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14217 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14220 int pred_val = INTVAL (XEXP (x, 0));
14222 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14223 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14225 int taken = pred_val > REG_BR_PROB_BASE / 2;
14226 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14228 /* Emit hints only in the case default branch prediction
14229 heuristics would fail. */
14230 if (taken != cputaken)
14232 /* We use 3e (DS) prefix for taken branches and
14233 2e (CS) prefix for not taken branches. */
14235 fputs ("ds ; ", file);
14237 fputs ("cs ; ", file);
14245 switch (GET_CODE (x))
14248 fputs ("neq", file);
14251 fputs ("eq", file);
14255 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14259 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14263 fputs ("le", file);
14267 fputs ("lt", file);
14270 fputs ("unord", file);
14273 fputs ("ord", file);
14276 fputs ("ueq", file);
14279 fputs ("nlt", file);
14282 fputs ("nle", file);
14285 fputs ("ule", file);
14288 fputs ("ult", file);
14291 fputs ("une", file);
14294 output_operand_lossage ("operand is not a condition code, "
14295 "invalid operand code 'Y'");
14301 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14307 if (ASSEMBLER_DIALECT == ASM_ATT)
14310 /* The kernel uses a different segment register for performance
14311 reasons; a system call would not have to trash the userspace
14312 segment register, which would be expensive. */
14313 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14314 fputs ("fs", file);
14316 fputs ("gs", file);
14320 output_operand_lossage ("invalid operand code '%c'", code);
14325 print_reg (x, code, file);
14327 else if (MEM_P (x))
14329 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14330 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14331 && GET_MODE (x) != BLKmode)
14334 switch (GET_MODE_SIZE (GET_MODE (x)))
14336 case 1: size = "BYTE"; break;
14337 case 2: size = "WORD"; break;
14338 case 4: size = "DWORD"; break;
14339 case 8: size = "QWORD"; break;
14340 case 12: size = "TBYTE"; break;
14342 if (GET_MODE (x) == XFmode)
14347 case 32: size = "YMMWORD"; break;
14349 gcc_unreachable ();
14352 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14355 else if (code == 'w')
14357 else if (code == 'k')
14360 fputs (size, file);
14361 fputs (" PTR ", file);
14365 /* Avoid (%rip) for call operands. */
14366 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14367 && !CONST_INT_P (x))
14368 output_addr_const (file, x);
14369 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14370 output_operand_lossage ("invalid constraints for operand");
14372 output_address (x);
14375 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14380 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14381 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14383 if (ASSEMBLER_DIALECT == ASM_ATT)
14385 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14387 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14389 fprintf (file, "0x%08x", (unsigned int) l);
14392 /* These float cases don't actually occur as immediate operands. */
14393 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14397 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14398 fputs (dstr, file);
14401 else if (GET_CODE (x) == CONST_DOUBLE
14402 && GET_MODE (x) == XFmode)
14406 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14407 fputs (dstr, file);
14412 /* We have patterns that allow zero sets of memory, for instance.
14413 In 64-bit mode, we should probably support all 8-byte vectors,
14414 since we can in fact encode that into an immediate. */
14415 if (GET_CODE (x) == CONST_VECTOR)
14417 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14423 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14425 if (ASSEMBLER_DIALECT == ASM_ATT)
14428 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14429 || GET_CODE (x) == LABEL_REF)
14431 if (ASSEMBLER_DIALECT == ASM_ATT)
14434 fputs ("OFFSET FLAT:", file);
14437 if (CONST_INT_P (x))
14438 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14439 else if (flag_pic || MACHOPIC_INDIRECT)
14440 output_pic_addr_const (file, x, code);
14442 output_addr_const (file, x);
14447 ix86_print_operand_punct_valid_p (unsigned char code)
14449 return (code == '@' || code == '*' || code == '+'
14450 || code == '&' || code == ';');
14453 /* Print a memory operand whose address is ADDR. */
14456 ix86_print_operand_address (FILE *file, rtx addr)
14458 struct ix86_address parts;
14459 rtx base, index, disp;
14461 int ok = ix86_decompose_address (addr, &parts);
14466 index = parts.index;
14468 scale = parts.scale;
14476 if (ASSEMBLER_DIALECT == ASM_ATT)
14478 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14481 gcc_unreachable ();
14484 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14485 if (TARGET_64BIT && !base && !index)
14489 if (GET_CODE (disp) == CONST
14490 && GET_CODE (XEXP (disp, 0)) == PLUS
14491 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14492 symbol = XEXP (XEXP (disp, 0), 0);
14494 if (GET_CODE (symbol) == LABEL_REF
14495 || (GET_CODE (symbol) == SYMBOL_REF
14496 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14499 if (!base && !index)
      /* A displacement-only address requires special attention.  */
14503 if (CONST_INT_P (disp))
14505 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14506 fputs ("ds:", file);
14507 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14510 output_pic_addr_const (file, disp, 0);
14512 output_addr_const (file, disp);
14516 if (ASSEMBLER_DIALECT == ASM_ATT)
14521 output_pic_addr_const (file, disp, 0);
14522 else if (GET_CODE (disp) == LABEL_REF)
14523 output_asm_label (disp);
14525 output_addr_const (file, disp);
14530 print_reg (base, 0, file);
14534 print_reg (index, 0, file);
14536 fprintf (file, ",%d", scale);
14542 rtx offset = NULL_RTX;
14546 /* Pull out the offset of a symbol; print any symbol itself. */
14547 if (GET_CODE (disp) == CONST
14548 && GET_CODE (XEXP (disp, 0)) == PLUS
14549 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14551 offset = XEXP (XEXP (disp, 0), 1);
14552 disp = gen_rtx_CONST (VOIDmode,
14553 XEXP (XEXP (disp, 0), 0));
14557 output_pic_addr_const (file, disp, 0);
14558 else if (GET_CODE (disp) == LABEL_REF)
14559 output_asm_label (disp);
14560 else if (CONST_INT_P (disp))
14563 output_addr_const (file, disp);
14569 print_reg (base, 0, file);
14572 if (INTVAL (offset) >= 0)
14574 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14578 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14585 print_reg (index, 0, file);
14587 fprintf (file, "*%d", scale);
14594 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14597 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14601 if (GET_CODE (x) != UNSPEC)
14604 op = XVECEXP (x, 0, 0);
14605 switch (XINT (x, 1))
14607 case UNSPEC_GOTTPOFF:
14608 output_addr_const (file, op);
14609 /* FIXME: This might be @TPOFF in Sun ld. */
14610 fputs ("@gottpoff", file);
14613 output_addr_const (file, op);
14614 fputs ("@tpoff", file);
14616 case UNSPEC_NTPOFF:
14617 output_addr_const (file, op);
14619 fputs ("@tpoff", file);
14621 fputs ("@ntpoff", file);
14623 case UNSPEC_DTPOFF:
14624 output_addr_const (file, op);
14625 fputs ("@dtpoff", file);
14627 case UNSPEC_GOTNTPOFF:
14628 output_addr_const (file, op);
14630 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14631 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14633 fputs ("@gotntpoff", file);
14635 case UNSPEC_INDNTPOFF:
14636 output_addr_const (file, op);
14637 fputs ("@indntpoff", file);
14640 case UNSPEC_MACHOPIC_OFFSET:
14641 output_addr_const (file, op);
14643 machopic_output_function_base_name (file);
14647 case UNSPEC_STACK_CHECK:
14651 gcc_assert (flag_split_stack);
14653 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14654 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14656 gcc_unreachable ();
14659 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14670 /* Split one or more double-mode RTL references into pairs of half-mode
14671 references. The RTL can be REG, offsettable MEM, integer constant, or
14672 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14673 split and "num" is its length. lo_half and hi_half are output arrays
14674 that parallel "operands". */
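/* For instance, splitting a DImode pseudo on a 32bit target yields two
   SImode subregs (at bytes 0 and 4); splitting an offsettable DImode
   MEM yields the same MEM adjusted to byte offsets 0 and 4.  */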
14677 split_double_mode (enum machine_mode mode, rtx operands[],
14678 int num, rtx lo_half[], rtx hi_half[])
14680 enum machine_mode half_mode;
14686 half_mode = DImode;
14689 half_mode = SImode;
14692 gcc_unreachable ();
14695 byte = GET_MODE_SIZE (half_mode);
14699 rtx op = operands[num];
      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle them.  */
14705 lo_half[num] = adjust_address (op, half_mode, 0);
14706 hi_half[num] = adjust_address (op, half_mode, byte);
14710 lo_half[num] = simplify_gen_subreg (half_mode, op,
14711 GET_MODE (op) == VOIDmode
14712 ? mode : GET_MODE (op), 0);
14713 hi_half[num] = simplify_gen_subreg (half_mode, op,
14714 GET_MODE (op) == VOIDmode
14715 ? mode : GET_MODE (op), byte);
14720 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14721 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14722 is the expression of the binary operation. The output may either be
14723 emitted here, or returned to the caller, like all output_* functions.
14725 There is no guarantee that the operands are the same mode, as they
14726 might be within FLOAT or FLOAT_EXTEND expressions. */
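/* For a rough feel of the output: an SFmode PLUS with SSE operands
   produces "addss\t{%2, %0|%0, %2}" (or the three-operand "vaddss"
   form when AVX is enabled), while the x87 path below selects among
   the operand-order and popping variants of fadd/fsub/fmul/fdiv.  */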
14728 #ifndef SYSV386_COMPAT
14729 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14730 wants to fix the assemblers because that causes incompatibility
14731 with gcc. No-one wants to fix gcc because that causes
14732 incompatibility with assemblers... You can use the option of
14733 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14734 #define SYSV386_COMPAT 1
14738 output_387_binary_op (rtx insn, rtx *operands)
14740 static char buf[40];
14743 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14745 #ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents the input
     constraints, which helps in understanding the following code.  */
14748 if (STACK_REG_P (operands[0])
14749 && ((REG_P (operands[1])
14750 && REGNO (operands[0]) == REGNO (operands[1])
14751 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14752 || (REG_P (operands[2])
14753 && REGNO (operands[0]) == REGNO (operands[2])
14754 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14755 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14758 gcc_assert (is_sse);
14761 switch (GET_CODE (operands[3]))
14764 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14765 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14773 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14774 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14782 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14783 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14791 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14792 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14800 gcc_unreachable ();
14807 strcpy (buf, ssep);
14808 if (GET_MODE (operands[0]) == SFmode)
14809 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14811 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14815 strcpy (buf, ssep + 1);
14816 if (GET_MODE (operands[0]) == SFmode)
14817 strcat (buf, "ss\t{%2, %0|%0, %2}");
14819 strcat (buf, "sd\t{%2, %0|%0, %2}");
14825 switch (GET_CODE (operands[3]))
14829 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14831 rtx temp = operands[2];
14832 operands[2] = operands[1];
14833 operands[1] = temp;
      /* Now we know operands[0] == operands[1].  */
14838 if (MEM_P (operands[2]))
14844 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14846 if (STACK_TOP_P (operands[0]))
14847 /* How is it that we are storing to a dead operand[2]?
14848 Well, presumably operands[1] is dead too. We can't
14849 store the result to st(0) as st(0) gets popped on this
14850 instruction. Instead store to operands[2] (which I
14851 think has to be st(1)). st(1) will be popped later.
14852 gcc <= 2.8.1 didn't have this check and generated
14853 assembly code that the Unixware assembler rejected. */
14854 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14856 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14860 if (STACK_TOP_P (operands[0]))
14861 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14863 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14868 if (MEM_P (operands[1]))
14874 if (MEM_P (operands[2]))
14880 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14883 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14884 derived assemblers, confusingly reverse the direction of
14885 the operation for fsub{r} and fdiv{r} when the
14886 destination register is not st(0). The Intel assembler
14887 doesn't have this brain damage. Read !SYSV386_COMPAT to
14888 figure out what the hardware really does. */
14889 if (STACK_TOP_P (operands[0]))
14890 p = "{p\t%0, %2|rp\t%2, %0}";
14892 p = "{rp\t%2, %0|p\t%0, %2}";
14894 if (STACK_TOP_P (operands[0]))
14895 /* As above for fmul/fadd, we can't store to st(0). */
14896 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14898 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14903 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14906 if (STACK_TOP_P (operands[0]))
14907 p = "{rp\t%0, %1|p\t%1, %0}";
14909 p = "{p\t%1, %0|rp\t%0, %1}";
14911 if (STACK_TOP_P (operands[0]))
14912 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14914 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14919 if (STACK_TOP_P (operands[0]))
14921 if (STACK_TOP_P (operands[1]))
14922 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14924 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14927 else if (STACK_TOP_P (operands[1]))
14930 p = "{\t%1, %0|r\t%0, %1}";
14932 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14938 p = "{r\t%2, %0|\t%0, %2}";
14940 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14946 gcc_unreachable ();
14953 /* Return needed mode for entity in optimize_mode_switching pass. */
14956 ix86_mode_needed (int entity, rtx insn)
14958 enum attr_i387_cw mode;
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
14966 || (NONJUMP_INSN_P (insn)
14967 && (asm_noperands (PATTERN (insn)) >= 0
14968 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14969 return I387_CW_UNINITIALIZED;
14971 if (recog_memoized (insn) < 0)
14972 return I387_CW_ANY;
14974 mode = get_attr_i387_cw (insn);
14979 if (mode == I387_CW_TRUNC)
14984 if (mode == I387_CW_FLOOR)
14989 if (mode == I387_CW_CEIL)
14994 if (mode == I387_CW_MASK_PM)
14999 gcc_unreachable ();
15002 return I387_CW_ANY;
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to the current control word,
   while NEW_MODE is set to the new control word.  */
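/* The constants below follow the x87 control word layout: bits 10-11
   are the rounding control field (0x0400 = round down, 0x0800 = round
   up, 0x0c00 = truncate toward zero) and bit 5 (0x0020) masks the
   precision exception.  */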
15010 emit_i387_cw_initialization (int mode)
15012 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15015 enum ix86_stack_slot slot;
15017 rtx reg = gen_reg_rtx (HImode);
15019 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15020 emit_move_insn (reg, copy_rtx (stored_mode));
15022 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15023 || optimize_function_for_size_p (cfun))
15027 case I387_CW_TRUNC:
15028 /* round toward zero (truncate) */
15029 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15030 slot = SLOT_CW_TRUNC;
15033 case I387_CW_FLOOR:
15034 /* round down toward -oo */
15035 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15036 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15037 slot = SLOT_CW_FLOOR;
15041 /* round up toward +oo */
15042 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15043 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15044 slot = SLOT_CW_CEIL;
15047 case I387_CW_MASK_PM:
15048 /* mask precision exception for nearbyint() */
15049 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15050 slot = SLOT_CW_MASK_PM;
15054 gcc_unreachable ();
15061 case I387_CW_TRUNC:
15062 /* round toward zero (truncate) */
15063 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15064 slot = SLOT_CW_TRUNC;
15067 case I387_CW_FLOOR:
15068 /* round down toward -oo */
15069 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15070 slot = SLOT_CW_FLOOR;
15074 /* round up toward +oo */
15075 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15076 slot = SLOT_CW_CEIL;
15079 case I387_CW_MASK_PM:
15080 /* mask precision exception for nearbyint() */
15081 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15082 slot = SLOT_CW_MASK_PM;
15086 gcc_unreachable ();
15090 gcc_assert (slot < MAX_386_STACK_LOCALS);
15092 new_mode = assign_386_stack_local (HImode, slot);
15093 emit_move_insn (new_mode, reg);
15096 /* Output code for INSN to convert a float to a signed int. OPERANDS
15097 are the insn operands. The output may be [HSD]Imode and the input
15098 operand may be [SDX]Fmode. */
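/* A typical non-fisttp sequence below, with operands[3] holding the
   truncating control word and operands[2] the saved one, looks like:

	fldcw	%3		# switch to round-toward-zero
	fistp%Z0	%0	# convert, store and pop
	fldcw	%2		# restore the caller's control word  */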
15101 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
15103 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15104 int dimode_p = GET_MODE (operands[0]) == DImode;
15105 int round_mode = get_attr_i387_cw (insn);
15107 /* Jump through a hoop or two for DImode, since the hardware has no
15108 non-popping instruction. We used to do this a different way, but
15109 that was somewhat fragile and broke with post-reload splitters. */
15110 if ((dimode_p || fisttp) && !stack_top_dies)
15111 output_asm_insn ("fld\t%y1", operands);
15113 gcc_assert (STACK_TOP_P (operands[1]));
15114 gcc_assert (MEM_P (operands[0]));
15115 gcc_assert (GET_MODE (operands[1]) != TFmode);
15118 output_asm_insn ("fisttp%Z0\t%0", operands);
15121 if (round_mode != I387_CW_ANY)
15122 output_asm_insn ("fldcw\t%3", operands);
15123 if (stack_top_dies || dimode_p)
15124 output_asm_insn ("fistp%Z0\t%0", operands);
15126 output_asm_insn ("fist%Z0\t%0", operands);
15127 if (round_mode != I387_CW_ANY)
15128 output_asm_insn ("fldcw\t%2", operands);
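/* What the fldcw/fistp/fldcw sequence amounts to at the source level;
   an illustrative C99 sketch, not GCC code.  lrint honors the dynamic
   rounding mode, so forcing FE_TOWARDZERO yields the truncating
   conversion.  */
#if 0
#include <fenv.h>
#include <math.h>

static long
trunc_to_long_sketch (double x)
{
  int old_mode = fegetround ();	/* save the current control word */
  long r;
  fesetround (FE_TOWARDZERO);	/* fldcw with RC = 11 */
  r = lrint (x);		/* fistp */
  fesetround (old_mode);	/* restore the control word */
  return r;
}
#endif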
15134 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15135 have the values zero or one, indicates the ffreep insn's operand
15136 from the OPERANDS array. */
15138 static const char *
15139 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15141 if (TARGET_USE_FFREEP)
15142 #ifdef HAVE_AS_IX86_FFREEP
15143 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15146 static char retval[32];
15147 int regno = REGNO (operands[opno]);
15149 gcc_assert (FP_REGNO_P (regno));
15151 regno -= FIRST_STACK_REG;
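/* ffreep %st(i) encodes as the two bytes 0xdf 0xc0+i; emitting the
   16-bit value 0xc?df with ASM_SHORT stores it little-endian, which
   produces exactly that byte sequence.  */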
15153 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15158 return opno ? "fstp\t%y1" : "fstp\t%y0";
15162 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15163 should be used. UNORDERED_P is true when fucom should be used. */
15166 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
15168 int stack_top_dies;
15169 rtx cmp_op0, cmp_op1;
15170 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15174 cmp_op0 = operands[0];
15175 cmp_op1 = operands[1];
15179 cmp_op0 = operands[1];
15180 cmp_op1 = operands[2];
15185 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15186 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15187 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15188 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15190 if (GET_MODE (operands[0]) == SFmode)
15192 return &ucomiss[TARGET_AVX ? 0 : 1];
15194 return &comiss[TARGET_AVX ? 0 : 1];
15197 return &ucomisd[TARGET_AVX ? 0 : 1];
15199 return &comisd[TARGET_AVX ? 0 : 1];
15202 gcc_assert (STACK_TOP_P (cmp_op0));
15204 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15206 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15208 if (stack_top_dies)
15210 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15211 return output_387_ffreep (operands, 1);
15214 return "ftst\n\tfnstsw\t%0";
15217 if (STACK_REG_P (cmp_op1)
15219 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15220 && REGNO (cmp_op1) != FIRST_STACK_REG)
15222 /* If the top of the 387 stack dies, and the other operand
15223 is also a stack register that dies, then this must be a
15224 `fcompp' float compare. */
15228 /* There is no double popping fcomi variant. Fortunately,
15229 eflags is immune from the fstp's cc clobbering. */
15231 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15233 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15234 return output_387_ffreep (operands, 0);
15239 return "fucompp\n\tfnstsw\t%0";
15241 return "fcompp\n\tfnstsw\t%0";
15246 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15248 static const char * const alt[16] =
15250 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15251 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15252 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15253 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15255 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15256 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15260 "fcomi\t{%y1, %0|%0, %y1}",
15261 "fcomip\t{%y1, %0|%0, %y1}",
15262 "fucomi\t{%y1, %0|%0, %y1}",
15263 "fucomip\t{%y1, %0|%0, %y1}",
15274 mask = eflags_p << 3;
15275 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15276 mask |= unordered_p << 1;
15277 mask |= stack_top_dies;
15279 gcc_assert (mask < 16);
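/* An illustrative sketch (not GCC code) of how the alt[] index above
   packs the four selector bits; e.g. fucomip -- eflags_p=1, integer
   operand=0, unordered_p=1, stack_top_dies=1 -- selects entry
   (1<<3)|(1<<1)|1 = 11.  */
#if 0
static int
fp_compare_template_index (int eflags_p, int int_op1_p,
                           int unordered_p, int stack_top_dies)
{
  return (eflags_p << 3) | (int_op1_p << 2)
         | (unordered_p << 1) | stack_top_dies;
}
#endif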
15288 ix86_output_addr_vec_elt (FILE *file, int value)
15290 const char *directive = ASM_LONG;
15294 directive = ASM_QUAD;
15296 gcc_assert (!TARGET_64BIT);
15299 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15303 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15305 const char *directive = ASM_LONG;
15308 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15309 directive = ASM_QUAD;
15311 gcc_assert (!TARGET_64BIT);
15313 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15314 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15315 fprintf (file, "%s%s%d-%s%d\n",
15316 directive, LPREFIX, value, LPREFIX, rel);
15317 else if (HAVE_AS_GOTOFF_IN_DATA)
15318 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15320 else if (TARGET_MACHO)
15322 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15323 machopic_output_function_base_name (file);
15328 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15329 GOT_SYMBOL_NAME, LPREFIX, value);
15332 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate. */
15336 ix86_expand_clear (rtx dest)
15340 /* We play register width games, which are only valid after reload. */
15341 gcc_assert (reload_completed);
15343 /* Avoid HImode and its attendant prefix byte. */
15344 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15345 dest = gen_rtx_REG (SImode, REGNO (dest));
15346 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15348 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15349 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15351 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15352 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
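/* Rationale for the CLOBBER: "xor %eax, %eax" is two bytes while
   "mov $0, %eax" is five, but unlike mov the xor sets EFLAGS, so the
   pattern must declare the flags clobber explicitly.  */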
15358 /* X is an unchanging MEM. If it is a constant pool reference, return
15359 the constant pool rtx, else NULL. */
15362 maybe_get_pool_constant (rtx x)
15364 x = ix86_delegitimize_address (XEXP (x, 0));
15366 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15367 return get_pool_constant (x);
15373 ix86_expand_move (enum machine_mode mode, rtx operands[])
15376 enum tls_model model;
15381 if (GET_CODE (op1) == SYMBOL_REF)
15383 model = SYMBOL_REF_TLS_MODEL (op1);
15386 op1 = legitimize_tls_address (op1, model, true);
15387 op1 = force_operand (op1, op0);
15391 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15392 && SYMBOL_REF_DLLIMPORT_P (op1))
15393 op1 = legitimize_dllimport_symbol (op1, false);
15395 else if (GET_CODE (op1) == CONST
15396 && GET_CODE (XEXP (op1, 0)) == PLUS
15397 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15399 rtx addend = XEXP (XEXP (op1, 0), 1);
15400 rtx symbol = XEXP (XEXP (op1, 0), 0);
15403 model = SYMBOL_REF_TLS_MODEL (symbol);
15405 tmp = legitimize_tls_address (symbol, model, true);
15406 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15407 && SYMBOL_REF_DLLIMPORT_P (symbol))
15408 tmp = legitimize_dllimport_symbol (symbol, true);
15412 tmp = force_operand (tmp, NULL);
15413 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15414 op0, 1, OPTAB_DIRECT);
15420 if ((flag_pic || MACHOPIC_INDIRECT)
15421 && mode == Pmode && symbolic_operand (op1, Pmode))
15423 if (TARGET_MACHO && !TARGET_64BIT)
15426 /* dynamic-no-pic */
15427 if (MACHOPIC_INDIRECT)
15429 rtx temp = ((reload_in_progress
15430 || ((op0 && REG_P (op0))
15432 ? op0 : gen_reg_rtx (Pmode));
15433 op1 = machopic_indirect_data_reference (op1, temp);
15435 op1 = machopic_legitimize_pic_address (op1, mode,
15436 temp == op1 ? 0 : temp);
15438 if (op0 != op1 && GET_CODE (op0) != MEM)
15440 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15444 if (GET_CODE (op0) == MEM)
15445 op1 = force_reg (Pmode, op1);
15449 if (GET_CODE (temp) != REG)
15450 temp = gen_reg_rtx (Pmode);
15451 temp = legitimize_pic_address (op1, temp);
15456 /* dynamic-no-pic */
15462 op1 = force_reg (Pmode, op1);
15463 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15465 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15466 op1 = legitimize_pic_address (op1, reg);
15475 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15476 || !push_operand (op0, mode))
15478 op1 = force_reg (mode, op1);
15480 if (push_operand (op0, mode)
15481 && ! general_no_elim_operand (op1, mode))
15482 op1 = copy_to_mode_reg (mode, op1);
15484 /* Force large constants in 64bit compilation into a register
15485 so that they get CSEd. */
15486 if (can_create_pseudo_p ()
15487 && (mode == DImode) && TARGET_64BIT
15488 && immediate_operand (op1, mode)
15489 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15490 && !register_operand (op0, mode)
15492 op1 = copy_to_mode_reg (mode, op1);
15494 if (can_create_pseudo_p ()
15495 && FLOAT_MODE_P (mode)
15496 && GET_CODE (op1) == CONST_DOUBLE)
15498 /* If we are loading a floating point constant to a register,
15499 force the value to memory now, since we'll get better code
15500 out of the back end. */
15502 op1 = validize_mem (force_const_mem (mode, op1));
15503 if (!register_operand (op0, mode))
15505 rtx temp = gen_reg_rtx (mode);
15506 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15507 emit_move_insn (op0, temp);
15513 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15517 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15519 rtx op0 = operands[0], op1 = operands[1];
15520 unsigned int align = GET_MODE_ALIGNMENT (mode);
15522 /* Force constants other than zero into memory. We do not know how
15523 the instructions used to build constants modify the upper 64 bits
15524 of the register; once we have that information we may be able
15525 to handle some of them more efficiently. */
15526 if (can_create_pseudo_p ()
15527 && register_operand (op0, mode)
15528 && (CONSTANT_P (op1)
15529 || (GET_CODE (op1) == SUBREG
15530 && CONSTANT_P (SUBREG_REG (op1))))
15531 && !standard_sse_constant_p (op1))
15532 op1 = validize_mem (force_const_mem (mode, op1));
15534 /* We need to check memory alignment for SSE mode since an
15535 attribute can make operands unaligned. */
15536 if (can_create_pseudo_p ()
15537 && SSE_REG_MODE_P (mode)
15538 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15539 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15543 /* ix86_expand_vector_move_misalign() does not like constants ... */
15544 if (CONSTANT_P (op1)
15545 || (GET_CODE (op1) == SUBREG
15546 && CONSTANT_P (SUBREG_REG (op1))))
15547 op1 = validize_mem (force_const_mem (mode, op1));
15549 /* ... nor both arguments in memory. */
15550 if (!register_operand (op0, mode)
15551 && !register_operand (op1, mode))
15552 op1 = force_reg (mode, op1);
15554 tmp[0] = op0; tmp[1] = op1;
15555 ix86_expand_vector_move_misalign (mode, tmp);
15559 /* Make operand1 a register if it isn't already. */
15560 if (can_create_pseudo_p ()
15561 && !register_operand (op0, mode)
15562 && !register_operand (op1, mode))
15564 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15568 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15571 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15572 straight to ix86_expand_vector_move. */
15573 /* Code generation for scalar reg-reg moves of single and double precision data:
15574 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15578 if (x86_sse_partial_reg_dependency == true)
15583 Code generation for scalar loads of double precision data:
15584 if (x86_sse_split_regs == true)
15585 movlpd mem, reg (gas syntax)
15589 Code generation for unaligned packed loads of single precision data
15590 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15591 if (x86_sse_unaligned_move_optimal)
15594 if (x86_sse_partial_reg_dependency == true)
15606 Code generation for unaligned packed loads of double precision data
15607 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15608 if (x86_sse_unaligned_move_optimal)
15611 if (x86_sse_split_regs == true)
15624 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15633 switch (GET_MODE_CLASS (mode))
15635 case MODE_VECTOR_INT:
15637 switch (GET_MODE_SIZE (mode))
15640 /* If we're optimizing for size, movups is the smallest. */
15641 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15643 op0 = gen_lowpart (V4SFmode, op0);
15644 op1 = gen_lowpart (V4SFmode, op1);
15645 emit_insn (gen_avx_movups (op0, op1));
15648 op0 = gen_lowpart (V16QImode, op0);
15649 op1 = gen_lowpart (V16QImode, op1);
15650 emit_insn (gen_avx_movdqu (op0, op1));
15653 op0 = gen_lowpart (V32QImode, op0);
15654 op1 = gen_lowpart (V32QImode, op1);
15655 emit_insn (gen_avx_movdqu256 (op0, op1));
15658 gcc_unreachable ();
15661 case MODE_VECTOR_FLOAT:
15662 op0 = gen_lowpart (mode, op0);
15663 op1 = gen_lowpart (mode, op1);
15668 emit_insn (gen_avx_movups (op0, op1));
15671 emit_insn (gen_avx_movups256 (op0, op1));
15674 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15676 op0 = gen_lowpart (V4SFmode, op0);
15677 op1 = gen_lowpart (V4SFmode, op1);
15678 emit_insn (gen_avx_movups (op0, op1));
15681 emit_insn (gen_avx_movupd (op0, op1));
15684 emit_insn (gen_avx_movupd256 (op0, op1));
15687 gcc_unreachable ();
15692 gcc_unreachable ();
15700 /* If we're optimizing for size, movups is the smallest. */
15701 if (optimize_insn_for_size_p ()
15702 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15704 op0 = gen_lowpart (V4SFmode, op0);
15705 op1 = gen_lowpart (V4SFmode, op1);
15706 emit_insn (gen_sse_movups (op0, op1));
15710 /* ??? If we have typed data, then it would appear that using
15711 movdqu is the only way to get unaligned data loaded with
15712 integer type instructions. */
15713 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15715 op0 = gen_lowpart (V16QImode, op0);
15716 op1 = gen_lowpart (V16QImode, op1);
15717 emit_insn (gen_sse2_movdqu (op0, op1));
15721 if (TARGET_SSE2 && mode == V2DFmode)
15725 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15727 op0 = gen_lowpart (V2DFmode, op0);
15728 op1 = gen_lowpart (V2DFmode, op1);
15729 emit_insn (gen_sse2_movupd (op0, op1));
15733 /* When SSE registers are split into halves, we can avoid
15734 writing to the top half twice. */
15735 if (TARGET_SSE_SPLIT_REGS)
15737 emit_clobber (op0);
15742 /* ??? Not sure about the best option for the Intel chips.
15743 The following would seem to satisfy; the register is
15744 entirely cleared, breaking the dependency chain. We
15745 then store to the upper half, with a dependency depth
15746 of one. A rumor has it that Intel recommends two movsd
15747 followed by an unpacklpd, but this is unconfirmed. And
15748 given that the dependency depth of the unpacklpd would
15749 still be one, I'm not sure why this would be better. */
15750 zero = CONST0_RTX (V2DFmode);
15753 m = adjust_address (op1, DFmode, 0);
15754 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15755 m = adjust_address (op1, DFmode, 8);
15756 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15760 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15762 op0 = gen_lowpart (V4SFmode, op0);
15763 op1 = gen_lowpart (V4SFmode, op1);
15764 emit_insn (gen_sse_movups (op0, op1));
15768 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15769 emit_move_insn (op0, CONST0_RTX (mode));
15771 emit_clobber (op0);
15773 if (mode != V4SFmode)
15774 op0 = gen_lowpart (V4SFmode, op0);
15775 m = adjust_address (op1, V2SFmode, 0);
15776 emit_insn (gen_sse_loadlps (op0, op0, m));
15777 m = adjust_address (op1, V2SFmode, 8);
15778 emit_insn (gen_sse_loadhps (op0, op0, m));
15781 else if (MEM_P (op0))
15783 /* If we're optimizing for size, movups is the smallest. */
15784 if (optimize_insn_for_size_p ()
15785 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15787 op0 = gen_lowpart (V4SFmode, op0);
15788 op1 = gen_lowpart (V4SFmode, op1);
15789 emit_insn (gen_sse_movups (op0, op1));
15793 /* ??? Similar to above, only less clear because of
15794 "typeless" stores. */
15795 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15796 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15798 op0 = gen_lowpart (V16QImode, op0);
15799 op1 = gen_lowpart (V16QImode, op1);
15800 emit_insn (gen_sse2_movdqu (op0, op1));
15804 if (TARGET_SSE2 && mode == V2DFmode)
15806 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15808 op0 = gen_lowpart (V2DFmode, op0);
15809 op1 = gen_lowpart (V2DFmode, op1);
15810 emit_insn (gen_sse2_movupd (op0, op1));
15814 m = adjust_address (op0, DFmode, 0);
15815 emit_insn (gen_sse2_storelpd (m, op1));
15816 m = adjust_address (op0, DFmode, 8);
15817 emit_insn (gen_sse2_storehpd (m, op1));
15822 if (mode != V4SFmode)
15823 op1 = gen_lowpart (V4SFmode, op1);
15825 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15827 op0 = gen_lowpart (V4SFmode, op0);
15828 emit_insn (gen_sse_movups (op0, op1));
15832 m = adjust_address (op0, V2SFmode, 0);
15833 emit_insn (gen_sse_storelps (m, op1));
15834 m = adjust_address (op0, V2SFmode, 8);
15835 emit_insn (gen_sse_storehps (m, op1));
15840 gcc_unreachable ();
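/* The load strategies above, restated with intrinsics; an
   illustrative sketch, not GCC code (GCC emits the RTL directly):  */
#if 0
#include <xmmintrin.h>

/* Single-instruction unaligned load (movups).  */
static __m128
load_unaligned_whole (const float *p)
{
  return _mm_loadu_ps (p);
}

/* Split load: clear first to break the dependency on the old register
   value (the TARGET_SSE_PARTIAL_REG_DEPENDENCY case), then fill both
   halves with movlps/movhps.  */
static __m128
load_unaligned_halves (const float *p)
{
  __m128 x = _mm_setzero_ps ();                  /* xorps */
  x = _mm_loadl_pi (x, (const __m64 *) p);       /* movlps: low 8 bytes */
  x = _mm_loadh_pi (x, (const __m64 *) (p + 2)); /* movhps: high 8 bytes */
  return x;
}
#endif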
15843 /* Expand a push in MODE. This is some mode for which we do not support
15844 proper push instructions, at least from the registers that we expect
15845 the value to live in. */
15848 ix86_expand_push (enum machine_mode mode, rtx x)
15852 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15853 GEN_INT (-GET_MODE_SIZE (mode)),
15854 stack_pointer_rtx, 1, OPTAB_DIRECT);
15855 if (tmp != stack_pointer_rtx)
15856 emit_move_insn (stack_pointer_rtx, tmp);
15858 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15860 /* When we push an operand onto stack, it has to be aligned at least
15861 at the function argument boundary. However since we don't have
15862 the argument type, we can't determine the actual argument
15863 boundary. */
15864 emit_move_insn (tmp, x);
15867 /* Helper function of ix86_fixup_binary_operands to canonicalize
15868 operand order. Returns true if the operands should be swapped. */
15871 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15874 rtx dst = operands[0];
15875 rtx src1 = operands[1];
15876 rtx src2 = operands[2];
15878 /* If the operation is not commutative, we can't do anything. */
15879 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15882 /* Highest priority is that src1 should match dst. */
15883 if (rtx_equal_p (dst, src1))
15885 if (rtx_equal_p (dst, src2))
15888 /* Next highest priority is that immediate constants come second. */
15889 if (immediate_operand (src2, mode))
15891 if (immediate_operand (src1, mode))
15894 /* Lowest priority is that memory references should come second. */
15904 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15905 destination to use for the operation. If different from the true
15906 destination in operands[0], a copy operation will be required. */
15909 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15912 rtx dst = operands[0];
15913 rtx src1 = operands[1];
15914 rtx src2 = operands[2];
15916 /* Canonicalize operand order. */
15917 if (ix86_swap_binary_operands_p (code, mode, operands))
15921 /* It is invalid to swap operands of different modes. */
15922 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15929 /* Both source operands cannot be in memory. */
15930 if (MEM_P (src1) && MEM_P (src2))
15932 /* Optimization: Only read from memory once. */
15933 if (rtx_equal_p (src1, src2))
15935 src2 = force_reg (mode, src2);
15939 src2 = force_reg (mode, src2);
15942 /* If the destination is memory, and we do not have matching source
15943 operands, do things in registers. */
15944 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15945 dst = gen_reg_rtx (mode);
15947 /* Source 1 cannot be a constant. */
15948 if (CONSTANT_P (src1))
15949 src1 = force_reg (mode, src1);
15951 /* Source 1 cannot be a non-matching memory. */
15952 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15953 src1 = force_reg (mode, src1);
15955 operands[1] = src1;
15956 operands[2] = src2;
15960 /* Similarly, but assume that the destination has already been
15961 set up properly. */
15964 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15965 enum machine_mode mode, rtx operands[])
15967 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15968 gcc_assert (dst == operands[0]);
15971 /* Attempt to expand a binary operator. Make the expansion closer to the
15972 actual machine than just general_operand, which would allow 3 separate
15973 memory references (one output, two input) in a single insn. */
15976 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15979 rtx src1, src2, dst, op, clob;
15981 dst = ix86_fixup_binary_operands (code, mode, operands);
15982 src1 = operands[1];
15983 src2 = operands[2];
15985 /* Emit the instruction. */
15987 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15988 if (reload_in_progress)
15990 /* Reload doesn't know about the flags register, and doesn't know that
15991 it doesn't want to clobber it. We can only do this with PLUS. */
15992 gcc_assert (code == PLUS);
15995 else if (reload_completed
15997 && !rtx_equal_p (dst, src1))
15999 /* This is going to be an LEA; avoid splitting it later. */
16004 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16005 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16008 /* Fix up the destination if needed. */
16009 if (dst != operands[0])
16010 emit_move_insn (operands[0], dst);
16013 /* Return TRUE or FALSE depending on whether the binary operator meets the
16014 appropriate constraints. */
16017 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16020 rtx dst = operands[0];
16021 rtx src1 = operands[1];
16022 rtx src2 = operands[2];
16024 /* Both source operands cannot be in memory. */
16025 if (MEM_P (src1) && MEM_P (src2))
16028 /* Canonicalize operand order for commutative operators. */
16029 if (ix86_swap_binary_operands_p (code, mode, operands))
16036 /* If the destination is memory, we must have a matching source operand. */
16037 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16040 /* Source 1 cannot be a constant. */
16041 if (CONSTANT_P (src1))
16044 /* Source 1 cannot be a non-matching memory. */
16045 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16047 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16048 return (code == AND
16051 || (TARGET_64BIT && mode == DImode))
16052 && CONST_INT_P (src2)
16053 && (INTVAL (src2) == 0xff
16054 || INTVAL (src2) == 0xffff));
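/* The exception is safe because "and $0xff, reg" with a memory source
   is really a zero extension, i.e. equivalent to "movzbl mem, reg",
   so the usual matching-operand requirement can be waived here.  */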
16060 /* Attempt to expand a unary operator. Make the expansion closer to the
16061 actual machine than just general_operand, which would allow 2 separate
16062 memory references (one output, one input) in a single insn. */
16065 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16068 int matching_memory;
16069 rtx src, dst, op, clob;
16074 /* If the destination is memory, and we do not have matching source
16075 operands, do things in registers. */
16076 matching_memory = 0;
16079 if (rtx_equal_p (dst, src))
16080 matching_memory = 1;
16082 dst = gen_reg_rtx (mode);
16085 /* When source operand is memory, destination must match. */
16086 if (MEM_P (src) && !matching_memory)
16087 src = force_reg (mode, src);
16089 /* Emit the instruction. */
16091 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16092 if (reload_in_progress || code == NOT)
16094 /* Reload doesn't know about the flags register, and doesn't know that
16095 it doesn't want to clobber it. */
16096 gcc_assert (code == NOT);
16101 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16102 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16105 /* Fix up the destination if needed. */
16106 if (dst != operands[0])
16107 emit_move_insn (operands[0], dst);
16110 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16111 divisor are within the range [0-255]. */
16114 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16117 rtx end_label, qimode_label;
16118 rtx insn, div, mod;
16119 rtx scratch, tmp0, tmp1, tmp2;
16120 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16121 rtx (*gen_zero_extend) (rtx, rtx);
16122 rtx (*gen_test_ccno_1) (rtx, rtx);
16127 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16128 gen_test_ccno_1 = gen_testsi_ccno_1;
16129 gen_zero_extend = gen_zero_extendqisi2;
16132 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16133 gen_test_ccno_1 = gen_testdi_ccno_1;
16134 gen_zero_extend = gen_zero_extendqidi2;
16137 gcc_unreachable ();
16140 end_label = gen_label_rtx ();
16141 qimode_label = gen_label_rtx ();
16143 scratch = gen_reg_rtx (mode);
16145 /* Use 8bit unsigned divmod if dividend and divisor are within
16146 the range [0-255]. */
16147 emit_move_insn (scratch, operands[2]);
16148 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16149 scratch, 1, OPTAB_DIRECT);
16150 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16151 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16152 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16153 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16154 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16156 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16157 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16158 JUMP_LABEL (insn) = qimode_label;
16160 /* Generate the original signed/unsigned divmod. */
16161 div = gen_divmod4_1 (operands[0], operands[1],
16162 operands[2], operands[3]);
16165 /* Branch to the end. */
16166 emit_jump_insn (gen_jump (end_label));
16169 /* Generate 8bit unsigned divide. */
16170 emit_label (qimode_label);
16171 /* Don't use operands[0] for result of 8bit divide since not all
16172 registers support QImode ZERO_EXTRACT. */
16173 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16174 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16175 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16176 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16180 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16181 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16185 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16186 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16189 /* Extract remainder from AH. */
16190 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16191 if (REG_P (operands[1]))
16192 insn = emit_move_insn (operands[1], tmp1);
16195 /* Need a new scratch register since the old one has the result
16196 of the 8bit divide. */
16197 scratch = gen_reg_rtx (mode);
16198 emit_move_insn (scratch, tmp1);
16199 insn = emit_move_insn (operands[1], scratch);
16201 set_unique_reg_note (insn, REG_EQUAL, mod);
16203 /* Zero extend quotient from AL. */
16204 tmp1 = gen_lowpart (QImode, tmp0);
16205 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16206 set_unique_reg_note (insn, REG_EQUAL, div);
16208 emit_label (end_label);
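/* The fast path above, as source-level logic; an illustrative sketch,
   not GCC code.  A single TEST of (a | b) against ~0xff decides
   whether both values fit in 8 bits; a negative signed input sets high
   bits and therefore takes the general path as well.  */
#if 0
#include <stdint.h>

static void
divmod32_sketch (uint32_t a, uint32_t b, uint32_t *quo, uint32_t *rem)
{
  if ((a | b) < 0x100)
    {
      /* 16-bit by 8-bit DIV: AL gets the quotient, AH the remainder.  */
      *quo = (uint8_t) (a / b);
      *rem = (uint8_t) (a % b);
    }
  else
    {
      *quo = a / b;
      *rem = a % b;
    }
}
#endif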
16211 #define LEA_SEARCH_THRESHOLD 12
16213 /* Search backward for non-agu definition of register number REGNO1
16214 or register number REGNO2 in INSN's basic block until
16215 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16216 2. Reach BB boundary, or
16217 3. Reach agu definition.
16218 Returns the distance between the non-agu definition point and INSN.
16219 If no definition point, returns -1. */
16222 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16225 basic_block bb = BLOCK_FOR_INSN (insn);
16228 enum attr_type insn_type;
16230 if (insn != BB_HEAD (bb))
16232 rtx prev = PREV_INSN (insn);
16233 while (prev && distance < LEA_SEARCH_THRESHOLD)
16235 if (NONDEBUG_INSN_P (prev))
16238 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16239 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16240 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16241 && (regno1 == DF_REF_REGNO (*def_rec)
16242 || regno2 == DF_REF_REGNO (*def_rec)))
16244 insn_type = get_attr_type (prev);
16245 if (insn_type != TYPE_LEA)
16249 if (prev == BB_HEAD (bb))
16251 prev = PREV_INSN (prev);
16255 if (distance < LEA_SEARCH_THRESHOLD)
16259 bool simple_loop = false;
16261 FOR_EACH_EDGE (e, ei, bb->preds)
16264 simple_loop = true;
16270 rtx prev = BB_END (bb);
16273 && distance < LEA_SEARCH_THRESHOLD)
16275 if (NONDEBUG_INSN_P (prev))
16278 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16279 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16280 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16281 && (regno1 == DF_REF_REGNO (*def_rec)
16282 || regno2 == DF_REF_REGNO (*def_rec)))
16284 insn_type = get_attr_type (prev);
16285 if (insn_type != TYPE_LEA)
16289 prev = PREV_INSN (prev);
16297 /* get_attr_type may modify recog data. We want to make sure
16298 that recog data is valid for instruction INSN, on which
16299 distance_non_agu_define is called. INSN is unchanged here. */
16300 extract_insn_cached (insn);
16304 /* Return the distance between INSN and the next insn that uses
16305 register number REGNO0 in a memory address. Return -1 if no such
16306 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
16309 distance_agu_use (unsigned int regno0, rtx insn)
16311 basic_block bb = BLOCK_FOR_INSN (insn);
16316 if (insn != BB_END (bb))
16318 rtx next = NEXT_INSN (insn);
16319 while (next && distance < LEA_SEARCH_THRESHOLD)
16321 if (NONDEBUG_INSN_P (next))
16325 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16326 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16327 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16328 && regno0 == DF_REF_REGNO (*use_rec))
16330 /* Return DISTANCE if OP0 is used in memory
16331 address in NEXT. */
16335 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16336 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16337 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16338 && regno0 == DF_REF_REGNO (*def_rec))
16340 /* Return -1 if OP0 is set in NEXT. */
16344 if (next == BB_END (bb))
16346 next = NEXT_INSN (next);
16350 if (distance < LEA_SEARCH_THRESHOLD)
16354 bool simple_loop = false;
16356 FOR_EACH_EDGE (e, ei, bb->succs)
16359 simple_loop = true;
16365 rtx next = BB_HEAD (bb);
16368 && distance < LEA_SEARCH_THRESHOLD)
16370 if (NONDEBUG_INSN_P (next))
16374 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16375 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16376 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16377 && regno0 == DF_REF_REGNO (*use_rec))
16379 /* Return DISTANCE if OP0 is used in memory
16380 address in NEXT. */
16384 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16385 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16386 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16387 && regno0 == DF_REF_REGNO (*def_rec))
16389 /* Return -1 if OP0 is set in NEXT. */
16394 next = NEXT_INSN (next);
16402 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
16403 there is a choice between LEA and ADD.
16404 Negative value: ADD is preferred over LEA
16405 Zero: neutral
16406 Positive value: LEA is preferred over ADD. */
16407 #define IX86_LEA_PRIORITY 2
16409 /* Return true if it is ok to optimize an ADD operation to LEA
16410 operation to avoid flag register consumption. For most processors,
16411 ADD is faster than LEA. For processors like ATOM, if the
16412 destination register of LEA holds an actual address which will be
16413 used soon, LEA is better and otherwise ADD is better. */
16416 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16418 unsigned int regno0 = true_regnum (operands[0]);
16419 unsigned int regno1 = true_regnum (operands[1]);
16420 unsigned int regno2 = true_regnum (operands[2]);
16422 /* If a = b + c, with a != b and a != c, we must use the lea form. */
16423 if (regno0 != regno1 && regno0 != regno2)
16426 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16430 int dist_define, dist_use;
16432 /* Return false if REGNO0 isn't used in memory address. */
16433 dist_use = distance_agu_use (regno0, insn);
16437 dist_define = distance_non_agu_define (regno1, regno2, insn);
16438 if (dist_define <= 0)
16441 /* If this insn has both backward non-agu dependence and forward
16442 agu dependence, the one with the shorter distance takes effect. */
16443 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16450 /* Return true if the destination reg of SET_BODY is the shift count of
16451 USE_BODY. */
16454 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16460 /* Retrieve destination of SET_BODY. */
16461 switch (GET_CODE (set_body))
16464 set_dest = SET_DEST (set_body);
16465 if (!set_dest || !REG_P (set_dest))
16469 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16470 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16478 /* Retrieve shift count of USE_BODY. */
16479 switch (GET_CODE (use_body))
16482 shift_rtx = XEXP (use_body, 1);
16485 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16486 if (ix86_dep_by_shift_count_body (set_body,
16487 XVECEXP (use_body, 0, i)))
16495 && (GET_CODE (shift_rtx) == ASHIFT
16496 || GET_CODE (shift_rtx) == LSHIFTRT
16497 || GET_CODE (shift_rtx) == ASHIFTRT
16498 || GET_CODE (shift_rtx) == ROTATE
16499 || GET_CODE (shift_rtx) == ROTATERT))
16501 rtx shift_count = XEXP (shift_rtx, 1);
16503 /* Return true if shift count is dest of SET_BODY. */
16504 if (REG_P (shift_count)
16505 && true_regnum (set_dest) == true_regnum (shift_count))
16512 /* Return true if the destination reg of SET_INSN is the shift count of
16513 USE_INSN. */
16516 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16518 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16519 PATTERN (use_insn));
16522 /* Return TRUE or FALSE depending on whether the unary operator meets the
16523 appropriate constraints. */
16526 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16527 enum machine_mode mode ATTRIBUTE_UNUSED,
16528 rtx operands[2] ATTRIBUTE_UNUSED)
16530 /* If one of the operands is memory, the source and destination must match. */
16531 if ((MEM_P (operands[0])
16532 || MEM_P (operands[1]))
16533 && ! rtx_equal_p (operands[0], operands[1]))
16538 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16539 are ok, keeping in mind the possible movddup alternative. */
16542 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16544 if (MEM_P (operands[0]))
16545 return rtx_equal_p (operands[0], operands[1 + high]);
16546 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16547 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16551 /* Post-reload splitter for converting an SF or DFmode value in an
16552 SSE register into an unsigned SImode. */
16555 ix86_split_convert_uns_si_sse (rtx operands[])
16557 enum machine_mode vecmode;
16558 rtx value, large, zero_or_two31, input, two31, x;
16560 large = operands[1];
16561 zero_or_two31 = operands[2];
16562 input = operands[3];
16563 two31 = operands[4];
16564 vecmode = GET_MODE (large);
16565 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16567 /* Load up the value into the low element. We must ensure that the other
16568 elements are valid floats -- zero is the easiest such value. */
16571 if (vecmode == V4SFmode)
16572 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16574 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16578 input = gen_rtx_REG (vecmode, REGNO (input));
16579 emit_move_insn (value, CONST0_RTX (vecmode));
16580 if (vecmode == V4SFmode)
16581 emit_insn (gen_sse_movss (value, value, input));
16583 emit_insn (gen_sse2_movsd (value, value, input));
16586 emit_move_insn (large, two31);
16587 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16589 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16590 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16592 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16593 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16595 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16596 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16598 large = gen_rtx_REG (V4SImode, REGNO (large));
16599 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16601 x = gen_rtx_REG (V4SImode, REGNO (value));
16602 if (vecmode == V4SFmode)
16603 emit_insn (gen_sse2_cvttps2dq (x, value));
16605 emit_insn (gen_sse2_cvttpd2dq (x, value));
16608 emit_insn (gen_xorv4si3 (value, value, large));
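/* Scalar equivalent of the branchless sequence above, for in-range
   inputs; an illustrative sketch, not GCC code.  Values >= 2^31 are
   biased down before the signed cvtt conversion, and the sign bit is
   patched back in with XOR.  */
#if 0
#include <stdint.h>

static uint32_t
double_to_u32_sketch (double x)
{
  if (x >= 2147483648.0)	/* the "large" compare-mask lanes */
    return (uint32_t) (int32_t) (x - 2147483648.0) ^ 0x80000000u;
  return (uint32_t) (int32_t) x;
}
#endif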
16611 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16612 Expects the 64-bit DImode to be supplied in a pair of integral
16613 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16614 -mfpmath=sse, !optimize_size only. */
16617 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16619 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16620 rtx int_xmm, fp_xmm;
16621 rtx biases, exponents;
16624 int_xmm = gen_reg_rtx (V4SImode);
16625 if (TARGET_INTER_UNIT_MOVES)
16626 emit_insn (gen_movdi_to_sse (int_xmm, input));
16627 else if (TARGET_SSE_SPLIT_REGS)
16629 emit_clobber (int_xmm);
16630 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16634 x = gen_reg_rtx (V2DImode);
16635 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16636 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16639 x = gen_rtx_CONST_VECTOR (V4SImode,
16640 gen_rtvec (4, GEN_INT (0x43300000UL),
16641 GEN_INT (0x45300000UL),
16642 const0_rtx, const0_rtx));
16643 exponents = validize_mem (force_const_mem (V4SImode, x));
16645 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16646 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16648 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16649 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16650 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16651 (0x1.0p84 + double(fp_value_hi_xmm)).
16652 Note these exponents differ by 32. */
16654 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16656 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16657 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16658 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16659 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16660 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16661 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16662 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16663 biases = validize_mem (force_const_mem (V2DFmode, biases));
16664 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16666 /* Add the upper and lower DFmode values together. */
16668 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16671 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16672 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16673 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16676 ix86_expand_vector_extract (false, target, fp_xmm, 0);
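/* The same juxtaposition trick in scalar C; an illustrative sketch,
   not GCC code, assuming IEEE doubles whose bit pattern is accessible
   through a union.  */
#if 0
#include <stdint.h>

static double
u64_to_double_sketch (uint64_t x)
{
  union { uint64_t u; double d; } lo, hi;
  lo.u = 0x4330000000000000ULL | (x & 0xffffffffULL); /* 2^52 + lo32 */
  hi.u = 0x4530000000000000ULL | (x >> 32);	      /* 2^84 + hi32 * 2^32 */
  /* Subtract the two biases (both subtractions are exact) and sum the
     halves; the final addition performs the only rounding step.  */
  return (lo.d - 0x1.0p52) + (hi.d - 0x1.0p84);
}
#endif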
16679 /* Not used, but eases macroization of patterns. */
16681 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16682 rtx input ATTRIBUTE_UNUSED)
16684 gcc_unreachable ();
16687 /* Convert an unsigned SImode value into a DFmode. Only currently used
16688 for SSE, but applicable anywhere. */
16691 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16693 REAL_VALUE_TYPE TWO31r;
16696 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16697 NULL, 1, OPTAB_DIRECT);
16699 fp = gen_reg_rtx (DFmode);
16700 emit_insn (gen_floatsidf2 (fp, x));
16702 real_ldexp (&TWO31r, &dconst1, 31);
16703 x = const_double_from_real_value (TWO31r, DFmode);
16705 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16707 emit_move_insn (target, x);
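/* Scalar form of the above; an illustrative sketch, not GCC code.
   The unsigned value is biased into the signed range, converted
   exactly, and the bias is then undone in the FP domain.  */
#if 0
#include <stdint.h>

static double
u32_to_double_sketch (uint32_t x)
{
  /* x - 2^31 wraps to exactly the signed value x - 2^31.  */
  int32_t biased = (int32_t) (x - 0x80000000u);
  return (double) biased + 2147483648.0;	/* exact: |x| < 2^53 */
}
#endif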
16710 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16711 32-bit mode; otherwise we have a direct convert instruction. */
16714 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16716 REAL_VALUE_TYPE TWO32r;
16717 rtx fp_lo, fp_hi, x;
16719 fp_lo = gen_reg_rtx (DFmode);
16720 fp_hi = gen_reg_rtx (DFmode);
16722 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16724 real_ldexp (&TWO32r, &dconst1, 32);
16725 x = const_double_from_real_value (TWO32r, DFmode);
16726 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16728 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16730 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16733 emit_move_insn (target, x);
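/* Scalar form of the above; an illustrative sketch, not GCC code,
   assuming an arithmetic right shift on signed types (true for GCC).
   The signed high half is converted and scaled by 2^32 (both steps
   exact), the low half goes through the unsigned SImode path, and one
   addition combines them.  */
#if 0
#include <stdint.h>

static double
s64_to_double_sketch (int64_t x)
{
  double hi = (double) (int32_t) (x >> 32) * 4294967296.0;
  double lo = (double) (uint32_t) x;
  return hi + lo;
}
#endif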
16736 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16737 For x86_32, -mfpmath=sse, !optimize_size only. */
16739 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16741 REAL_VALUE_TYPE ONE16r;
16742 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16744 real_ldexp (&ONE16r, &dconst1, 16);
16745 x = const_double_from_real_value (ONE16r, SFmode);
16746 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16747 NULL, 0, OPTAB_DIRECT);
16748 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16749 NULL, 0, OPTAB_DIRECT);
16750 fp_hi = gen_reg_rtx (SFmode);
16751 fp_lo = gen_reg_rtx (SFmode);
16752 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16753 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16754 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16756 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16758 if (!rtx_equal_p (target, fp_hi))
16759 emit_move_insn (target, fp_hi);
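/* Scalar form of the above; an illustrative sketch, not GCC code.
   Splitting at bit 16 keeps both SImode->SFmode conversions exact, so
   the final addition is the only rounding step.  */
#if 0
#include <stdint.h>

static float
u32_to_float_sketch (uint32_t x)
{
  float hi = (float) (x >> 16) * 65536.0f;	/* exact: 16-bit value */
  float lo = (float) (x & 0xffff);		/* exact: 16-bit value */
  return hi + lo;
}
#endif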
16762 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16763 then replicate the value for all elements of the vector
16764 register. */
16767 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16774 v = gen_rtvec (4, value, value, value, value);
16775 return gen_rtx_CONST_VECTOR (V4SImode, v);
16779 v = gen_rtvec (2, value, value);
16780 return gen_rtx_CONST_VECTOR (V2DImode, v);
16784 v = gen_rtvec (8, value, value, value, value,
16785 value, value, value, value);
16787 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16788 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16789 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16790 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16791 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16795 v = gen_rtvec (4, value, value, value, value);
16797 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16798 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16799 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16803 v = gen_rtvec (4, value, value, value, value);
16805 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16806 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16807 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16811 v = gen_rtvec (2, value, value);
16813 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16814 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16817 gcc_unreachable ();
16821 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16822 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16823 for an SSE register. If VECT is true, then replicate the mask for
16824 all elements of the vector register. If INVERT is true, then create
16825 a mask excluding the sign bit. */
16828 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16830 enum machine_mode vec_mode, imode;
16831 HOST_WIDE_INT hi, lo;
16836 /* Find the sign bit, sign extended to 2*HWI. */
16843 mode = GET_MODE_INNER (mode);
16845 lo = 0x80000000, hi = lo < 0;
16852 mode = GET_MODE_INNER (mode);
16854 if (HOST_BITS_PER_WIDE_INT >= 64)
16855 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16857 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16862 vec_mode = VOIDmode;
16863 if (HOST_BITS_PER_WIDE_INT >= 64)
16866 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16873 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16877 lo = ~lo, hi = ~hi;
16883 mask = immed_double_const (lo, hi, imode);
16885 vec = gen_rtvec (2, v, mask);
16886 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16887 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16894 gcc_unreachable ();
16898 lo = ~lo, hi = ~hi;
16900 /* Force this value into the low part of a fp vector constant. */
16901 mask = immed_double_const (lo, hi, imode);
16902 mask = gen_lowpart (mode, mask);
16904 if (vec_mode == VOIDmode)
16905 return force_reg (mode, mask);
16907 v = ix86_build_const_vector (vec_mode, vect, mask);
16908 return force_reg (vec_mode, v);
16911 /* Generate code for floating point ABS or NEG. */
16914 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16917 rtx mask, set, dst, src;
16918 bool use_sse = false;
16919 bool vector_mode = VECTOR_MODE_P (mode);
16920 enum machine_mode vmode = mode;
16924 else if (mode == TFmode)
16926 else if (TARGET_SSE_MATH)
16928 use_sse = SSE_FLOAT_MODE_P (mode);
16929 if (mode == SFmode)
16931 else if (mode == DFmode)
16935 /* NEG and ABS performed with SSE use bitwise mask operations.
16936 Create the appropriate mask now. */
16938 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16945 set = gen_rtx_fmt_e (code, mode, src);
16946 set = gen_rtx_SET (VOIDmode, dst, set);
16953 use = gen_rtx_USE (VOIDmode, mask);
16955 par = gen_rtvec (2, set, use);
16958 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16959 par = gen_rtvec (3, set, use, clob);
16961 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
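/* What the sign-bit mask buys us, in scalar terms; an illustrative
   sketch, not GCC code: ABS is an AND with the inverted mask
   (andps/andpd), NEG an XOR with the mask (xorps/xorpd).  */
#if 0
#include <stdint.h>
#include <string.h>

static float
sse_style_fabsf (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u &= 0x7fffffffu;		/* and with ~signbit */
  memcpy (&x, &u, sizeof u);
  return x;
}

static float
sse_style_fnegf (float x)
{
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u ^= 0x80000000u;		/* xor with signbit */
  memcpy (&x, &u, sizeof u);
  return x;
}
#endif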
16967 /* Expand a copysign operation. Special case operand 0 being a constant. */
16970 ix86_expand_copysign (rtx operands[])
16972 enum machine_mode mode, vmode;
16973 rtx dest, op0, op1, mask, nmask;
16975 dest = operands[0];
16979 mode = GET_MODE (dest);
16981 if (mode == SFmode)
16983 else if (mode == DFmode)
16988 if (GET_CODE (op0) == CONST_DOUBLE)
16990 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16992 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16993 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16995 if (mode == SFmode || mode == DFmode)
16997 if (op0 == CONST0_RTX (mode))
16998 op0 = CONST0_RTX (vmode);
17001 rtx v = ix86_build_const_vector (vmode, false, op0);
17003 op0 = force_reg (vmode, v);
17006 else if (op0 != CONST0_RTX (mode))
17007 op0 = force_reg (mode, op0);
17009 mask = ix86_build_signbit_mask (vmode, 0, 0);
17011 if (mode == SFmode)
17012 copysign_insn = gen_copysignsf3_const;
17013 else if (mode == DFmode)
17014 copysign_insn = gen_copysigndf3_const;
17016 copysign_insn = gen_copysigntf3_const;
17018 emit_insn (copysign_insn (dest, op0, op1, mask));
17022 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
17024 nmask = ix86_build_signbit_mask (vmode, 0, 1);
17025 mask = ix86_build_signbit_mask (vmode, 0, 0);
17027 if (mode == SFmode)
17028 copysign_insn = gen_copysignsf3_var;
17029 else if (mode == DFmode)
17030 copysign_insn = gen_copysigndf3_var;
17032 copysign_insn = gen_copysigntf3_var;
17034 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
17038 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
17039 be a constant, and so has already been expanded into a vector constant. */
17042 ix86_split_copysign_const (rtx operands[])
17044 enum machine_mode mode, vmode;
17045 rtx dest, op0, mask, x;
17047 dest = operands[0];
17049 mask = operands[3];
17051 mode = GET_MODE (dest);
17052 vmode = GET_MODE (mask);
17054 dest = simplify_gen_subreg (vmode, dest, mode, 0);
17055 x = gen_rtx_AND (vmode, dest, mask);
17056 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17058 if (op0 != CONST0_RTX (vmode))
17060 x = gen_rtx_IOR (vmode, dest, op0);
17061 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17065 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
17066 so we have to do two masks. */
17069 ix86_split_copysign_var (rtx operands[])
17071 enum machine_mode mode, vmode;
17072 rtx dest, scratch, op0, op1, mask, nmask, x;
17074 dest = operands[0];
17075 scratch = operands[1];
17078 nmask = operands[4];
17079 mask = operands[5];
17081 mode = GET_MODE (dest);
17082 vmode = GET_MODE (mask);
17084 if (rtx_equal_p (op0, op1))
17086 /* Shouldn't happen often (it's useless, obviously), but when it does
17087 we'd generate incorrect code if we continue below. */
17088 emit_move_insn (dest, op0);
17092 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
17094 gcc_assert (REGNO (op1) == REGNO (scratch));
17096 x = gen_rtx_AND (vmode, scratch, mask);
17097 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17100 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17101 x = gen_rtx_NOT (vmode, dest);
17102 x = gen_rtx_AND (vmode, x, op0);
17103 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17107 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
17109 x = gen_rtx_AND (vmode, scratch, mask);
17111 else /* alternative 2,4 */
17113 gcc_assert (REGNO (mask) == REGNO (scratch));
17114 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
17115 x = gen_rtx_AND (vmode, scratch, op1);
17117 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17119 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17121 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17122 x = gen_rtx_AND (vmode, dest, nmask);
17124 else /* alternative 3,4 */
17126 gcc_assert (REGNO (nmask) == REGNO (dest));
17128 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17129 x = gen_rtx_AND (vmode, dest, op0);
17131 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17134 x = gen_rtx_IOR (vmode, dest, scratch);
17135 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
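/* The AND/AND/IOR steps above compute, in scalar terms (an
   illustrative sketch, not GCC code): magnitude bits from one operand,
   sign bit from the other.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
copysign_sketch (double mag, double sgn)
{
  uint64_t m, s;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & 0x7fffffffffffffffULL)	/* AND with nmask */
      | (s & 0x8000000000000000ULL);	/* AND with mask, then IOR */
  memcpy (&mag, &m, sizeof m);
  return mag;
}
#endif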
17138 /* Return TRUE or FALSE depending on whether the first SET in INSN
17139 has source and destination with matching CC modes, and whether the
17140 CC mode is at least as constrained as REQ_MODE. */
17143 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17146 enum machine_mode set_mode;
17148 set = PATTERN (insn);
17149 if (GET_CODE (set) == PARALLEL)
17150 set = XVECEXP (set, 0, 0);
17151 gcc_assert (GET_CODE (set) == SET);
17152 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17154 set_mode = GET_MODE (SET_DEST (set));
17158 if (req_mode != CCNOmode
17159 && (req_mode != CCmode
17160 || XEXP (SET_SRC (set), 1) != const0_rtx))
17164 if (req_mode == CCGCmode)
17168 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17172 if (req_mode == CCZmode)
17183 gcc_unreachable ();
17186 return GET_MODE (SET_SRC (set)) == set_mode;
17189 /* Generate insn patterns to do an integer compare of OPERANDS. */
17192 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17194 enum machine_mode cmpmode;
17197 cmpmode = SELECT_CC_MODE (code, op0, op1);
17198 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17200 /* This is very simple, but making the interface the same as in the
17201 FP case makes the rest of the code easier. */
17202 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17203 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17205 /* Return the test that should be put into the flags user, i.e.
17206 the bcc, scc, or cmov instruction. */
17207 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17210 /* Figure out whether to use ordered or unordered fp comparisons.
17211 Return the appropriate mode to use. */
17214 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17216 /* ??? In order to make all comparisons reversible, we do all comparisons
17217 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17218 between trapping and nontrapping forms of all comparisons, we can make
17219 inequality comparisons trapping again, since it results in better code
17220 when using FCOM based compares. */
17221 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17225 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17227 enum machine_mode mode = GET_MODE (op0);
17229 if (SCALAR_FLOAT_MODE_P (mode))
17231 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17232 return ix86_fp_compare_mode (code);
17237 /* Only zero flag is needed. */
17238 case EQ: /* ZF=0 */
17239 case NE: /* ZF!=0 */
17241 /* Codes needing carry flag. */
17242 case GEU: /* CF=0 */
17243 case LTU: /* CF=1 */
17244 /* Detect overflow checks. They need just the carry flag. */
17245 if (GET_CODE (op0) == PLUS
17246 && rtx_equal_p (op1, XEXP (op0, 0)))
17250 case GTU: /* CF=0 & ZF=0 */
17251 case LEU: /* CF=1 | ZF=1 */
17252 /* Detect overflow checks. They need just the carry flag. */
17253 if (GET_CODE (op0) == MINUS
17254 && rtx_equal_p (op1, XEXP (op0, 0)))
17258 /* Codes possibly doable only with sign flag when
17259 comparing against zero. */
17260 case GE: /* SF=OF or SF=0 */
17261 case LT: /* SF<>OF or SF=1 */
17262 if (op1 == const0_rtx)
17265 /* For other cases Carry flag is not required. */
17267 /* Codes doable only with the sign flag when comparing
17268 against zero, but for which we lack a jump instruction, so
17269 we need to use relational tests against overflow, which thus
17270 needs to be zero. */
17271 case GT: /* ZF=0 & SF=OF */
17272 case LE: /* ZF=1 | SF<>OF */
17273 if (op1 == const0_rtx)
17277 /* The strcmp pattern does (use flags), and combine may ask us for a
17278 proper mode. */
17282 gcc_unreachable ();
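/* The PLUS/MINUS shapes recognized above match the usual C overflow
   test; an illustrative sketch, not GCC code.  After "sum = a + b",
   "sum < a" (unsigned) is decided by the carry flag alone, so a
   carry-only CC mode suffices.  */
#if 0
#include <stdint.h>

static int
add_overflows_sketch (uint32_t a, uint32_t b)
{
  uint32_t sum = a + b;
  return sum < a;	/* LTU against an addend: just CF */
}
#endif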
17286 /* Return the fixed registers used for condition codes. */
17289 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17296 /* If two condition code modes are compatible, return a condition code
17297 mode which is compatible with both. Otherwise, return
17298 VOIDmode. */
17300 static enum machine_mode
17301 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17306 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17309 if ((m1 == CCGCmode && m2 == CCGOCmode)
17310 || (m1 == CCGOCmode && m2 == CCGCmode))
17316 gcc_unreachable ();
17346 /* These are only compatible with themselves, which we already
17347 know. */
17353 /* Return a comparison we can do that is equivalent to
17354 swap_condition (code), apart possibly from orderedness.
17355 But never change orderedness if TARGET_IEEE_FP, returning
17356 UNKNOWN in that case if necessary. */
17358 static enum rtx_code
17359 ix86_fp_swap_condition (enum rtx_code code)
17363 case GT: /* GTU - CF=0 & ZF=0 */
17364 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17365 case GE: /* GEU - CF=0 */
17366 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17367 case UNLT: /* LTU - CF=1 */
17368 return TARGET_IEEE_FP ? UNKNOWN : GT;
17369 case UNLE: /* LEU - CF=1 | ZF=1 */
17370 return TARGET_IEEE_FP ? UNKNOWN : GE;
17372 return swap_condition (code);
17376 /* Return the cost of comparison CODE using the best strategy for performance.
17377 All the following functions use the number of instructions as a cost metric.
17378 In the future this should be tweaked to compute bytes for optimize_size and
17379 to take into account the performance of various instructions on various CPUs. */
17382 ix86_fp_comparison_cost (enum rtx_code code)
17386 /* The cost of code using bit-twiddling on %ah. */
17403 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17407 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17410 gcc_unreachable ();
17413 switch (ix86_fp_comparison_strategy (code))
17415 case IX86_FPCMP_COMI:
17416 return arith_cost > 4 ? 3 : 2;
17417 case IX86_FPCMP_SAHF:
17418 return arith_cost > 4 ? 4 : 3;
17424 /* Return the strategy to use for floating-point. We assume that fcomi is always
17425 preferable where available, since that is also true when looking at size
17426 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17428 enum ix86_fpcmp_strategy
17429 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17431 /* Do fcomi/sahf based test when profitable. */
17434 return IX86_FPCMP_COMI;
17436 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17437 return IX86_FPCMP_SAHF;
17439 return IX86_FPCMP_ARITH;
17442 /* Swap, force into registers, or otherwise massage the two operands
17443 to a fp comparison. The operands are updated in place; the new
17444 comparison code is returned. */
17446 static enum rtx_code
17447 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17449 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17450 rtx op0 = *pop0, op1 = *pop1;
17451 enum machine_mode op_mode = GET_MODE (op0);
17452 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17454 /* All of the unordered compare instructions only work on registers.
17455 The same is true of the fcomi compare instructions. The XFmode
17456 compare instructions require registers except when comparing
17457 against zero or when converting operand 1 from fixed point to
17458 floating point. */
17460 if (!is_sse
17461 && (fpcmp_mode == CCFPUmode
17462 || (op_mode == XFmode
17463 && ! (standard_80387_constant_p (op0) == 1
17464 || standard_80387_constant_p (op1) == 1)
17465 && GET_CODE (op1) != FLOAT)
17466 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17468 op0 = force_reg (op_mode, op0);
17469 op1 = force_reg (op_mode, op1);
17473 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17474 things around if they appear profitable, otherwise force op0
17475 into a register. */
17477 if (standard_80387_constant_p (op0) == 0
17479 && ! (standard_80387_constant_p (op1) == 0
17482 enum rtx_code new_code = ix86_fp_swap_condition (code);
17483 if (new_code != UNKNOWN)
17486 tmp = op0, op0 = op1, op1 = tmp;
17492 op0 = force_reg (op_mode, op0);
17494 if (CONSTANT_P (op1))
17496 int tmp = standard_80387_constant_p (op1);
17498 op1 = validize_mem (force_const_mem (op_mode, op1));
17502 op1 = force_reg (op_mode, op1);
17505 op1 = force_reg (op_mode, op1);
17509 /* Try to rearrange the comparison to make it cheaper. */
17510 if (ix86_fp_comparison_cost (code)
17511 > ix86_fp_comparison_cost (swap_condition (code))
17512 && (REG_P (op1) || can_create_pseudo_p ()))
17515 tmp = op0, op0 = op1, op1 = tmp;
17516 code = swap_condition (code);
17518 op0 = force_reg (op_mode, op0);
17526 /* Convert comparison codes we use to represent FP comparison to integer
17527 code that will result in a proper branch. Return UNKNOWN if no such code
17528 is available. */
17530 enum rtx_code
17531 ix86_fp_compare_code_to_integer (enum rtx_code code)
17560 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17563 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17565 enum machine_mode fpcmp_mode, intcmp_mode;
17568 fpcmp_mode = ix86_fp_compare_mode (code);
17569 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17571 /* Do fcomi/sahf based test when profitable. */
17572 switch (ix86_fp_comparison_strategy (code))
17574 case IX86_FPCMP_COMI:
17575 intcmp_mode = fpcmp_mode;
17576 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17577 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17582 case IX86_FPCMP_SAHF:
17583 intcmp_mode = fpcmp_mode;
17584 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17585 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17589 scratch = gen_reg_rtx (HImode);
17590 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17591 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17594 case IX86_FPCMP_ARITH:
17595 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17596 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17597 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17599 scratch = gen_reg_rtx (HImode);
17600 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17602 /* In the unordered case, we have to check C2 for NaNs, which
17603 doesn't happen to work out to anything nice combination-wise.
17604 So do some bit twiddling on the value we've got in AH to come
17605 up with an appropriate set of condition codes. */
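/* Illustrative sketch, not part of the compiler: after fnstsw the x87
   condition bits land in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, and an
   unordered result sets all three.  The magic masks below are unions of
   these bits; e.g. 0x45 = C0|C2|C3, so GT holds when (ah & 0x45) == 0.  */
#if 0
#define FP_C0 0x01  /* "less than" */
#define FP_C2 0x04  /* set on unordered, together with C0 and C3 */
#define FP_C3 0x40  /* "equal" */
static int ah_gt (unsigned char ah) { return (ah & (FP_C0 | FP_C2 | FP_C3)) == 0; }
#endif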
17607 intcmp_mode = CCNOmode;
17612 if (code == GT || !TARGET_IEEE_FP)
17614 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17619 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17620 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17621 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17622 intcmp_mode = CCmode;
17628 if (code == LT && TARGET_IEEE_FP)
17630 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17631 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17632 intcmp_mode = CCmode;
17637 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17643 if (code == GE || !TARGET_IEEE_FP)
17645 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17650 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17651 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17657 if (code == LE && TARGET_IEEE_FP)
17659 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17660 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17661 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17662 intcmp_mode = CCmode;
17667 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17673 if (code == EQ && TARGET_IEEE_FP)
17675 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17676 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17677 intcmp_mode = CCmode;
17682 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17688 if (code == NE && TARGET_IEEE_FP)
17690 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17691 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17697 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17703 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17707 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17712 gcc_unreachable ();
17720 /* Return the test that should be put into the flags user, i.e.
17721 the bcc, scc, or cmov instruction. */
17722 return gen_rtx_fmt_ee (code, VOIDmode,
17723 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17728 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17732 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17733 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17735 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17737 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17738 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17741 ret = ix86_expand_int_compare (code, op0, op1);
17747 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17749 enum machine_mode mode = GET_MODE (op0);
17761 tmp = ix86_expand_compare (code, op0, op1);
17762 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17763 gen_rtx_LABEL_REF (VOIDmode, label),
17765 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17772 /* Expand DImode branch into multiple compare+branch. */
17774 rtx lo[2], hi[2], label2;
17775 enum rtx_code code1, code2, code3;
17776 enum machine_mode submode;
17778 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17780 tmp = op0, op0 = op1, op1 = tmp;
17781 code = swap_condition (code);
17784 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17785 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17787 submode = mode == DImode ? SImode : DImode;
17789 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17790 avoid two branches. This costs one extra insn, so disable when
17791 optimizing for size. */
17793 if ((code == EQ || code == NE)
17794 && (!optimize_insn_for_size_p ()
17795 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17800 if (hi[1] != const0_rtx)
17801 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17802 NULL_RTX, 0, OPTAB_WIDEN);
17805 if (lo[1] != const0_rtx)
17806 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17807 NULL_RTX, 0, OPTAB_WIDEN);
17809 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17810 NULL_RTX, 0, OPTAB_WIDEN);
17812 ix86_expand_branch (code, tmp, const0_rtx, label);
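/* Illustrative sketch, not part of the compiler: the branch-free equality
   test built above for a double-word compare on a 32-bit target -- one ior
   of two xors feeding a single compare against zero.  */
#if 0
static int
di_eq (unsigned lo0, unsigned hi0, unsigned lo1, unsigned hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}
#endif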
17816 /* Otherwise, if we are doing a less-than or greater-than-or-equal
17817 comparison, op1 is a constant and the low word is zero, then we can
17818 just examine the high word. Similarly for low word -1 and
17819 less-than-or-equal or greater-than. */
17821 if (CONST_INT_P (hi[1]))
17824 case LT: case LTU: case GE: case GEU:
17825 if (lo[1] == const0_rtx)
17827 ix86_expand_branch (code, hi[0], hi[1], label);
17831 case LE: case LEU: case GT: case GTU:
17832 if (lo[1] == constm1_rtx)
17834 ix86_expand_branch (code, hi[0], hi[1], label);
17842 /* Otherwise, we need two or three jumps. */
17844 label2 = gen_label_rtx ();
17847 code2 = swap_condition (code);
17848 code3 = unsigned_condition (code);
17852 case LT: case GT: case LTU: case GTU:
17855 case LE: code1 = LT; code2 = GT; break;
17856 case GE: code1 = GT; code2 = LT; break;
17857 case LEU: code1 = LTU; code2 = GTU; break;
17858 case GEU: code1 = GTU; code2 = LTU; break;
17860 case EQ: code1 = UNKNOWN; code2 = NE; break;
17861 case NE: code2 = UNKNOWN; break;
17864 gcc_unreachable ();
17867 /*
17868 * a < b =>
17869 * if (hi(a) < hi(b)) goto true;
17870 * if (hi(a) > hi(b)) goto false;
17871 * if (lo(a) < lo(b)) goto true;
17872 * false:
17873 */
17875 if (code1 != UNKNOWN)
17876 ix86_expand_branch (code1, hi[0], hi[1], label);
17877 if (code2 != UNKNOWN)
17878 ix86_expand_branch (code2, hi[0], hi[1], label2);
17880 ix86_expand_branch (code3, lo[0], lo[1], label);
17882 if (code2 != UNKNOWN)
17883 emit_label (label2);
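/* Illustrative sketch, not part of the compiler: the jump sequence emitted
   above for a signed double-word a < b (code1 = LT on the high parts,
   code2 = GT branching to label2, code3 = LTU on the low parts).  */
#if 0
static int
di_lt (int hi0, int hi1, unsigned lo0, unsigned lo1)
{
  if (hi0 < hi1) return 1;   /* code1: branch to LABEL        */
  if (hi0 > hi1) return 0;   /* code2: branch to label2       */
  return lo0 < lo1;          /* code3: unsigned low-part test */
}
#endif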
17888 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17893 /* Split branch based on floating point condition. */
17895 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17896 rtx target1, rtx target2, rtx tmp, rtx pushed)
17901 if (target2 != pc_rtx)
17904 code = reverse_condition_maybe_unordered (code);
17909 condition = ix86_expand_fp_compare (code, op1, op2,
17912 /* Remove pushed operand from stack. */
17914 ix86_free_from_memory (GET_MODE (pushed));
17916 i = emit_jump_insn (gen_rtx_SET
17918 gen_rtx_IF_THEN_ELSE (VOIDmode,
17919 condition, target1, target2)));
17920 if (split_branch_probability >= 0)
17921 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17925 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17929 gcc_assert (GET_MODE (dest) == QImode);
17931 ret = ix86_expand_compare (code, op0, op1);
17932 PUT_MODE (ret, QImode);
17933 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17936 /* Expand a comparison that sets or clears the carry flag. Return true when
17937 successful, and set *POP to the resulting comparison. */
17939 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17941 enum machine_mode mode =
17942 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17944 /* Do not handle double-mode compares; they go through a special path. */
17945 if (mode == (TARGET_64BIT ? TImode : DImode))
17948 if (SCALAR_FLOAT_MODE_P (mode))
17950 rtx compare_op, compare_seq;
17952 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17954 /* Shortcut: the following common codes never translate
17955 into carry flag compares. */
17956 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17957 || code == ORDERED || code == UNORDERED)
17960 /* These comparisons require the zero flag; swap the operands so that they won't. */
17961 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17962 && !TARGET_IEEE_FP)
17967 code = swap_condition (code);
17970 /* Try to expand the comparison and verify that we end up with a
17971 carry-flag-based comparison. This fails to be true only when we
17972 decide to expand the comparison using arithmetic, which is not a
17973 common scenario. */
17975 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17976 compare_seq = get_insns ();
17979 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17980 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17981 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17983 code = GET_CODE (compare_op);
17985 if (code != LTU && code != GEU)
17988 emit_insn (compare_seq);
17993 if (!INTEGRAL_MODE_P (mode))
18002 /* Convert a==0 into (unsigned)a<1. */
18005 if (op1 != const0_rtx)
18008 code = (code == EQ ? LTU : GEU);
18011 /* Convert a>b into b<a or a>=b-1. */
18014 if (CONST_INT_P (op1))
18016 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
18017 /* Bail out on overflow. We could still swap the operands, but that
18018 would force the constant to be loaded into a register. */
18019 if (op1 == const0_rtx
18020 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
18022 code = (code == GTU ? GEU : LTU);
18029 code = (code == GTU ? LTU : GEU);
18033 /* Convert a>=0 into (unsigned)a<0x80000000. */
18036 if (mode == DImode || op1 != const0_rtx)
18038 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18039 code = (code == LT ? GEU : LTU);
18043 if (mode == DImode || op1 != constm1_rtx)
18045 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
18046 code = (code == LE ? GEU : LTU);
18052 /* Swapping operands may cause constant to appear as first operand. */
18053 if (!nonimmediate_operand (op0, VOIDmode))
18055 if (!can_create_pseudo_p ())
18057 op0 = force_reg (mode, op0);
18059 *pop = ix86_expand_compare (code, op0, op1);
18060 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
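/* Illustrative sketches, not part of the compiler: the integer rewrites
   above, each of which ends in a compare that leaves the answer in the
   carry flag (LTU/GEU):
     a == 0  ->  (unsigned) a < 1
     a >u b  ->  b <u a   (or  a >=u b + 1  for a constant b)
     a >= 0  ->  (unsigned) a < 0x80000000  */
#if 0
static int eq0 (unsigned a) { return a < 1; }
static int ge0 (int a)      { return (unsigned) a < 0x80000000u; }
#endif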
18065 ix86_expand_int_movcc (rtx operands[])
18067 enum rtx_code code = GET_CODE (operands[1]), compare_code;
18068 rtx compare_seq, compare_op;
18069 enum machine_mode mode = GET_MODE (operands[0]);
18070 bool sign_bit_compare_p = false;
18071 rtx op0 = XEXP (operands[1], 0);
18072 rtx op1 = XEXP (operands[1], 1);
18075 compare_op = ix86_expand_compare (code, op0, op1);
18076 compare_seq = get_insns ();
18079 compare_code = GET_CODE (compare_op);
18081 if ((op1 == const0_rtx && (code == GE || code == LT))
18082 || (op1 == constm1_rtx && (code == GT || code == LE)))
18083 sign_bit_compare_p = true;
18085 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
18086 HImode insns, we'd be swallowed in word prefix ops. */
18088 if ((mode != HImode || TARGET_FAST_PREFIX)
18089 && (mode != (TARGET_64BIT ? TImode : DImode))
18090 && CONST_INT_P (operands[2])
18091 && CONST_INT_P (operands[3]))
18093 rtx out = operands[0];
18094 HOST_WIDE_INT ct = INTVAL (operands[2]);
18095 HOST_WIDE_INT cf = INTVAL (operands[3]);
18096 HOST_WIDE_INT diff;
18099 /* Sign bit compares are better done using shifts than by using
18100 the sbb insn. */
18101 if (sign_bit_compare_p
18102 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18104 /* Detect overlap between destination and compare sources. */
18107 if (!sign_bit_compare_p)
18110 bool fpcmp = false;
18112 compare_code = GET_CODE (compare_op);
18114 flags = XEXP (compare_op, 0);
18116 if (GET_MODE (flags) == CCFPmode
18117 || GET_MODE (flags) == CCFPUmode)
18121 = ix86_fp_compare_code_to_integer (compare_code);
18124 /* To simplify the rest of the code, restrict to the GEU case. */
18125 if (compare_code == LTU)
18127 HOST_WIDE_INT tmp = ct;
18130 compare_code = reverse_condition (compare_code);
18131 code = reverse_condition (code);
18136 PUT_CODE (compare_op,
18137 reverse_condition_maybe_unordered
18138 (GET_CODE (compare_op)));
18140 PUT_CODE (compare_op,
18141 reverse_condition (GET_CODE (compare_op)));
18145 if (reg_overlap_mentioned_p (out, op0)
18146 || reg_overlap_mentioned_p (out, op1))
18147 tmp = gen_reg_rtx (mode);
18149 if (mode == DImode)
18150 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18152 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18153 flags, compare_op));
18157 if (code == GT || code == GE)
18158 code = reverse_condition (code);
18161 HOST_WIDE_INT tmp = ct;
18166 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18179 tmp = expand_simple_binop (mode, PLUS,
18181 copy_rtx (tmp), 1, OPTAB_DIRECT);
18192 tmp = expand_simple_binop (mode, IOR,
18194 copy_rtx (tmp), 1, OPTAB_DIRECT);
18196 else if (diff == -1 && ct)
18206 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18208 tmp = expand_simple_binop (mode, PLUS,
18209 copy_rtx (tmp), GEN_INT (cf),
18210 copy_rtx (tmp), 1, OPTAB_DIRECT);
18218 * andl cf - ct, dest
18228 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18231 tmp = expand_simple_binop (mode, AND,
18233 gen_int_mode (cf - ct, mode),
18234 copy_rtx (tmp), 1, OPTAB_DIRECT);
18236 tmp = expand_simple_binop (mode, PLUS,
18237 copy_rtx (tmp), GEN_INT (ct),
18238 copy_rtx (tmp), 1, OPTAB_DIRECT);
18241 if (!rtx_equal_p (tmp, out))
18242 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
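/* Illustrative sketch, not part of the compiler: the branch-free select
   built above.  After cmp/sbb the mask is -1 when the carry was set
   (a <u b) and 0 otherwise; ignoring signed-overflow corner cases:  */
#if 0
static int
movcc_sbb (unsigned a, unsigned b, int ct, int cf)
{
  int mask = -(int) (a < b);        /* sbb reg, reg          */
  return (mask & (ct - cf)) + cf;   /* ct if a < b, else cf  */
}
#endif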
18249 enum machine_mode cmp_mode = GET_MODE (op0);
18252 tmp = ct, ct = cf, cf = tmp;
18255 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18257 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18259 /* We may be reversing an unordered compare to a normal compare; that
18260 is not valid in general (we may convert a non-trapping condition
18261 to a trapping one), but on i386 we currently emit all
18262 comparisons unordered. */
18263 compare_code = reverse_condition_maybe_unordered (compare_code);
18264 code = reverse_condition_maybe_unordered (code);
18268 compare_code = reverse_condition (compare_code);
18269 code = reverse_condition (code);
18273 compare_code = UNKNOWN;
18274 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18275 && CONST_INT_P (op1))
18277 if (op1 == const0_rtx
18278 && (code == LT || code == GE))
18279 compare_code = code;
18280 else if (op1 == constm1_rtx)
18284 else if (code == GT)
18289 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18290 if (compare_code != UNKNOWN
18291 && GET_MODE (op0) == GET_MODE (out)
18292 && (cf == -1 || ct == -1))
18294 /* If the lea code below could be used, only optimize
18295 if it results in a two-insn sequence. */
18297 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18298 || diff == 3 || diff == 5 || diff == 9)
18299 || (compare_code == LT && ct == -1)
18300 || (compare_code == GE && cf == -1))
18303 * notl op1 (if necessary)
18311 code = reverse_condition (code);
18314 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18316 out = expand_simple_binop (mode, IOR,
18318 out, 1, OPTAB_DIRECT);
18319 if (out != operands[0])
18320 emit_move_insn (operands[0], out);
18327 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18328 || diff == 3 || diff == 5 || diff == 9)
18329 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18331 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18337 * lea cf(dest*(ct-cf)),dest
18341 * This also catches the degenerate setcc-only case.
18347 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18350 /* On x86_64 the lea instruction operates on Pmode, so we need
18351 to do the arithmetic in the proper mode to match. */
18353 tmp = copy_rtx (out);
18357 out1 = copy_rtx (out);
18358 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18362 tmp = gen_rtx_PLUS (mode, tmp, out1);
18368 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18371 if (!rtx_equal_p (tmp, out))
18374 out = force_operand (tmp, copy_rtx (out));
18376 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18378 if (!rtx_equal_p (out, operands[0]))
18379 emit_move_insn (operands[0], copy_rtx (out));
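/* Illustrative sketch, not part of the compiler: when ct - cf is one of
   1, 2, 3, 4, 5, 8 or 9, the 0/1 produced by setcc can be scaled and
   offset by a single lea, computing dest = cf + cond * (ct - cf) with no
   branch (e.g. leal 7(%eax,%eax,4), %eax for cf == 7, diff == 5).  */
#if 0
static int
movcc_lea (int cond /* 0 or 1 */, int cf, int diff)
{
  return cf + cond * diff;
}
#endif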
18385 * General case: Jumpful:
18386 * xorl dest,dest cmpl op1, op2
18387 * cmpl op1, op2 movl ct, dest
18388 * setcc dest jcc 1f
18389 * decl dest movl cf, dest
18390 * andl (cf-ct),dest 1:
18393 * Size 20. Size 14.
18395 * This is reasonably steep, but branch mispredict costs are
18396 * high on modern cpus, so consider failing only if optimizing
18397 * for size.
18398 */
18400 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18401 && BRANCH_COST (optimize_insn_for_speed_p (),
18406 enum machine_mode cmp_mode = GET_MODE (op0);
18411 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18413 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18415 /* We may be reversing an unordered compare to a normal compare;
18416 that is not valid in general (we may convert a non-trapping
18417 condition to a trapping one), but on i386 we currently
18418 emit all comparisons unordered. */
18419 code = reverse_condition_maybe_unordered (code);
18423 code = reverse_condition (code);
18424 if (compare_code != UNKNOWN)
18425 compare_code = reverse_condition (compare_code);
18429 if (compare_code != UNKNOWN)
18431 /* notl op1 (if needed)
18436 For x < 0 (resp. x <= -1) there will be no notl,
18437 so if possible swap the constants to get rid of the
18438 complement.
18439 True/false will be -1/0 while the code below (store flag
18440 followed by decrement) is 0/-1, so the constants need
18441 to be exchanged once more. */
18443 if (compare_code == GE || !cf)
18445 code = reverse_condition (code);
18450 HOST_WIDE_INT tmp = cf;
18455 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18459 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18461 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18463 copy_rtx (out), 1, OPTAB_DIRECT);
18466 out = expand_simple_binop (mode, AND, copy_rtx (out),
18467 gen_int_mode (cf - ct, mode),
18468 copy_rtx (out), 1, OPTAB_DIRECT);
18470 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18471 copy_rtx (out), 1, OPTAB_DIRECT);
18472 if (!rtx_equal_p (out, operands[0]))
18473 emit_move_insn (operands[0], copy_rtx (out));
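/* Illustrative sketch, not part of the compiler: the sign-bit variant
   above; sarl $31 replicates the sign bit into a 0/-1 mask (relying, as
   the generated code does, on arithmetic right shift of negative
   values):  */
#if 0
static int
movcc_sar (int x, int ct, int cf)
{
  int mask = x >> 31;               /* -1 if x < 0, else 0  */
  return (mask & (ct - cf)) + cf;   /* ct if x < 0, else cf */
}
#endif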
18479 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18481 /* Try a few more things with specific constants and a variable. */
18484 rtx var, orig_out, out, tmp;
18486 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18489 /* If one of the two operands is an interesting constant, load a
18490 constant with the above and mask it in with a logical operation. */
18492 if (CONST_INT_P (operands[2]))
18495 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18496 operands[3] = constm1_rtx, op = and_optab;
18497 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18498 operands[3] = const0_rtx, op = ior_optab;
18502 else if (CONST_INT_P (operands[3]))
18505 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18506 operands[2] = constm1_rtx, op = and_optab;
18507 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18508 operands[2] = const0_rtx, op = ior_optab;
18515 orig_out = operands[0];
18516 tmp = gen_reg_rtx (mode);
18519 /* Recurse to get the constant loaded. */
18520 if (ix86_expand_int_movcc (operands) == 0)
18523 /* Mask in the interesting variable. */
18524 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18526 if (!rtx_equal_p (out, orig_out))
18527 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18533 * For comparison with above,
18543 if (! nonimmediate_operand (operands[2], mode))
18544 operands[2] = force_reg (mode, operands[2]);
18545 if (! nonimmediate_operand (operands[3], mode))
18546 operands[3] = force_reg (mode, operands[3]);
18548 if (! register_operand (operands[2], VOIDmode)
18550 || ! register_operand (operands[3], VOIDmode)))
18551 operands[2] = force_reg (mode, operands[2]);
18554 && ! register_operand (operands[3], VOIDmode))
18555 operands[3] = force_reg (mode, operands[3]);
18557 emit_insn (compare_seq);
18558 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18559 gen_rtx_IF_THEN_ELSE (mode,
18560 compare_op, operands[2],
18565 /* Swap, force into registers, or otherwise massage the two operands
18566 to an sse comparison with a mask result. Thus we differ a bit from
18567 ix86_prepare_fp_compare_args which expects to produce a flags result.
18569 The DEST operand exists to help determine whether to commute commutative
18570 operators. The POP0/POP1 operands are updated in place. The new
18571 comparison code is returned, or UNKNOWN if not implementable. */
18573 static enum rtx_code
18574 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18575 rtx *pop0, rtx *pop1)
18583 /* We have no LTGT as an operator. We could implement it with
18584 NE & ORDERED, but this requires an extra temporary. It's
18585 not clear that it's worth it. */
18592 /* These are supported directly. */
18599 /* For commutative operators, try to canonicalize the destination
18600 operand to be first in the comparison - this helps reload to
18601 avoid extra moves. */
18602 if (!dest || !rtx_equal_p (dest, *pop1))
18610 /* These are not supported directly. Swap the comparison operands
18611 to transform into something that is supported. */
18615 code = swap_condition (code);
18619 gcc_unreachable ();
18625 /* Detect conditional moves that exactly match min/max operational
18626 semantics. Note that this is IEEE safe, as long as we don't
18627 interchange the operands.
18629 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18630 and TRUE if the operation is successful and instructions are emitted. */
18633 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18634 rtx cmp_op1, rtx if_true, rtx if_false)
18636 enum machine_mode mode;
18642 else if (code == UNGE)
18645 if_true = if_false;
18651 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18653 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18658 mode = GET_MODE (dest);
18660 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18661 but MODE may be a vector mode and thus not appropriate. */
18662 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18664 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18667 if_true = force_reg (mode, if_true);
18668 v = gen_rtvec (2, if_true, if_false);
18669 tmp = gen_rtx_UNSPEC (mode, v, u);
18673 code = is_min ? SMIN : SMAX;
18674 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18677 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
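/* Illustrative sketch, not part of the compiler: the source pattern
   matched above.  minss/minsd implement a < b ? a : b exactly, including
   the asymmetric NaN rule (the second operand is returned on unordered),
   which is why the operands must not be interchanged.  */
#if 0
static float sse_min (float a, float b) { return a < b ? a : b; }
static float sse_max (float a, float b) { return a > b ? a : b; }
#endif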
18681 /* Expand an sse vector comparison. Return the register with the result. */
18684 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18685 rtx op_true, rtx op_false)
18687 enum machine_mode mode = GET_MODE (dest);
18690 cmp_op0 = force_reg (mode, cmp_op0);
18691 if (!nonimmediate_operand (cmp_op1, mode))
18692 cmp_op1 = force_reg (mode, cmp_op1);
18695 || reg_overlap_mentioned_p (dest, op_true)
18696 || reg_overlap_mentioned_p (dest, op_false))
18697 dest = gen_reg_rtx (mode);
18699 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18700 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18705 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18706 operations. This is used for both scalar and vector conditional moves. */
18709 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18711 enum machine_mode mode = GET_MODE (dest);
18714 if (op_false == CONST0_RTX (mode))
18716 op_true = force_reg (mode, op_true);
18717 x = gen_rtx_AND (mode, cmp, op_true);
18718 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18720 else if (op_true == CONST0_RTX (mode))
18722 op_false = force_reg (mode, op_false);
18723 x = gen_rtx_NOT (mode, cmp);
18724 x = gen_rtx_AND (mode, x, op_false);
18725 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18727 else if (TARGET_XOP)
18729 rtx pcmov = gen_rtx_SET (mode, dest,
18730 gen_rtx_IF_THEN_ELSE (mode, cmp,
18737 op_true = force_reg (mode, op_true);
18738 op_false = force_reg (mode, op_false);
18740 t2 = gen_reg_rtx (mode);
18742 t3 = gen_reg_rtx (mode);
18746 x = gen_rtx_AND (mode, op_true, cmp);
18747 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18749 x = gen_rtx_NOT (mode, cmp);
18750 x = gen_rtx_AND (mode, x, op_false);
18751 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18753 x = gen_rtx_IOR (mode, t3, t2);
18754 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
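/* Illustrative sketch, not part of the compiler: the mask select computed
   above, one element at a time.  CMP is all-ones or all-zeros per element,
   so the expansion is and / andn / or.  */
#if 0
static unsigned
sse_select (unsigned cmp, unsigned t, unsigned f)
{
  return (cmp & t) | (~cmp & f);
}
#endif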
18758 /* Expand a floating-point conditional move. Return true if successful. */
18761 ix86_expand_fp_movcc (rtx operands[])
18763 enum machine_mode mode = GET_MODE (operands[0]);
18764 enum rtx_code code = GET_CODE (operands[1]);
18765 rtx tmp, compare_op;
18766 rtx op0 = XEXP (operands[1], 0);
18767 rtx op1 = XEXP (operands[1], 1);
18769 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18771 enum machine_mode cmode;
18773 /* Since we've no cmove for sse registers, don't force bad register
18774 allocation just to gain access to it. Deny movcc when the
18775 comparison mode doesn't match the move mode. */
18776 cmode = GET_MODE (op0);
18777 if (cmode == VOIDmode)
18778 cmode = GET_MODE (op1);
18782 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18783 if (code == UNKNOWN)
18786 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18787 operands[2], operands[3]))
18790 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18791 operands[2], operands[3]);
18792 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18796 /* The floating point conditional move instructions don't directly
18797 support conditions resulting from a signed integer comparison. */
18799 compare_op = ix86_expand_compare (code, op0, op1);
18800 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18802 tmp = gen_reg_rtx (QImode);
18803 ix86_expand_setcc (tmp, code, op0, op1);
18805 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18808 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18809 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18810 operands[2], operands[3])));
18815 /* Expand a floating-point vector conditional move; a vcond operation
18816 rather than a movcc operation. */
18819 ix86_expand_fp_vcond (rtx operands[])
18821 enum rtx_code code = GET_CODE (operands[3]);
18824 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18825 &operands[4], &operands[5]);
18826 if (code == UNKNOWN)
18829 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18830 operands[5], operands[1], operands[2]))
18833 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18834 operands[1], operands[2]);
18835 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18839 /* Expand a signed/unsigned integral vector conditional move. */
18842 ix86_expand_int_vcond (rtx operands[])
18844 enum machine_mode mode = GET_MODE (operands[0]);
18845 enum rtx_code code = GET_CODE (operands[3]);
18846 bool negate = false;
18849 cop0 = operands[4];
18850 cop1 = operands[5];
18852 /* XOP supports all of the comparisons on all vector int types. */
18855 /* Canonicalize the comparison to EQ, GT, GTU. */
18866 code = reverse_condition (code);
18872 code = reverse_condition (code);
18878 code = swap_condition (code);
18879 x = cop0, cop0 = cop1, cop1 = x;
18883 gcc_unreachable ();
18886 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18887 if (mode == V2DImode)
18892 /* SSE4.1 supports EQ. */
18893 if (!TARGET_SSE4_1)
18899 /* SSE4.2 supports GT/GTU. */
18900 if (!TARGET_SSE4_2)
18905 gcc_unreachable ();
18909 /* Unsigned parallel compare is not supported by the hardware.
18910 Play some tricks to turn this into a signed comparison
18911 against 0. */
18914 cop0 = force_reg (mode, cop0);
18922 rtx (*gen_sub3) (rtx, rtx, rtx);
18924 /* Subtract (-(INT MAX) - 1) from both operands to make
18925 them signed. */
18926 mask = ix86_build_signbit_mask (mode, true, false);
18927 gen_sub3 = (mode == V4SImode
18928 ? gen_subv4si3 : gen_subv2di3);
18929 t1 = gen_reg_rtx (mode);
18930 emit_insn (gen_sub3 (t1, cop0, mask));
18932 t2 = gen_reg_rtx (mode);
18933 emit_insn (gen_sub3 (t2, cop1, mask));
18943 /* Perform a parallel unsigned saturating subtraction. */
18944 x = gen_reg_rtx (mode);
18945 emit_insn (gen_rtx_SET (VOIDmode, x,
18946 gen_rtx_US_MINUS (mode, cop0, cop1)));
18949 cop1 = CONST0_RTX (mode);
18955 gcc_unreachable ();
18960 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18961 operands[1+negate], operands[2-negate]);
18963 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18964 operands[2-negate]);
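/* Illustrative sketches, not part of the compiler: the two unsigned
   compare tricks used above, per element.  Flipping the sign bit
   (subtracting or xoring 0x80000000 is the same thing modulo 2^32) turns
   an unsigned compare into a signed one; with unsigned saturating
   subtraction, a >u b iff a -sat b is nonzero.  */
#if 0
static int gtu_bias (unsigned a, unsigned b)
{ return (int) (a ^ 0x80000000u) > (int) (b ^ 0x80000000u); }
static unsigned subus (unsigned a, unsigned b)   /* psubus, one lane */
{ return a > b ? a - b : 0; }
static int gtu_subus (unsigned a, unsigned b)
{ return subus (a, b) != 0; }
#endif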
18968 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18969 true if we should do zero extension, else sign extension. HIGH_P is
18970 true if we want the N/2 high elements, else the low elements. */
18973 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18975 enum machine_mode imode = GET_MODE (operands[1]);
18976 rtx (*unpack)(rtx, rtx, rtx);
18983 unpack = gen_vec_interleave_highv16qi;
18985 unpack = gen_vec_interleave_lowv16qi;
18989 unpack = gen_vec_interleave_highv8hi;
18991 unpack = gen_vec_interleave_lowv8hi;
18995 unpack = gen_vec_interleave_highv4si;
18997 unpack = gen_vec_interleave_lowv4si;
19000 gcc_unreachable ();
19003 dest = gen_lowpart (imode, operands[0]);
19006 se = force_reg (imode, CONST0_RTX (imode));
19008 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
19009 operands[1], pc_rtx, pc_rtx);
19011 emit_insn (unpack (dest, operands[1], se));
19014 /* This function performs the same task as ix86_expand_sse_unpack,
19015 but with SSE4.1 instructions. */
19018 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
19020 enum machine_mode imode = GET_MODE (operands[1]);
19021 rtx (*unpack)(rtx, rtx);
19028 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
19030 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
19034 unpack = gen_sse4_1_zero_extendv4hiv4si2;
19036 unpack = gen_sse4_1_sign_extendv4hiv4si2;
19040 unpack = gen_sse4_1_zero_extendv2siv2di2;
19042 unpack = gen_sse4_1_sign_extendv2siv2di2;
19045 gcc_unreachable ();
19048 dest = operands[0];
19051 /* Shift higher 8 bytes to lower 8 bytes. */
19052 src = gen_reg_rtx (imode);
19053 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
19054 gen_lowpart (V1TImode, operands[1]),
19060 emit_insn (unpack (dest, src));
19063 /* Expand conditional increment or decrement using adc/sbb instructions.
19064 The default case using setcc followed by the conditional move can be
19065 done by generic code. */
19067 ix86_expand_int_addcc (rtx operands[])
19069 enum rtx_code code = GET_CODE (operands[1]);
19071 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19073 rtx val = const0_rtx;
19074 bool fpcmp = false;
19075 enum machine_mode mode;
19076 rtx op0 = XEXP (operands[1], 0);
19077 rtx op1 = XEXP (operands[1], 1);
19079 if (operands[3] != const1_rtx
19080 && operands[3] != constm1_rtx)
19082 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19084 code = GET_CODE (compare_op);
19086 flags = XEXP (compare_op, 0);
19088 if (GET_MODE (flags) == CCFPmode
19089 || GET_MODE (flags) == CCFPUmode)
19092 code = ix86_fp_compare_code_to_integer (code);
19099 PUT_CODE (compare_op,
19100 reverse_condition_maybe_unordered
19101 (GET_CODE (compare_op)));
19103 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
19106 mode = GET_MODE (operands[0]);
19108 /* Construct either adc or sbb insn. */
19109 if ((code == LTU) == (operands[3] == constm1_rtx))
19114 insn = gen_subqi3_carry;
19117 insn = gen_subhi3_carry;
19120 insn = gen_subsi3_carry;
19123 insn = gen_subdi3_carry;
19126 gcc_unreachable ();
19134 insn = gen_addqi3_carry;
19137 insn = gen_addhi3_carry;
19140 insn = gen_addsi3_carry;
19143 insn = gen_adddi3_carry;
19146 gcc_unreachable ();
19149 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
19155 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19156 but works for floating-point parameters and non-offsettable memories.
19157 For pushes, it returns just stack offsets; the values will be saved
19158 in the right order. At most four parts are generated. */
19161 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19166 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19168 size = (GET_MODE_SIZE (mode) + 4) / 8;
19170 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19171 gcc_assert (size >= 2 && size <= 4);
19173 /* Optimize constant pool references to immediates. This is used by fp
19174 moves, which force all constants to memory to allow combining. */
19175 if (MEM_P (operand) && MEM_READONLY_P (operand))
19177 rtx tmp = maybe_get_pool_constant (operand);
19182 if (MEM_P (operand) && !offsettable_memref_p (operand))
19184 /* The only non-offsettable memories we handle are pushes. */
19185 int ok = push_operand (operand, VOIDmode);
19189 operand = copy_rtx (operand);
19190 PUT_MODE (operand, Pmode);
19191 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19195 if (GET_CODE (operand) == CONST_VECTOR)
19197 enum machine_mode imode = int_mode_for_mode (mode);
19198 /* Caution: if we looked through a constant pool memory above,
19199 the operand may actually have a different mode now. That's
19200 ok, since we want to pun this all the way back to an integer. */
19201 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19202 gcc_assert (operand != NULL);
19208 if (mode == DImode)
19209 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19214 if (REG_P (operand))
19216 gcc_assert (reload_completed);
19217 for (i = 0; i < size; i++)
19218 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19220 else if (offsettable_memref_p (operand))
19222 operand = adjust_address (operand, SImode, 0);
19223 parts[0] = operand;
19224 for (i = 1; i < size; i++)
19225 parts[i] = adjust_address (operand, SImode, 4 * i);
19227 else if (GET_CODE (operand) == CONST_DOUBLE)
19232 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19236 real_to_target (l, &r, mode);
19237 parts[3] = gen_int_mode (l[3], SImode);
19238 parts[2] = gen_int_mode (l[2], SImode);
19241 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19242 parts[2] = gen_int_mode (l[2], SImode);
19245 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19248 gcc_unreachable ();
19250 parts[1] = gen_int_mode (l[1], SImode);
19251 parts[0] = gen_int_mode (l[0], SImode);
19254 gcc_unreachable ();
19259 if (mode == TImode)
19260 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19261 if (mode == XFmode || mode == TFmode)
19263 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19264 if (REG_P (operand))
19266 gcc_assert (reload_completed);
19267 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19268 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19270 else if (offsettable_memref_p (operand))
19272 operand = adjust_address (operand, DImode, 0);
19273 parts[0] = operand;
19274 parts[1] = adjust_address (operand, upper_mode, 8);
19276 else if (GET_CODE (operand) == CONST_DOUBLE)
19281 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19282 real_to_target (l, &r, mode);
19284 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19285 if (HOST_BITS_PER_WIDE_INT >= 64)
19288 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19289 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19292 parts[0] = immed_double_const (l[0], l[1], DImode);
19294 if (upper_mode == SImode)
19295 parts[1] = gen_int_mode (l[2], SImode);
19296 else if (HOST_BITS_PER_WIDE_INT >= 64)
19299 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19300 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19303 parts[1] = immed_double_const (l[2], l[3], DImode);
19306 gcc_unreachable ();
19313 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19314 Return false when normal moves are needed; true when all required
19315 insns have been emitted. Operands 2-5 contain the output values
19316 in the correct order; operands 6-9 contain the input values. */
19319 ix86_split_long_move (rtx operands[])
19324 int collisions = 0;
19325 enum machine_mode mode = GET_MODE (operands[0]);
19326 bool collisionparts[4];
19328 /* The DFmode expanders may ask us to move a double.
19329 For a 64-bit target this is a single move. By hiding that fact
19330 here we simplify the i386.md splitters. */
19331 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19333 /* Optimize constant pool references to immediates. This is used by
19334 fp moves, which force all constants to memory to allow combining. */
19336 if (MEM_P (operands[1])
19337 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19338 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19339 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19340 if (push_operand (operands[0], VOIDmode))
19342 operands[0] = copy_rtx (operands[0]);
19343 PUT_MODE (operands[0], Pmode);
19346 operands[0] = gen_lowpart (DImode, operands[0]);
19347 operands[1] = gen_lowpart (DImode, operands[1]);
19348 emit_move_insn (operands[0], operands[1]);
19352 /* The only non-offsettable memory we handle is a push. */
19353 if (push_operand (operands[0], VOIDmode))
19356 gcc_assert (!MEM_P (operands[0])
19357 || offsettable_memref_p (operands[0]));
19359 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19360 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19362 /* When emitting a push, be careful with source operands that live on the stack. */
19363 if (push && MEM_P (operands[1])
19364 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19366 rtx src_base = XEXP (part[1][nparts - 1], 0);
19368 /* Compensate for the stack decrement by 4. */
19369 if (!TARGET_64BIT && nparts == 3
19370 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19371 src_base = plus_constant (src_base, 4);
19373 /* src_base refers to the stack pointer and is
19374 automatically decreased by the emitted pushes. */
19375 for (i = 0; i < nparts; i++)
19376 part[1][i] = change_address (part[1][i],
19377 GET_MODE (part[1][i]), src_base);
19380 /* We need to do the copy in the right order in case an address register
19381 of the source overlaps the destination. */
19382 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19386 for (i = 0; i < nparts; i++)
19389 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19390 if (collisionparts[i])
19394 /* Collision in the middle part can be handled by reordering. */
19395 if (collisions == 1 && nparts == 3 && collisionparts [1])
19397 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19398 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19400 else if (collisions == 1
19402 && (collisionparts [1] || collisionparts [2]))
19404 if (collisionparts [1])
19406 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19407 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19411 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19412 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19416 /* If there are more collisions, we can't handle it by reordering.
19417 Do an lea to the last part and use only one colliding move. */
19418 else if (collisions > 1)
19424 base = part[0][nparts - 1];
19426 /* Handle the case when the last part isn't valid for lea.
19427 Happens in 64-bit mode storing the 12-byte XFmode. */
19428 if (GET_MODE (base) != Pmode)
19429 base = gen_rtx_REG (Pmode, REGNO (base));
19431 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19432 part[1][0] = replace_equiv_address (part[1][0], base);
19433 for (i = 1; i < nparts; i++)
19435 tmp = plus_constant (base, UNITS_PER_WORD * i);
19436 part[1][i] = replace_equiv_address (part[1][i], tmp);
19447 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19448 emit_insn (gen_addsi3 (stack_pointer_rtx,
19449 stack_pointer_rtx, GEN_INT (-4)));
19450 emit_move_insn (part[0][2], part[1][2]);
19452 else if (nparts == 4)
19454 emit_move_insn (part[0][3], part[1][3]);
19455 emit_move_insn (part[0][2], part[1][2]);
19460 /* In 64-bit mode we don't have a 32-bit push available. If this is a
19461 register, that is OK - we will just use the larger counterpart. We also
19462 retype the memory - this comes from an attempt to avoid the REX prefix
19463 on moving the second half of a TFmode value. */
19464 if (GET_MODE (part[1][1]) == SImode)
19466 switch (GET_CODE (part[1][1]))
19469 part[1][1] = adjust_address (part[1][1], DImode, 0);
19473 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19477 gcc_unreachable ();
19480 if (GET_MODE (part[1][0]) == SImode)
19481 part[1][0] = part[1][1];
19484 emit_move_insn (part[0][1], part[1][1]);
19485 emit_move_insn (part[0][0], part[1][0]);
19489 /* Choose correct order to not overwrite the source before it is copied. */
19490 if ((REG_P (part[0][0])
19491 && REG_P (part[1][1])
19492 && (REGNO (part[0][0]) == REGNO (part[1][1])
19494 && REGNO (part[0][0]) == REGNO (part[1][2]))
19496 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19498 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19500 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19502 operands[2 + i] = part[0][j];
19503 operands[6 + i] = part[1][j];
19508 for (i = 0; i < nparts; i++)
19510 operands[2 + i] = part[0][i];
19511 operands[6 + i] = part[1][i];
19515 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19516 if (optimize_insn_for_size_p ())
19518 for (j = 0; j < nparts - 1; j++)
19519 if (CONST_INT_P (operands[6 + j])
19520 && operands[6 + j] != const0_rtx
19521 && REG_P (operands[2 + j]))
19522 for (i = j; i < nparts - 1; i++)
19523 if (CONST_INT_P (operands[7 + i])
19524 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19525 operands[7 + i] = operands[2 + j];
19528 for (i = 0; i < nparts; i++)
19529 emit_move_insn (operands[2 + i], operands[6 + i]);
19534 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19535 left shift by a constant, either using a single shift or
19536 a sequence of add instructions. */
19539 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19541 rtx (*insn)(rtx, rtx, rtx);
19544 || (count * ix86_cost->add <= ix86_cost->shift_const
19545 && !optimize_insn_for_size_p ()))
19547 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19548 while (count-- > 0)
19549 emit_insn (insn (operand, operand, operand));
19553 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19554 emit_insn (insn (operand, operand, GEN_INT (count)));
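/* Illustrative sketch, not part of the compiler: a small constant left
   shift as a chain of adds, as emitted above when count * add cost is no
   more than the cost of one shift-by-constant.  */
#if 0
static unsigned
shl3_via_add (unsigned x)   /* x << 3 */
{
  x += x;  /* x << 1 */
  x += x;  /* x << 2 */
  x += x;  /* x << 3 */
  return x;
}
#endif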
19559 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19561 rtx (*gen_ashl3)(rtx, rtx, rtx);
19562 rtx (*gen_shld)(rtx, rtx, rtx);
19563 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19565 rtx low[2], high[2];
19568 if (CONST_INT_P (operands[2]))
19570 split_double_mode (mode, operands, 2, low, high);
19571 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19573 if (count >= half_width)
19575 emit_move_insn (high[0], low[1]);
19576 emit_move_insn (low[0], const0_rtx);
19578 if (count > half_width)
19579 ix86_expand_ashl_const (high[0], count - half_width, mode);
19583 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19585 if (!rtx_equal_p (operands[0], operands[1]))
19586 emit_move_insn (operands[0], operands[1]);
19588 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19589 ix86_expand_ashl_const (low[0], count, mode);
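/* Illustrative sketch, not part of the compiler: the double-word left
   shift by a constant 0 < COUNT < 32 built above from shld + shl, written
   out for 32-bit halves.  */
#if 0
static void
dshl (unsigned *lo, unsigned *hi, int count)
{
  *hi = (*hi << count) | (*lo >> (32 - count));   /* shld */
  *lo <<= count;                                  /* shl  */
}
#endif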
19594 split_double_mode (mode, operands, 1, low, high);
19596 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19598 if (operands[1] == const1_rtx)
19600 /* Assuming we've chosen QImode-capable registers, then 1 << N
19601 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19602 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19604 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19606 ix86_expand_clear (low[0]);
19607 ix86_expand_clear (high[0]);
19608 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19610 d = gen_lowpart (QImode, low[0]);
19611 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19612 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19613 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19615 d = gen_lowpart (QImode, high[0]);
19616 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19617 s = gen_rtx_NE (QImode, flags, const0_rtx);
19618 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19621 /* Otherwise, we can get the same results by manually performing
19622 a bit extract operation on bit 5/6, and then performing the two
19623 shifts. The two methods of getting 0/1 into low/high are exactly
19624 the same size. Avoiding the shift in the bit extract case helps
19625 pentium4 a bit; no one else seems to care much either way. */
19628 enum machine_mode half_mode;
19629 rtx (*gen_lshr3)(rtx, rtx, rtx);
19630 rtx (*gen_and3)(rtx, rtx, rtx);
19631 rtx (*gen_xor3)(rtx, rtx, rtx);
19632 HOST_WIDE_INT bits;
19635 if (mode == DImode)
19637 half_mode = SImode;
19638 gen_lshr3 = gen_lshrsi3;
19639 gen_and3 = gen_andsi3;
19640 gen_xor3 = gen_xorsi3;
19645 half_mode = DImode;
19646 gen_lshr3 = gen_lshrdi3;
19647 gen_and3 = gen_anddi3;
19648 gen_xor3 = gen_xordi3;
19652 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19653 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19655 x = gen_lowpart (half_mode, operands[2]);
19656 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19658 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19659 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19660 emit_move_insn (low[0], high[0]);
19661 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19664 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19665 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
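/* Illustrative sketch, not part of the compiler: branchless 1 << n for a
   double-word result on a 32-bit target, as expanded above.  Bit 5 of the
   count selects which half receives the 1; both halves are then shifted
   by the count, which the hardware masks to 5 bits.  */
#if 0
static void
one_shl (unsigned n /* 0..63 */, unsigned *lo, unsigned *hi)
{
  unsigned high_half = (n >> 5) & 1;
  *lo = (high_half ^ 1) << (n & 31);
  *hi = high_half << (n & 31);
}
#endif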
19669 if (operands[1] == constm1_rtx)
19671 /* For -1 << N, we can avoid the shld instruction, because we
19672 know that we're shifting 0...31/63 ones into a -1. */
19673 emit_move_insn (low[0], constm1_rtx);
19674 if (optimize_insn_for_size_p ())
19675 emit_move_insn (high[0], low[0]);
19677 emit_move_insn (high[0], constm1_rtx);
19681 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19683 if (!rtx_equal_p (operands[0], operands[1]))
19684 emit_move_insn (operands[0], operands[1]);
19686 split_double_mode (mode, operands, 1, low, high);
19687 emit_insn (gen_shld (high[0], low[0], operands[2]));
19690 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19692 if (TARGET_CMOVE && scratch)
19694 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19695 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19697 ix86_expand_clear (scratch);
19698 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19702 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19703 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19705 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19710 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19712 rtx (*gen_ashr3)(rtx, rtx, rtx)
19713 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19714 rtx (*gen_shrd)(rtx, rtx, rtx);
19715 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19717 rtx low[2], high[2];
19720 if (CONST_INT_P (operands[2]))
19722 split_double_mode (mode, operands, 2, low, high);
19723 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19725 if (count == GET_MODE_BITSIZE (mode) - 1)
19727 emit_move_insn (high[0], high[1]);
19728 emit_insn (gen_ashr3 (high[0], high[0],
19729 GEN_INT (half_width - 1)));
19730 emit_move_insn (low[0], high[0]);
19733 else if (count >= half_width)
19735 emit_move_insn (low[0], high[1]);
19736 emit_move_insn (high[0], low[0]);
19737 emit_insn (gen_ashr3 (high[0], high[0],
19738 GEN_INT (half_width - 1)));
19740 if (count > half_width)
19741 emit_insn (gen_ashr3 (low[0], low[0],
19742 GEN_INT (count - half_width)));
19746 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19748 if (!rtx_equal_p (operands[0], operands[1]))
19749 emit_move_insn (operands[0], operands[1]);
19751 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19752 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
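/* Illustrative sketch, not part of the compiler: the double-word
   arithmetic right shift by a constant 0 < COUNT < 32 built above from
   shrd + sar; for COUNT >= 32 the code instead moves the high half down
   and fills the high half with copies of the sign bit.  */
#if 0
static void
dashr (unsigned *lo, int *hi, int count)
{
  *lo = (*lo >> count) | ((unsigned) *hi << (32 - count));  /* shrd */
  *hi >>= count;                                            /* sar  */
}
#endif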
19757 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19759 if (!rtx_equal_p (operands[0], operands[1]))
19760 emit_move_insn (operands[0], operands[1]);
19762 split_double_mode (mode, operands, 1, low, high);
19764 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19765 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19767 if (TARGET_CMOVE && scratch)
19769 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19770 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19772 emit_move_insn (scratch, high[0]);
19773 emit_insn (gen_ashr3 (scratch, scratch,
19774 GEN_INT (half_width - 1)));
19775 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19780 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19781 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19783 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19789 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19791 rtx (*gen_lshr3)(rtx, rtx, rtx)
19792 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19793 rtx (*gen_shrd)(rtx, rtx, rtx);
19794 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19796 rtx low[2], high[2];
19799 if (CONST_INT_P (operands[2]))
19801 split_double_mode (mode, operands, 2, low, high);
19802 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19804 if (count >= half_width)
19806 emit_move_insn (low[0], high[1]);
19807 ix86_expand_clear (high[0]);
19809 if (count > half_width)
19810 emit_insn (gen_lshr3 (low[0], low[0],
19811 GEN_INT (count - half_width)));
19815 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19817 if (!rtx_equal_p (operands[0], operands[1]))
19818 emit_move_insn (operands[0], operands[1]);
19820 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19821 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19826 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19828 if (!rtx_equal_p (operands[0], operands[1]))
19829 emit_move_insn (operands[0], operands[1]);
19831 split_double_mode (mode, operands, 1, low, high);
19833 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19834 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19836 if (TARGET_CMOVE && scratch)
19838 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19839 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19841 ix86_expand_clear (scratch);
19842 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19847 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19848 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19850 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19855 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19857 predict_jump (int prob)
19859 rtx insn = get_last_insn ();
19860 gcc_assert (JUMP_P (insn));
19861 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19864 /* Helper function for the string operations below. Test VARIABLE for
19865 whether it is aligned to VALUE bytes. If so, jump to the label. */
19867 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19869 rtx label = gen_label_rtx ();
19870 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19871 if (GET_MODE (variable) == DImode)
19872 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19874 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19875 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19878 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19880 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19884 /* Adjust COUNTREG by VALUE. */
19886 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19888 rtx (*gen_add)(rtx, rtx, rtx)
19889 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19891 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19894 /* Zero extend the possibly SImode EXP to a Pmode register. */
19896 ix86_zero_extend_to_Pmode (rtx exp)
19899 if (GET_MODE (exp) == VOIDmode)
19900 return force_reg (Pmode, exp);
19901 if (GET_MODE (exp) == Pmode)
19902 return copy_to_mode_reg (Pmode, exp);
19903 r = gen_reg_rtx (Pmode);
19904 emit_insn (gen_zero_extendsidi2 (r, exp));
19908 /* Divide COUNTREG by SCALE. */
19910 scale_counter (rtx countreg, int scale)
19916 if (CONST_INT_P (countreg))
19917 return GEN_INT (INTVAL (countreg) / scale);
19918 gcc_assert (REG_P (countreg));
19920 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19921 GEN_INT (exact_log2 (scale)),
19922 NULL, 1, OPTAB_DIRECT);
19926 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19927 DImode for constant loop counts. */
19929 static enum machine_mode
19930 counter_mode (rtx count_exp)
19932 if (GET_MODE (count_exp) != VOIDmode)
19933 return GET_MODE (count_exp);
19934 if (!CONST_INT_P (count_exp))
19936 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19941 /* When SRCPTR is non-NULL, output a simple loop that copies memory
19942 from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
19943 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
19944 output the equivalent loop that sets memory to VALUE (assumed to be in MODE).
19946 The size is rounded down to a whole number of chunks moved at once.
19947 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
19951 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19952 rtx destptr, rtx srcptr, rtx value,
19953 rtx count, enum machine_mode mode, int unroll,
19956 rtx out_label, top_label, iter, tmp;
19957 enum machine_mode iter_mode = counter_mode (count);
19958 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19959 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19965 top_label = gen_label_rtx ();
19966 out_label = gen_label_rtx ();
19967 iter = gen_reg_rtx (iter_mode);
19969 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19970 NULL, 1, OPTAB_DIRECT);
19971 /* Those two should combine. */
19972 if (piece_size == const1_rtx)
19974 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19976 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19978 emit_move_insn (iter, const0_rtx);
19980 emit_label (top_label);
19982 tmp = convert_modes (Pmode, iter_mode, iter, true);
19983 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19984 destmem = change_address (destmem, mode, x_addr);
19988 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19989 srcmem = change_address (srcmem, mode, y_addr);
19991 /* When unrolling for chips that reorder memory reads and writes,
19992 we can save registers by using a single temporary.
19993 Also, using 4 temporaries is overkill in 32-bit mode. */
19994 if (!TARGET_64BIT && 0)
19996 for (i = 0; i < unroll; i++)
20001 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20003 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20005 emit_move_insn (destmem, srcmem);
20011 gcc_assert (unroll <= 4);
20012 for (i = 0; i < unroll; i++)
20014 tmpreg[i] = gen_reg_rtx (mode);
20018 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
20020 emit_move_insn (tmpreg[i], srcmem);
20022 for (i = 0; i < unroll; i++)
20027 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20029 emit_move_insn (destmem, tmpreg[i]);
20034 for (i = 0; i < unroll; i++)
20038 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
20039 emit_move_insn (destmem, value);
20042 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
20043 true, OPTAB_LIB_WIDEN);
20045 emit_move_insn (iter, tmp);
20047 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
20049 if (expected_size != -1)
20051 expected_size /= GET_MODE_SIZE (mode) * unroll;
20052 if (expected_size == 0)
20054 else if (expected_size > REG_BR_PROB_BASE)
20055 predict_jump (REG_BR_PROB_BASE - 1);
20057 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
20060 predict_jump (REG_BR_PROB_BASE * 80 / 100);
20061 iter = ix86_zero_extend_to_Pmode (iter);
20062 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
20063 true, OPTAB_LIB_WIDEN);
20064 if (tmp != destptr)
20065 emit_move_insn (destptr, tmp);
20068 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
20069 true, OPTAB_LIB_WIDEN);
20071 emit_move_insn (srcptr, tmp);
20073 emit_label (out_label);
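/* [Editor's note] A hedged C sketch, not part of the original file, of the
   loop shape emitted by expand_set_or_movmem_via_loop above for the memcpy
   case, assuming an 8-byte piece size (e.g. a 4-byte MODE with UNROLL == 2);
   the function name is illustrative only.  */

static void
movmem_loop_sketch (char *dest, const char *src, unsigned long count)
{
  unsigned long size = count & ~7UL;   /* piece_size_mask for 8-byte pieces */
  unsigned long iter;

  for (iter = 0; iter < size; iter += 8)
    {
      int i;
      char tmp[8];
      /* All loads first, then all stores, mirroring the tmpreg[] array
         above, so chips that reorder reads and writes are not hurt.  */
      for (i = 0; i < 8; i++)
        tmp[i] = src[iter + i];
      for (i = 0; i < 8; i++)
        dest[iter + i] = tmp[i];
    }
  /* The remaining count & 7 bytes are left to the epilogue; the emitted
     RTL also advances destptr and srcptr past the copied region.  */
}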
20076 /* Output "rep; mov" instruction.
20077 Arguments have the same meaning as for the previous function. */
20079 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
20080 rtx destptr, rtx srcptr,
20082 enum machine_mode mode)
20088 /* If the size is known, it is shorter to use rep movs. */
20089 if (mode == QImode && CONST_INT_P (count)
20090 && !(INTVAL (count) & 3))
20093 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20094 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20095 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
20096 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
20097 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20098 if (mode != QImode)
20100 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20101 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20102 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20103 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
20104 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20105 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
20109 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20110 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
20112 if (CONST_INT_P (count))
20114 count = GEN_INT (INTVAL (count)
20115 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20116 destmem = shallow_copy_rtx (destmem);
20117 srcmem = shallow_copy_rtx (srcmem);
20118 set_mem_size (destmem, count);
20119 set_mem_size (srcmem, count);
20123 if (MEM_SIZE (destmem))
20124 set_mem_size (destmem, NULL_RTX);
20125 if (MEM_SIZE (srcmem))
20126 set_mem_size (srcmem, NULL_RTX);
20128 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
20129 destexp, srcexp));
20130 }
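/* [Editor's note] A hedged sketch, not part of the original file, of what the
   emitted "rep; mov" does for a 4-byte MODE; the name is illustrative.
   COUNTREG holds the byte count scaled down by 4.  */

static void
rep_movsl_sketch (unsigned int *dest, const unsigned int *src,
                  unsigned long countreg)
{
  /* The rep prefix repeats movsl COUNTREG times, advancing both pointers;
     afterwards edi == dest + countreg and esi == src + countreg, which is
     what DESTEXP and SRCEXP above describe to the RTL.  */
  while (countreg--)
    *dest++ = *src++;
}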
20132 /* Output "rep; stos" instruction.
20133 Arguments have the same meaning as for the previous function. */
20135 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20136 rtx count, enum machine_mode mode,
20142 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20143 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20144 value = force_reg (mode, gen_lowpart (mode, value));
20145 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20146 if (mode != QImode)
20148 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20149 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20150 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20153 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20154 if (orig_value == const0_rtx && CONST_INT_P (count))
20156 count = GEN_INT (INTVAL (count)
20157 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20158 destmem = shallow_copy_rtx (destmem);
20159 set_mem_size (destmem, count);
20161 else if (MEM_SIZE (destmem))
20162 set_mem_size (destmem, NULL_RTX);
20163 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
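/* [Editor's note] The analogous hedged sketch for the emitted "rep; stos"
   with a 4-byte MODE; the name is illustrative.  VALUE sits in eax,
   COUNTREG in ecx, and edi ends one past the filled region (DESTEXP).  */

static void
rep_stosl_sketch (unsigned int *dest, unsigned int value,
                  unsigned long countreg)
{
  while (countreg--)    /* rep prefix decrements ecx until zero */
    *dest++ = value;    /* stosl stores eax and advances edi    */
}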
20167 emit_strmov (rtx destmem, rtx srcmem,
20168 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20170 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20171 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20172 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20175 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20177 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20178 rtx destptr, rtx srcptr, rtx count, int max_size)
20181 if (CONST_INT_P (count))
20183 HOST_WIDE_INT countval = INTVAL (count);
20186 if ((countval & 0x10) && max_size > 16)
20190 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20191 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20194 gcc_unreachable ();
20197 if ((countval & 0x08) && max_size > 8)
20200 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20203 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20204 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20208 if ((countval & 0x04) && max_size > 4)
20210 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20213 if ((countval & 0x02) && max_size > 2)
20215 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20218 if ((countval & 0x01) && max_size > 1)
20220 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20227 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20228 count, 1, OPTAB_DIRECT);
20229 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20230 count, QImode, 1, 4);
20234 /* When single-instruction stringops are available, we can cheaply advance
20235 the dest and src pointers. Otherwise we save code size by maintaining an
20236 offset register (zero is readily available from the preceding rep operation)
20237 and using x86 addressing modes. */
20238 if (TARGET_SINGLE_STRINGOP)
20242 rtx label = ix86_expand_aligntest (count, 4, true);
20243 src = change_address (srcmem, SImode, srcptr);
20244 dest = change_address (destmem, SImode, destptr);
20245 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20246 emit_label (label);
20247 LABEL_NUSES (label) = 1;
20251 rtx label = ix86_expand_aligntest (count, 2, true);
20252 src = change_address (srcmem, HImode, srcptr);
20253 dest = change_address (destmem, HImode, destptr);
20254 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20255 emit_label (label);
20256 LABEL_NUSES (label) = 1;
20260 rtx label = ix86_expand_aligntest (count, 1, true);
20261 src = change_address (srcmem, QImode, srcptr);
20262 dest = change_address (destmem, QImode, destptr);
20263 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20264 emit_label (label);
20265 LABEL_NUSES (label) = 1;
20270 rtx offset = force_reg (Pmode, const0_rtx);
20275 rtx label = ix86_expand_aligntest (count, 4, true);
20276 src = change_address (srcmem, SImode, srcptr);
20277 dest = change_address (destmem, SImode, destptr);
20278 emit_move_insn (dest, src);
20279 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20280 true, OPTAB_LIB_WIDEN);
20282 emit_move_insn (offset, tmp);
20283 emit_label (label);
20284 LABEL_NUSES (label) = 1;
20288 rtx label = ix86_expand_aligntest (count, 2, true);
20289 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20290 src = change_address (srcmem, HImode, tmp);
20291 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20292 dest = change_address (destmem, HImode, tmp);
20293 emit_move_insn (dest, src);
20294 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20295 true, OPTAB_LIB_WIDEN);
20297 emit_move_insn (offset, tmp);
20298 emit_label (label);
20299 LABEL_NUSES (label) = 1;
20303 rtx label = ix86_expand_aligntest (count, 1, true);
20304 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20305 src = change_address (srcmem, QImode, tmp);
20306 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20307 dest = change_address (destmem, QImode, tmp);
20308 emit_move_insn (dest, src);
20309 emit_label (label);
20310 LABEL_NUSES (label) = 1;
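/* [Editor's note] A hedged C sketch, not part of the original file, of the
   straight-line tail copy generated by the constant-count arm of
   expand_movmem_epilogue above, assuming max_size == 16 and DImode moves
   being available; the name is illustrative.  Each set bit of the residual
   count selects one move.  */

static void
movmem_epilogue_sketch (char *dest, const char *src, unsigned long count)
{
  unsigned long offset = 0;
  int i;

  if (count & 8)
    {
      for (i = 0; i < 8; i++)           /* one DImode move */
        dest[offset + i] = src[offset + i];
      offset += 8;
    }
  if (count & 4)
    {
      for (i = 0; i < 4; i++)           /* one SImode move */
        dest[offset + i] = src[offset + i];
      offset += 4;
    }
  if (count & 2)
    {
      dest[offset] = src[offset];       /* one HImode move */
      dest[offset + 1] = src[offset + 1];
      offset += 2;
    }
  if (count & 1)
    dest[offset] = src[offset];         /* one QImode move */
}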
20315 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20317 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20318 rtx count, int max_size)
20321 expand_simple_binop (counter_mode (count), AND, count,
20322 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20323 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20324 gen_lowpart (QImode, value), count, QImode,
20328 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
20330 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20334 if (CONST_INT_P (count))
20336 HOST_WIDE_INT countval = INTVAL (count);
20339 if ((countval & 0x10) && max_size > 16)
20343 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20344 emit_insn (gen_strset (destptr, dest, value));
20345 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20346 emit_insn (gen_strset (destptr, dest, value));
20349 gcc_unreachable ();
20352 if ((countval & 0x08) && max_size > 8)
20356 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20357 emit_insn (gen_strset (destptr, dest, value));
20361 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20362 emit_insn (gen_strset (destptr, dest, value));
20363 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20364 emit_insn (gen_strset (destptr, dest, value));
20368 if ((countval & 0x04) && max_size > 4)
20370 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20371 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20374 if ((countval & 0x02) && max_size > 2)
20376 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20377 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20380 if ((countval & 0x01) && max_size > 1)
20382 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20383 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20390 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20395 rtx label = ix86_expand_aligntest (count, 16, true);
20398 dest = change_address (destmem, DImode, destptr);
20399 emit_insn (gen_strset (destptr, dest, value));
20400 emit_insn (gen_strset (destptr, dest, value));
20404 dest = change_address (destmem, SImode, destptr);
20405 emit_insn (gen_strset (destptr, dest, value));
20406 emit_insn (gen_strset (destptr, dest, value));
20407 emit_insn (gen_strset (destptr, dest, value));
20408 emit_insn (gen_strset (destptr, dest, value));
20410 emit_label (label);
20411 LABEL_NUSES (label) = 1;
20415 rtx label = ix86_expand_aligntest (count, 8, true);
20418 dest = change_address (destmem, DImode, destptr);
20419 emit_insn (gen_strset (destptr, dest, value));
20423 dest = change_address (destmem, SImode, destptr);
20424 emit_insn (gen_strset (destptr, dest, value));
20425 emit_insn (gen_strset (destptr, dest, value));
20427 emit_label (label);
20428 LABEL_NUSES (label) = 1;
20432 rtx label = ix86_expand_aligntest (count, 4, true);
20433 dest = change_address (destmem, SImode, destptr);
20434 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20435 emit_label (label);
20436 LABEL_NUSES (label) = 1;
20440 rtx label = ix86_expand_aligntest (count, 2, true);
20441 dest = change_address (destmem, HImode, destptr);
20442 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20443 emit_label (label);
20444 LABEL_NUSES (label) = 1;
20448 rtx label = ix86_expand_aligntest (count, 1, true);
20449 dest = change_address (destmem, QImode, destptr);
20450 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20451 emit_label (label);
20452 LABEL_NUSES (label) = 1;
20456 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20457 to DESIRED_ALIGNMENT. */
20459 expand_movmem_prologue (rtx destmem, rtx srcmem,
20460 rtx destptr, rtx srcptr, rtx count,
20461 int align, int desired_alignment)
20463 if (align <= 1 && desired_alignment > 1)
20465 rtx label = ix86_expand_aligntest (destptr, 1, false);
20466 srcmem = change_address (srcmem, QImode, srcptr);
20467 destmem = change_address (destmem, QImode, destptr);
20468 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20469 ix86_adjust_counter (count, 1);
20470 emit_label (label);
20471 LABEL_NUSES (label) = 1;
20473 if (align <= 2 && desired_alignment > 2)
20475 rtx label = ix86_expand_aligntest (destptr, 2, false);
20476 srcmem = change_address (srcmem, HImode, srcptr);
20477 destmem = change_address (destmem, HImode, destptr);
20478 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20479 ix86_adjust_counter (count, 2);
20480 emit_label (label);
20481 LABEL_NUSES (label) = 1;
20483 if (align <= 4 && desired_alignment > 4)
20485 rtx label = ix86_expand_aligntest (destptr, 4, false);
20486 srcmem = change_address (srcmem, SImode, srcptr);
20487 destmem = change_address (destmem, SImode, destptr);
20488 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20489 ix86_adjust_counter (count, 4);
20490 emit_label (label);
20491 LABEL_NUSES (label) = 1;
20493 gcc_assert (desired_alignment <= 8);
20496 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20497 ALIGN_BYTES is how many bytes need to be copied. */
20499 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20500 int desired_align, int align_bytes)
20503 rtx src_size, dst_size;
20505 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20506 if (src_align_bytes >= 0)
20507 src_align_bytes = desired_align - src_align_bytes;
20508 src_size = MEM_SIZE (src);
20509 dst_size = MEM_SIZE (dst);
20510 if (align_bytes & 1)
20512 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20513 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20515 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20517 if (align_bytes & 2)
20519 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20520 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20521 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20522 set_mem_align (dst, 2 * BITS_PER_UNIT);
20523 if (src_align_bytes >= 0
20524 && (src_align_bytes & 1) == (align_bytes & 1)
20525 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20526 set_mem_align (src, 2 * BITS_PER_UNIT);
20528 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20530 if (align_bytes & 4)
20532 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20533 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20534 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20535 set_mem_align (dst, 4 * BITS_PER_UNIT);
20536 if (src_align_bytes >= 0)
20538 unsigned int src_align = 0;
20539 if ((src_align_bytes & 3) == (align_bytes & 3))
20541 else if ((src_align_bytes & 1) == (align_bytes & 1))
20543 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20544 set_mem_align (src, src_align * BITS_PER_UNIT);
20547 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20549 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20550 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20551 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20552 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20553 if (src_align_bytes >= 0)
20555 unsigned int src_align = 0;
20556 if ((src_align_bytes & 7) == (align_bytes & 7))
20558 else if ((src_align_bytes & 3) == (align_bytes & 3))
20560 else if ((src_align_bytes & 1) == (align_bytes & 1))
20562 if (src_align > (unsigned int) desired_align)
20563 src_align = desired_align;
20564 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20565 set_mem_align (src, src_align * BITS_PER_UNIT);
20568 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20570 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20575 /* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
20576 to DESIRED_ALIGNMENT. */
20578 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20579 int align, int desired_alignment)
20581 if (align <= 1 && desired_alignment > 1)
20583 rtx label = ix86_expand_aligntest (destptr, 1, false);
20584 destmem = change_address (destmem, QImode, destptr);
20585 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20586 ix86_adjust_counter (count, 1);
20587 emit_label (label);
20588 LABEL_NUSES (label) = 1;
20590 if (align <= 2 && desired_alignment > 2)
20592 rtx label = ix86_expand_aligntest (destptr, 2, false);
20593 destmem = change_address (destmem, HImode, destptr);
20594 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20595 ix86_adjust_counter (count, 2);
20596 emit_label (label);
20597 LABEL_NUSES (label) = 1;
20599 if (align <= 4 && desired_alignment > 4)
20601 rtx label = ix86_expand_aligntest (destptr, 4, false);
20602 destmem = change_address (destmem, SImode, destptr);
20603 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20604 ix86_adjust_counter (count, 4);
20605 emit_label (label);
20606 LABEL_NUSES (label) = 1;
20608 gcc_assert (desired_alignment <= 8);
20611 /* Set enough bytes at DST to align DST, known to be aligned by ALIGN,
20612 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20614 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20615 int desired_align, int align_bytes)
20618 rtx dst_size = MEM_SIZE (dst);
20619 if (align_bytes & 1)
20621 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20623 emit_insn (gen_strset (destreg, dst,
20624 gen_lowpart (QImode, value)));
20626 if (align_bytes & 2)
20628 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20629 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20630 set_mem_align (dst, 2 * BITS_PER_UNIT);
20632 emit_insn (gen_strset (destreg, dst,
20633 gen_lowpart (HImode, value)));
20635 if (align_bytes & 4)
20637 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20638 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20639 set_mem_align (dst, 4 * BITS_PER_UNIT);
20641 emit_insn (gen_strset (destreg, dst,
20642 gen_lowpart (SImode, value)));
20644 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20645 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20646 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20648 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20652 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20653 static enum stringop_alg
20654 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20655 int *dynamic_check)
20657 const struct stringop_algs * algs;
20658 bool optimize_for_speed;
20659 /* Algorithms using the rep prefix want at least edi and ecx;
20660 additionally, memset wants eax and memcpy wants esi. Don't
20661 consider such algorithms if the user has appropriated those
20662 registers for their own purposes. */
20663 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20665 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20667 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20668 || (alg != rep_prefix_1_byte \
20669 && alg != rep_prefix_4_byte \
20670 && alg != rep_prefix_8_byte))
20671 const struct processor_costs *cost;
20673 /* Even if the string operation call is cold, we still might spend a lot
20674 of time processing large blocks. */
20675 if (optimize_function_for_size_p (cfun)
20676 || (optimize_insn_for_size_p ()
20677 && expected_size != -1 && expected_size < 256))
20678 optimize_for_speed = false;
20680 optimize_for_speed = true;
20682 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20684 *dynamic_check = -1;
20686 algs = &cost->memset[TARGET_64BIT != 0];
20688 algs = &cost->memcpy[TARGET_64BIT != 0];
20689 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20690 return stringop_alg;
20691 /* rep; movq or rep; movl is the smallest variant. */
20692 else if (!optimize_for_speed)
20694 if (!count || (count & 3))
20695 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20697 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20699 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
20701 else if (expected_size != -1 && expected_size < 4)
20702 return loop_1_byte;
20703 else if (expected_size != -1)
20706 enum stringop_alg alg = libcall;
20707 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20709 /* We get here if the algorithms that were not libcall-based
20710 were rep-prefix based and we are unable to use rep prefixes
20711 based on global register usage. Break out of the loop and
20712 use the heuristic below. */
20713 if (algs->size[i].max == 0)
20715 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20717 enum stringop_alg candidate = algs->size[i].alg;
20719 if (candidate != libcall && ALG_USABLE_P (candidate))
20721 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20722 last non-libcall inline algorithm. */
20723 if (TARGET_INLINE_ALL_STRINGOPS)
20725 /* When the current size is best copied by a libcall,
20726 but we are still forced to inline, run the heuristic below
20727 that will pick code for medium-sized blocks. */
20728 if (alg != libcall)
20732 else if (ALG_USABLE_P (candidate))
20736 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20738 /* When asked to inline the call anyway, try to pick a meaningful choice.
20739 We look for the maximal size of block that is faster to copy by hand and
20740 take blocks of at most that size, guessing that the average size will
20741 be roughly half of the block's maximum.
20743 If this turns out to be bad, we might simply specify the preferred
20744 choice in ix86_costs. */
20745 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20746 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20749 enum stringop_alg alg;
20751 bool any_alg_usable_p = true;
20753 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20755 enum stringop_alg candidate = algs->size[i].alg;
20756 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20758 if (candidate != libcall && candidate
20759 && ALG_USABLE_P (candidate))
20760 max = algs->size[i].max;
20762 /* If there aren't any usable algorithms, then recursing on
20763 smaller sizes isn't going to find anything. Just return the
20764 simple byte-at-a-time copy loop. */
20765 if (!any_alg_usable_p)
20767 /* Pick something reasonable. */
20768 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20769 *dynamic_check = 128;
20770 return loop_1_byte;
20774 alg = decide_alg (count, max / 2, memset, dynamic_check);
20775 gcc_assert (*dynamic_check == -1);
20776 gcc_assert (alg != libcall);
20777 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20778 *dynamic_check = max;
20781 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20782 #undef ALG_USABLE_P
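/* [Editor's note] A hedged sketch, not part of the original file, of how the
   size-table walk in decide_alg above picks an algorithm for a known
   expected size.  The struct, names, and table values are all made up for
   illustration; the real table lives in the per-CPU cost structures.  */

struct alg_entry_sketch { long max; int alg; };

static int
decide_alg_sketch (long expected_size)
{
  /* Hypothetical table: blocks up to 256 bytes use algorithm 1; anything
     larger hits the max == -1 terminator and uses algorithm 2.  */
  static const struct alg_entry_sketch table[] = {
    { 256, 1 },
    { -1, 2 },
  };
  int i;

  for (i = 0; i < 2; i++)
    if (table[i].max >= expected_size || table[i].max == -1)
      return table[i].alg;
  return 0;   /* fall back, e.g. to a libcall */
}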
20785 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20786 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20788 decide_alignment (int align,
20789 enum stringop_alg alg,
20792 int desired_align = 0;
20796 gcc_unreachable ();
20798 case unrolled_loop:
20799 desired_align = GET_MODE_SIZE (Pmode);
20801 case rep_prefix_8_byte:
20804 case rep_prefix_4_byte:
20805 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20806 copying a whole cacheline at once. */
20807 if (TARGET_PENTIUMPRO)
20812 case rep_prefix_1_byte:
20813 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20814 copying a whole cacheline at once. */
20815 if (TARGET_PENTIUMPRO)
20829 if (desired_align < align)
20830 desired_align = align;
20831 if (expected_size != -1 && expected_size < 4)
20832 desired_align = align;
20833 return desired_align;
20836 /* Return the smallest power of 2 greater than VAL. */
20837 static int
20838 smallest_pow2_greater_than (int val)
20839 {
20840 int ret = 1;
20841 while (ret <= val)
20842 ret <<= 1;
20843 return ret;
20844 }
20846 /* Expand string move (memcpy) operation. Use i386 string operations when
20847 profitable. expand_setmem contains similar code. The code depends upon
20848 architecture, block size and alignment, but always has the same
20849 overall structure:
20851 1) Prologue guard: a conditional that jumps up to the epilogues for small
20852 blocks that can be handled by the epilogue alone. This is faster, but
20853 also needed for correctness, since the prologue assumes the block is larger
20854 than the desired alignment.
20856 An optional dynamic check for size, and a libcall for large
20857 blocks, is emitted here too, with -minline-stringops-dynamically.
20859 2) Prologue: copy the first few bytes in order to get the destination
20860 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20861 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20862 We emit either a jump tree (on power-of-two-sized blocks) or a byte loop.
20864 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20865 with the specified algorithm.
20867 4) Epilogue: code copying the tail of the block that is too small to be
20868 handled by the main body (or up to the size guarded by the prologue guard). */
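/* [Editor's note] A hedged C sketch, not part of the original file, of the
   four-step shape just described, for a copy with an 8-byte main loop; the
   function name and the exact thresholds are illustrative stand-ins for the
   emitted guard/prologue/body/epilogue code.  */

static void
movmem_shape_sketch (char *dest, const char *src, unsigned long count)
{
  unsigned long n = count;

  if (n >= 16)                       /* 1) prologue guard            */
    {
      while (((unsigned long) dest & 7) != 0)   /* 2) align dest     */
        {
          *dest++ = *src++;
          n--;
        }
      for (; n >= 8; n -= 8)         /* 3) main body, 8-byte chunks  */
        {
          int i;
          for (i = 0; i < 8; i++)
            dest[i] = src[i];
          dest += 8;
          src += 8;
        }
    }
  while (n--)                        /* 4) epilogue, tail bytes      */
    *dest++ = *src++;
}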
20871 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20872 rtx expected_align_exp, rtx expected_size_exp)
20878 rtx jump_around_label = NULL;
20879 HOST_WIDE_INT align = 1;
20880 unsigned HOST_WIDE_INT count = 0;
20881 HOST_WIDE_INT expected_size = -1;
20882 int size_needed = 0, epilogue_size_needed;
20883 int desired_align = 0, align_bytes = 0;
20884 enum stringop_alg alg;
20886 bool need_zero_guard = false;
20888 if (CONST_INT_P (align_exp))
20889 align = INTVAL (align_exp);
20890 /* i386 can do misaligned access at a reasonably increased cost. */
20891 if (CONST_INT_P (expected_align_exp)
20892 && INTVAL (expected_align_exp) > align)
20893 align = INTVAL (expected_align_exp);
20894 /* ALIGN is the minimum of destination and source alignment, but we care here
20895 just about destination alignment. */
20896 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20897 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20899 if (CONST_INT_P (count_exp))
20900 count = expected_size = INTVAL (count_exp);
20901 if (CONST_INT_P (expected_size_exp) && count == 0)
20902 expected_size = INTVAL (expected_size_exp);
20904 /* Make sure we don't need to care about overflow later on. */
20905 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20908 /* Step 0: Decide on preferred algorithm, desired alignment and
20909 size of chunks to be copied by main loop. */
20911 alg = decide_alg (count, expected_size, false, &dynamic_check);
20912 desired_align = decide_alignment (align, alg, expected_size);
20914 if (!TARGET_ALIGN_STRINGOPS)
20915 align = desired_align;
20917 if (alg == libcall)
20918 return 0;
20919 gcc_assert (alg != no_stringop);
20921 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20922 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20923 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20928 gcc_unreachable ();
20930 need_zero_guard = true;
20931 size_needed = GET_MODE_SIZE (Pmode);
20933 case unrolled_loop:
20934 need_zero_guard = true;
20935 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20937 case rep_prefix_8_byte:
20940 case rep_prefix_4_byte:
20943 case rep_prefix_1_byte:
20947 need_zero_guard = true;
20952 epilogue_size_needed = size_needed;
20954 /* Step 1: Prologue guard. */
20956 /* Alignment code needs count to be in register. */
20957 if (CONST_INT_P (count_exp) && desired_align > align)
20959 if (INTVAL (count_exp) > desired_align
20960 && INTVAL (count_exp) > size_needed)
20963 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20964 if (align_bytes <= 0)
20967 align_bytes = desired_align - align_bytes;
20969 if (align_bytes == 0)
20970 count_exp = force_reg (counter_mode (count_exp), count_exp);
20972 gcc_assert (desired_align >= 1 && align >= 1);
20974 /* Ensure that alignment prologue won't copy past end of block. */
20975 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20977 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20978 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20979 Make sure it is a power of 2. */
20980 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20984 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20986 /* If main algorithm works on QImode, no epilogue is needed.
20987 For small sizes just don't align anything. */
20988 if (size_needed == 1)
20989 desired_align = align;
20996 label = gen_label_rtx ();
20997 emit_cmp_and_jump_insns (count_exp,
20998 GEN_INT (epilogue_size_needed),
20999 LTU, 0, counter_mode (count_exp), 1, label);
21000 if (expected_size == -1 || expected_size < epilogue_size_needed)
21001 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21003 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21007 /* Emit code to decide at runtime whether a library call or inline code
21008 should be used. */
21009 if (dynamic_check != -1)
21011 if (CONST_INT_P (count_exp))
21013 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
21015 emit_block_move_via_libcall (dst, src, count_exp, false);
21016 count_exp = const0_rtx;
21022 rtx hot_label = gen_label_rtx ();
21023 jump_around_label = gen_label_rtx ();
21024 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21025 LEU, 0, GET_MODE (count_exp), 1, hot_label);
21026 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21027 emit_block_move_via_libcall (dst, src, count_exp, false);
21028 emit_jump (jump_around_label);
21029 emit_label (hot_label);
21033 /* Step 2: Alignment prologue. */
21035 if (desired_align > align)
21037 if (align_bytes == 0)
21039 /* Except for the first move in the epilogue, we no longer know
21040 the constant offset in aliasing info. It doesn't seem worth
21041 the pain to maintain it for the first move, so throw away
21042 the info early. */
21043 src = change_address (src, BLKmode, srcreg);
21044 dst = change_address (dst, BLKmode, destreg);
21045 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
21050 /* If we know how many bytes need to be stored before dst is
21051 sufficiently aligned, maintain aliasing info accurately. */
21052 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
21053 desired_align, align_bytes);
21054 count_exp = plus_constant (count_exp, -align_bytes);
21055 count -= align_bytes;
21057 if (need_zero_guard
21058 && (count < (unsigned HOST_WIDE_INT) size_needed
21059 || (align_bytes == 0
21060 && count < ((unsigned HOST_WIDE_INT) size_needed
21061 + desired_align - align))))
21063 /* It is possible that we copied enough so that the main loop will not
21064 execute. */
21065 gcc_assert (size_needed > 1);
21066 if (label == NULL_RTX)
21067 label = gen_label_rtx ();
21068 emit_cmp_and_jump_insns (count_exp,
21069 GEN_INT (size_needed),
21070 LTU, 0, counter_mode (count_exp), 1, label);
21071 if (expected_size == -1
21072 || expected_size < (desired_align - align) / 2 + size_needed)
21073 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21075 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21078 if (label && size_needed == 1)
21080 emit_label (label);
21081 LABEL_NUSES (label) = 1;
21083 epilogue_size_needed = 1;
21085 else if (label == NULL_RTX)
21086 epilogue_size_needed = size_needed;
21088 /* Step 3: Main loop. */
21094 gcc_unreachable ();
21096 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21097 count_exp, QImode, 1, expected_size);
21100 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21101 count_exp, Pmode, 1, expected_size);
21103 case unrolled_loop:
21104 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
21105 registers for 4 temporaries anyway. */
21106 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
21107 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
21110 case rep_prefix_8_byte:
21111 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21114 case rep_prefix_4_byte:
21115 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21118 case rep_prefix_1_byte:
21119 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
21123 /* Properly adjust the offsets of src and dest memory for aliasing. */
21124 if (CONST_INT_P (count_exp))
21126 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21127 (count / size_needed) * size_needed);
21128 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21129 (count / size_needed) * size_needed);
21133 src = change_address (src, BLKmode, srcreg);
21134 dst = change_address (dst, BLKmode, destreg);
21137 /* Step 4: Epilogue to copy the remaining bytes. */
21141 /* When the main loop is done, COUNT_EXP might hold the original count,
21142 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21143 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21144 bytes. Compensate if needed. */
21146 if (size_needed < epilogue_size_needed)
21149 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21150 GEN_INT (size_needed - 1), count_exp, 1,
21152 if (tmp != count_exp)
21153 emit_move_insn (count_exp, tmp);
21155 emit_label (label);
21156 LABEL_NUSES (label) = 1;
21159 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21160 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21161 epilogue_size_needed);
21162 if (jump_around_label)
21163 emit_label (jump_around_label);
21164 return 1;
21165 }
21167 /* Helper function for memset. For a QImode value 0xXY, produce
21168 0xXYXYXYXY of the width specified by MODE. This is essentially
21169 a multiplication by 0x01010101, but we can do slightly better than
21170 synth_mult by unwinding the sequence by hand on CPUs with
21171 slow multiply. */
21173 promote_duplicated_reg (enum machine_mode mode, rtx val)
21175 enum machine_mode valmode = GET_MODE (val);
21177 int nops = mode == DImode ? 3 : 2;
21179 gcc_assert (mode == SImode || mode == DImode);
21180 if (val == const0_rtx)
21181 return copy_to_mode_reg (mode, const0_rtx);
21182 if (CONST_INT_P (val))
21184 HOST_WIDE_INT v = INTVAL (val) & 255;
21188 if (mode == DImode)
21189 v |= (v << 16) << 16;
21190 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21193 if (valmode == VOIDmode)
21195 if (valmode != QImode)
21196 val = gen_lowpart (QImode, val);
21197 if (mode == QImode)
21199 if (!TARGET_PARTIAL_REG_STALL)
21201 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21202 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21203 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21204 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21206 rtx reg = convert_modes (mode, QImode, val, true);
21207 tmp = promote_duplicated_reg (mode, const1_rtx);
21208 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21213 rtx reg = convert_modes (mode, QImode, val, true);
21215 if (!TARGET_PARTIAL_REG_STALL)
21216 if (mode == SImode)
21217 emit_insn (gen_movsi_insv_1 (reg, reg));
21219 emit_insn (gen_movdi_insv_1 (reg, reg));
21222 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21223 NULL, 1, OPTAB_DIRECT);
21225 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21227 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21228 NULL, 1, OPTAB_DIRECT);
21229 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21230 if (mode == SImode)
21232 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21233 NULL, 1, OPTAB_DIRECT);
21234 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
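/* [Editor's note] A hedged sketch, not part of the original file, of the
   shift/or sequence promote_duplicated_reg unwinds above, for SImode; the
   name is illustrative.  Two shifts and two ORs replicate byte 0xXY into
   0xXYXYXYXY.  */

static unsigned int
promote_byte_sketch (unsigned char val)
{
  unsigned int v = val;     /* 0x000000XY */
  v |= v << 8;              /* 0x0000XYXY */
  v |= v << 16;             /* 0xXYXYXYXY */
  return v;
}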
21239 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
21240 will be needed by the main loop copying SIZE_NEEDED chunks and by the
21241 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
21243 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21248 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21249 promoted_val = promote_duplicated_reg (DImode, val);
21250 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21251 promoted_val = promote_duplicated_reg (SImode, val);
21252 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21253 promoted_val = promote_duplicated_reg (HImode, val);
21255 promoted_val = val;
21257 return promoted_val;
21260 /* Expand string set operation (memset/bzero). Use i386 string operations
21261 when profitable. See the expand_movmem comment for an explanation of the
21262 individual steps performed. */
21264 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21265 rtx expected_align_exp, rtx expected_size_exp)
21270 rtx jump_around_label = NULL;
21271 HOST_WIDE_INT align = 1;
21272 unsigned HOST_WIDE_INT count = 0;
21273 HOST_WIDE_INT expected_size = -1;
21274 int size_needed = 0, epilogue_size_needed;
21275 int desired_align = 0, align_bytes = 0;
21276 enum stringop_alg alg;
21277 rtx promoted_val = NULL;
21278 bool force_loopy_epilogue = false;
21280 bool need_zero_guard = false;
21282 if (CONST_INT_P (align_exp))
21283 align = INTVAL (align_exp);
21284 /* i386 can do misaligned access at a reasonably increased cost. */
21285 if (CONST_INT_P (expected_align_exp)
21286 && INTVAL (expected_align_exp) > align)
21287 align = INTVAL (expected_align_exp);
21288 if (CONST_INT_P (count_exp))
21289 count = expected_size = INTVAL (count_exp);
21290 if (CONST_INT_P (expected_size_exp) && count == 0)
21291 expected_size = INTVAL (expected_size_exp);
21293 /* Make sure we don't need to care about overflow later on. */
21294 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21297 /* Step 0: Decide on preferred algorithm, desired alignment and
21298 size of chunks to be copied by main loop. */
21300 alg = decide_alg (count, expected_size, true, &dynamic_check);
21301 desired_align = decide_alignment (align, alg, expected_size);
21303 if (!TARGET_ALIGN_STRINGOPS)
21304 align = desired_align;
21306 if (alg == libcall)
21307 return 0;
21308 gcc_assert (alg != no_stringop);
21310 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21311 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21316 gcc_unreachable ();
21318 need_zero_guard = true;
21319 size_needed = GET_MODE_SIZE (Pmode);
21321 case unrolled_loop:
21322 need_zero_guard = true;
21323 size_needed = GET_MODE_SIZE (Pmode) * 4;
21325 case rep_prefix_8_byte:
21328 case rep_prefix_4_byte:
21331 case rep_prefix_1_byte:
21335 need_zero_guard = true;
21339 epilogue_size_needed = size_needed;
21341 /* Step 1: Prologue guard. */
21343 /* Alignment code needs count to be in register. */
21344 if (CONST_INT_P (count_exp) && desired_align > align)
21346 if (INTVAL (count_exp) > desired_align
21347 && INTVAL (count_exp) > size_needed)
21350 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21351 if (align_bytes <= 0)
21354 align_bytes = desired_align - align_bytes;
21356 if (align_bytes == 0)
21358 enum machine_mode mode = SImode;
21359 if (TARGET_64BIT && (count & ~0xffffffff))
21361 count_exp = force_reg (mode, count_exp);
21364 /* Do the cheap promotion to allow better CSE across the
21365 main loop and epilogue (i.e., one load of the big constant in
21366 front of all the code). */
21367 if (CONST_INT_P (val_exp))
21368 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21369 desired_align, align);
21370 /* Ensure that alignment prologue won't copy past end of block. */
21371 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21373 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21374 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21375 Make sure it is a power of 2. */
21376 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21378 /* To improve performance of small blocks, we jump around the VAL
21379 promoting code. This means that if the promoted VAL is not constant,
21380 we might not use it in the epilogue and have to use the byte
21381 loop variant. */
21382 if (epilogue_size_needed > 2 && !promoted_val)
21383 force_loopy_epilogue = true;
21386 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21388 /* If main algorithm works on QImode, no epilogue is needed.
21389 For small sizes just don't align anything. */
21390 if (size_needed == 1)
21391 desired_align = align;
21398 label = gen_label_rtx ();
21399 emit_cmp_and_jump_insns (count_exp,
21400 GEN_INT (epilogue_size_needed),
21401 LTU, 0, counter_mode (count_exp), 1, label);
21402 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21403 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21405 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21408 if (dynamic_check != -1)
21410 rtx hot_label = gen_label_rtx ();
21411 jump_around_label = gen_label_rtx ();
21412 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21413 LEU, 0, counter_mode (count_exp), 1, hot_label);
21414 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21415 set_storage_via_libcall (dst, count_exp, val_exp, false);
21416 emit_jump (jump_around_label);
21417 emit_label (hot_label);
21420 /* Step 2: Alignment prologue. */
21422 /* Do the expensive promotion once we branched off the small blocks. */
21424 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21425 desired_align, align);
21426 gcc_assert (desired_align >= 1 && align >= 1);
21428 if (desired_align > align)
21430 if (align_bytes == 0)
21432 /* Except for the first move in the epilogue, we no longer know
21433 the constant offset in aliasing info. It doesn't seem worth
21434 the pain to maintain it for the first move, so throw away
21435 the info early. */
21436 dst = change_address (dst, BLKmode, destreg);
21437 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21442 /* If we know how many bytes need to be stored before dst is
21443 sufficiently aligned, maintain aliasing info accurately. */
21444 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21445 desired_align, align_bytes);
21446 count_exp = plus_constant (count_exp, -align_bytes);
21447 count -= align_bytes;
21449 if (need_zero_guard
21450 && (count < (unsigned HOST_WIDE_INT) size_needed
21451 || (align_bytes == 0
21452 && count < ((unsigned HOST_WIDE_INT) size_needed
21453 + desired_align - align))))
21455 /* It is possible that we copied enough so that the main loop will not
21456 execute. */
21457 gcc_assert (size_needed > 1);
21458 if (label == NULL_RTX)
21459 label = gen_label_rtx ();
21460 emit_cmp_and_jump_insns (count_exp,
21461 GEN_INT (size_needed),
21462 LTU, 0, counter_mode (count_exp), 1, label);
21463 if (expected_size == -1
21464 || expected_size < (desired_align - align) / 2 + size_needed)
21465 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21467 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21470 if (label && size_needed == 1)
21472 emit_label (label);
21473 LABEL_NUSES (label) = 1;
21475 promoted_val = val_exp;
21476 epilogue_size_needed = 1;
21478 else if (label == NULL_RTX)
21479 epilogue_size_needed = size_needed;
21481 /* Step 3: Main loop. */
21487 gcc_unreachable ();
21489 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21490 count_exp, QImode, 1, expected_size);
21493 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21494 count_exp, Pmode, 1, expected_size);
21496 case unrolled_loop:
21497 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21498 count_exp, Pmode, 4, expected_size);
21500 case rep_prefix_8_byte:
21501 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21504 case rep_prefix_4_byte:
21505 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21508 case rep_prefix_1_byte:
21509 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21513 /* Properly adjust the offset of the dest memory for aliasing. */
21514 if (CONST_INT_P (count_exp))
21515 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21516 (count / size_needed) * size_needed);
21518 dst = change_address (dst, BLKmode, destreg);
21520 /* Step 4: Epilogue to copy the remaining bytes. */
21524 /* When the main loop is done, COUNT_EXP might hold the original count,
21525 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21526 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21527 bytes. Compensate if needed. */
21529 if (size_needed < epilogue_size_needed)
21532 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21533 GEN_INT (size_needed - 1), count_exp, 1,
21535 if (tmp != count_exp)
21536 emit_move_insn (count_exp, tmp);
21538 emit_label (label);
21539 LABEL_NUSES (label) = 1;
21542 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21544 if (force_loopy_epilogue)
21545 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21546 epilogue_size_needed);
21548 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21549 epilogue_size_needed);
21551 if (jump_around_label)
21552 emit_label (jump_around_label);
21553 return 1;
21554 }
21556 /* Expand the appropriate insns for doing strlen if not just doing
21557 repnz; scasb
21559 out = result, initialized with the start address
21560 align_rtx = alignment of the address.
21561 scratch = scratch register, initialized with the start address when
21562 not aligned, otherwise undefined
21564 This is just the body. It needs the initializations mentioned above and
21565 some address computation at the end. These things are done in i386.md. */
21568 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21572 rtx align_2_label = NULL_RTX;
21573 rtx align_3_label = NULL_RTX;
21574 rtx align_4_label = gen_label_rtx ();
21575 rtx end_0_label = gen_label_rtx ();
21577 rtx tmpreg = gen_reg_rtx (SImode);
21578 rtx scratch = gen_reg_rtx (SImode);
21582 if (CONST_INT_P (align_rtx))
21583 align = INTVAL (align_rtx);
21585 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21587 /* Is there a known alignment and is it less than 4? */
21590 rtx scratch1 = gen_reg_rtx (Pmode);
21591 emit_move_insn (scratch1, out);
21592 /* Is there a known alignment and is it not 2? */
21595 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21596 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21598 /* Leave just the 3 lower bits. */
21599 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21600 NULL_RTX, 0, OPTAB_WIDEN);
21602 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21603 Pmode, 1, align_4_label);
21604 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21605 Pmode, 1, align_2_label);
21606 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21607 Pmode, 1, align_3_label);
21611 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21612 check whether it is aligned to 4 bytes. */
21614 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21615 NULL_RTX, 0, OPTAB_WIDEN);
21617 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21618 Pmode, 1, align_4_label);
21621 mem = change_address (src, QImode, out);
21623 /* Now compare the bytes. */
21625 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21626 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21627 QImode, 1, end_0_label);
21629 /* Increment the address. */
21630 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21632 /* Not needed with an alignment of 2. */
21635 emit_label (align_2_label);
21637 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21640 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21642 emit_label (align_3_label);
21645 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21648 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21651 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21652 align this loop; it only produces huge programs and does not help
21653 speed. */
21654 emit_label (align_4_label);
21656 mem = change_address (src, SImode, out);
21657 emit_move_insn (scratch, mem);
21658 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21660 /* This formula yields a nonzero result iff one of the bytes is zero.
21661 This saves three branches inside the loop and many cycles. */
21663 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21664 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21665 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21666 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21667 gen_int_mode (0x80808080, SImode)));
21668 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21673 rtx reg = gen_reg_rtx (SImode);
21674 rtx reg2 = gen_reg_rtx (Pmode);
21675 emit_move_insn (reg, tmpreg);
21676 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21678 /* If zero is not in the first two bytes, move two bytes forward. */
21679 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21680 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21681 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21682 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21683 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21686 /* Emit lea manually to avoid clobbering of flags. */
21687 emit_insn (gen_rtx_SET (SImode, reg2,
21688 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21690 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21691 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21692 emit_insn (gen_rtx_SET (VOIDmode, out,
21693 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21699 rtx end_2_label = gen_label_rtx ();
21700 /* Is zero in the first two bytes? */
21702 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21703 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21704 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21705 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21706 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21708 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21709 JUMP_LABEL (tmp) = end_2_label;
21711 /* Not in the first two. Move two bytes forward. */
21712 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21713 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21715 emit_label (end_2_label);
21719 /* Avoid branch in fixing the byte. */
21720 tmpreg = gen_lowpart (QImode, tmpreg);
21721 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21722 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21723 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21724 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21726 emit_label (end_0_label);
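/* [Editor's note] A hedged sketch, not part of the original file, of the
   zero-byte test used in the 4-byte loop above; the name is illustrative.
   The subtraction borrows out of any zero byte, ~x rejects bytes whose top
   bit was already set, and the final mask keeps only the per-byte flags.  */

static unsigned int
has_zero_byte_sketch (unsigned int x)
{
  /* Nonzero iff some byte of X is zero; e.g. 0x12345600 yields
     0x00000080, while 0x01010101 yields 0.  */
  return (x - 0x01010101U) & ~x & 0x80808080U;
}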
21729 /* Expand strlen. */
21732 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21734 rtx addr, scratch1, scratch2, scratch3, scratch4;
21736 /* The generic case of the strlen expander is long. Avoid expanding
21737 it unless TARGET_INLINE_ALL_STRINGOPS. */
21739 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21740 && !TARGET_INLINE_ALL_STRINGOPS
21741 && !optimize_insn_for_size_p ()
21742 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21745 addr = force_reg (Pmode, XEXP (src, 0));
21746 scratch1 = gen_reg_rtx (Pmode);
21748 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21749 && !optimize_insn_for_size_p ())
21751 /* Well, it seems that some optimizer does not combine a call like
21752 foo (strlen (bar), strlen (bar));
21753 when the move and the subtraction are done here. It does calculate
21754 the length just once when these instructions are done inside
21755 output_strlen_unroll(). But since &bar[strlen(bar)] is
21756 often used and this uses one fewer register for the lifetime of
21757 output_strlen_unroll(), this is better. */
21759 emit_move_insn (out, addr);
21761 ix86_expand_strlensi_unroll_1 (out, src, align);
21763 /* strlensi_unroll_1 returns the address of the zero at the end of
21764 the string, like memchr(), so compute the length by subtracting
21765 the start address. */
21766 emit_insn (ix86_gen_sub3 (out, out, addr));
21772 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21773 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21776 scratch2 = gen_reg_rtx (Pmode);
21777 scratch3 = gen_reg_rtx (Pmode);
21778 scratch4 = force_reg (Pmode, constm1_rtx);
21780 emit_move_insn (scratch3, addr);
21781 eoschar = force_reg (QImode, eoschar);
21783 src = replace_equiv_address_nv (src, scratch3);
21785 /* If .md starts supporting :P, this can be done in .md. */
21786 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21787 scratch4), UNSPEC_SCAS);
21788 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21789 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21790 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21795 /* For a given symbol (function), construct code to compute the address of
21796 its PLT entry in the large x86-64 PIC model. */
21798 construct_plt_address (rtx symbol)
21800 rtx tmp = gen_reg_rtx (Pmode);
21801 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21803 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21804 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21806 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21807 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21812 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21814 rtx pop, int sibcall)
21816 rtx use = NULL, call;
21818 if (pop == const0_rtx)
21819 pop = NULL;
21820 gcc_assert (!TARGET_64BIT || !pop);
21822 if (TARGET_MACHO && !TARGET_64BIT)
21825 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21826 fnaddr = machopic_indirect_call_target (fnaddr);
21831 /* Static functions and indirect calls don't need the pic register. */
21832 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21833 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21834 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21835 use_reg (&use, pic_offset_table_rtx);
21838 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21840 rtx al = gen_rtx_REG (QImode, AX_REG);
21841 emit_move_insn (al, callarg2);
21842 use_reg (&use, al);
21845 if (ix86_cmodel == CM_LARGE_PIC
21847 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21848 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21849 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21851 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21852 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21854 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21855 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21858 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21860 call = gen_rtx_SET (VOIDmode, retval, call);
21863 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21864 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21865 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21868 && ix86_cfun_abi () == MS_ABI
21869 && (!callarg2 || INTVAL (callarg2) != -2))
21871 /* We need to represent that SI and DI registers are clobbered
21872 by SYSV calls. */
21873 static int clobbered_registers[] = {
21874 XMM6_REG, XMM7_REG, XMM8_REG,
21875 XMM9_REG, XMM10_REG, XMM11_REG,
21876 XMM12_REG, XMM13_REG, XMM14_REG,
21877 XMM15_REG, SI_REG, DI_REG
21880 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21881 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21882 UNSPEC_MS_TO_SYSV_CALL);
21886 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21887 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21890 (SSE_REGNO_P (clobbered_registers[i])
21892 clobbered_registers[i]));
21894 call = gen_rtx_PARALLEL (VOIDmode,
21895 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21899 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21900 if (TARGET_VZEROUPPER)
21905 if (cfun->machine->callee_pass_avx256_p)
21907 if (cfun->machine->callee_return_avx256_p)
21908 avx256 = callee_return_pass_avx256;
21910 avx256 = callee_pass_avx256;
21912 else if (cfun->machine->callee_return_avx256_p)
21913 avx256 = callee_return_avx256;
21915 avx256 = call_no_avx256;
21917 if (reload_completed)
21918 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21921 unspec = gen_rtx_UNSPEC (VOIDmode,
21922 gen_rtvec (1, GEN_INT (avx256)),
21923 UNSPEC_CALL_NEEDS_VZEROUPPER);
21924 call = gen_rtx_PARALLEL (VOIDmode,
21925 gen_rtvec (2, call, unspec));
21929 call = emit_call_insn (call);
21931 CALL_INSN_FUNCTION_USAGE (call) = use;
21937 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21939 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21940 emit_insn (gen_avx_vzeroupper (vzeroupper));
21941 emit_call_insn (call);
21944 /* Output the assembly for a call instruction. */
21947 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21949 bool direct_p = constant_call_address_operand (call_op, Pmode);
21950 bool seh_nop_p = false;
21952 gcc_assert (addr_op == 0 || addr_op == 1);
21954 if (SIBLING_CALL_P (insn))
21957 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21958 /* SEH epilogue detection requires the indirect branch case
21959 to include REX.W. */
21960 else if (TARGET_SEH)
21961 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21963 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21966 /* SEH unwinding can require an extra nop to be emitted in several
21967 circumstances. Determine if we have one of those. */
21972 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21974 /* If we get to another real insn, we don't need the nop. */
21978 /* If we get to the epilogue note, prevent a catch region from
21979 being adjacent to the standard epilogue sequence. With non-call
21980 exceptions, we'll have done this during epilogue emission. */
21981 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21982 && !flag_non_call_exceptions
21983 && !can_throw_internal (insn))
21990 /* If we didn't find a real insn following the call, prevent the
21991 unwinder from looking into the next function. */
21999 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
22001 return addr_op ? "call\t%P1" : "call\t%P0";
22006 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
22008 return addr_op ? "call\t%A1" : "call\t%A0";
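/* Illustrative expansions of the templates above (assuming AT&T syntax
   output): a direct call prints as "call foo" (%P), an indirect call as
   "call *%eax" (%A), sibcalls use "jmp" instead, and the "\n\tnop"
   variants are only selected when SEH unwinding needs padding after
   the call.  */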
22012 /* Clear stack slot assignments remembered from previous functions.
22013 This is called from INIT_EXPANDERS once before RTL is emitted for each
22014 function. */
22016 static struct machine_function *
22017 ix86_init_machine_status (void)
22019 struct machine_function *f;
22021 f = ggc_alloc_cleared_machine_function ();
22022 f->use_fast_prologue_epilogue_nregs = -1;
22023 f->tls_descriptor_call_expanded_p = 0;
22024 f->call_abi = ix86_abi;
22029 /* Return a MEM corresponding to a stack slot with mode MODE.
22030 Allocate a new slot if necessary.
22032 The RTL for a function can have several slots available: N is
22033 which slot to use. */
22036 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
22038 struct stack_local_entry *s;
22040 gcc_assert (n < MAX_386_STACK_LOCALS);
22042 /* Virtual slot is valid only before vregs are instantiated. */
22043 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
22045 for (s = ix86_stack_locals; s; s = s->next)
22046 if (s->mode == mode && s->n == n)
22047 return copy_rtx (s->rtl);
22049 s = ggc_alloc_stack_local_entry ();
22052 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
22054 s->next = ix86_stack_locals;
22055 ix86_stack_locals = s;
22059 /* Construct the SYMBOL_REF for the tls_get_addr function. */
22061 static GTY(()) rtx ix86_tls_symbol;
22063 ix86_tls_get_addr (void)
22066 if (!ix86_tls_symbol)
22068 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
22069 (TARGET_ANY_GNU_TLS
22071 ? "___tls_get_addr"
22072 : "__tls_get_addr");
22075 return ix86_tls_symbol;
22078 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
22080 static GTY(()) rtx ix86_tls_module_base_symbol;
22082 ix86_tls_module_base (void)
22085 if (!ix86_tls_module_base_symbol)
22087 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
22088 "_TLS_MODULE_BASE_");
22089 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
22090 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
22093 return ix86_tls_module_base_symbol;
22096 /* Calculate the length of the memory address in the instruction
22097 encoding. Does not include the one-byte modrm, opcode, or prefix. */
22100 memory_address_length (rtx addr)
22102 struct ix86_address parts;
22103 rtx base, index, disp;
22107 if (GET_CODE (addr) == PRE_DEC
22108 || GET_CODE (addr) == POST_INC
22109 || GET_CODE (addr) == PRE_MODIFY
22110 || GET_CODE (addr) == POST_MODIFY)
22113 ok = ix86_decompose_address (addr, &parts);
22116 if (parts.base && GET_CODE (parts.base) == SUBREG)
22117 parts.base = SUBREG_REG (parts.base);
22118 if (parts.index && GET_CODE (parts.index) == SUBREG)
22119 parts.index = SUBREG_REG (parts.index);
22122 index = parts.index;
22126 /* Rule of thumb:
22127 - esp as the base always wants an index,
22128 - ebp as the base always wants a displacement,
22129 - r12 as the base always wants an index,
22130 - r13 as the base always wants a displacement. */
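/* Illustrative encodings for the rules above (lengths as computed here,
   i.e. excluding the modrm, opcode, and prefix bytes): "(%eax)" needs no
   extra bytes; "(%esp)" needs a SIB byte; "4(%ebp)" needs a disp8 byte;
   "(%eax,%ebx,4)" needs a SIB byte; "foo(%eax)" needs a 4-byte disp32.  */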
22132 /* Register Indirect. */
22133 if (base && !index && !disp)
22135 /* esp (for its index) and ebp (for its displacement) need
22136 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
22137 mode. */
22139 && (addr == arg_pointer_rtx
22140 || addr == frame_pointer_rtx
22141 || REGNO (addr) == SP_REG
22142 || REGNO (addr) == BP_REG
22143 || REGNO (addr) == R12_REG
22144 || REGNO (addr) == R13_REG))
22148 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22149 is not disp32, but disp32(%rip), so for disp32
22150 SIB byte is needed, unless print_operand_address
22151 optimizes it into disp32(%rip) or (%rip) is implied
22152 by UNSPEC. */
22153 else if (disp && !base && !index)
22160 if (GET_CODE (disp) == CONST)
22161 symbol = XEXP (disp, 0);
22162 if (GET_CODE (symbol) == PLUS
22163 && CONST_INT_P (XEXP (symbol, 1)))
22164 symbol = XEXP (symbol, 0);
22166 if (GET_CODE (symbol) != LABEL_REF
22167 && (GET_CODE (symbol) != SYMBOL_REF
22168 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22169 && (GET_CODE (symbol) != UNSPEC
22170 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22171 && XINT (symbol, 1) != UNSPEC_PCREL
22172 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22179 /* Find the length of the displacement constant. */
22182 if (base && satisfies_constraint_K (disp))
22187 /* ebp always wants a displacement. Similarly r13. */
22188 else if (base && REG_P (base)
22189 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22192 /* An index requires the two-byte modrm form.... */
22193 else if (index
22194 /* ...like esp (or r12), which always wants an index. */
22195 || base == arg_pointer_rtx
22196 || base == frame_pointer_rtx
22197 || (base && REG_P (base)
22198 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
22215 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22216 is set, expect that the insn has an 8-bit immediate alternative. */
22218 ix86_attr_length_immediate_default (rtx insn, int shortform)
22222 extract_insn_cached (insn);
22223 for (i = recog_data.n_operands - 1; i >= 0; --i)
22224 if (CONSTANT_P (recog_data.operand[i]))
22226 enum attr_mode mode = get_attr_mode (insn);
22229 if (shortform && CONST_INT_P (recog_data.operand[i]))
22231 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22238 ival = trunc_int_for_mode (ival, HImode);
22241 ival = trunc_int_for_mode (ival, SImode);
22246 if (IN_RANGE (ival, -128, 127))
22263 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
22268 fatal_insn ("unknown insn mode", insn);
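/* Worked examples (illustrative): with SHORTFORM, "addl $3, %eax" fits
   the sign-extended imm8 alternative, so the immediate length is 1;
   "addl $1000, %eax" needs the full imm32, length 4; and per the note
   above, DImode also yields 4 because 64-bit immediates are encoded as
   32-bit sign-extended values.  */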
22273 /* Compute default value for "length_address" attribute. */
22275 ix86_attr_length_address_default (rtx insn)
22279 if (get_attr_type (insn) == TYPE_LEA)
22281 rtx set = PATTERN (insn), addr;
22283 if (GET_CODE (set) == PARALLEL)
22284 set = XVECEXP (set, 0, 0);
22286 gcc_assert (GET_CODE (set) == SET);
22288 addr = SET_SRC (set);
22289 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22291 if (GET_CODE (addr) == ZERO_EXTEND)
22292 addr = XEXP (addr, 0);
22293 if (GET_CODE (addr) == SUBREG)
22294 addr = SUBREG_REG (addr);
22297 return memory_address_length (addr);
22300 extract_insn_cached (insn);
22301 for (i = recog_data.n_operands - 1; i >= 0; --i)
22302 if (MEM_P (recog_data.operand[i]))
22304 constrain_operands_cached (reload_completed);
22305 if (which_alternative != -1)
22307 const char *constraints = recog_data.constraints[i];
22308 int alt = which_alternative;
22310 while (*constraints == '=' || *constraints == '+')
22313 while (*constraints++ != ',')
22315 /* Skip ignored operands. */
22316 if (*constraints == 'X')
22319 return memory_address_length (XEXP (recog_data.operand[i], 0));
22324 /* Compute default value for "length_vex" attribute. It includes
22325 2 or 3 byte VEX prefix and 1 opcode byte. */
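/* For example (illustrative): an AVX insn in the 0f opcode map touching
   only %xmm0-%xmm7 can use the 2-byte (C5) VEX prefix, giving 2 + 1 = 3;
   VEX.W, a non-0f opcode map, or operands needing REX.X or REX.B force
   the 3-byte (C4) prefix, giving 3 + 1 = 4.  */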
22328 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22333 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
22334 bit requires the 3-byte VEX prefix. */
22335 if (!has_0f_opcode || has_vex_w)
22338 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22342 extract_insn_cached (insn);
22344 for (i = recog_data.n_operands - 1; i >= 0; --i)
22345 if (REG_P (recog_data.operand[i]))
22347 /* The REX.W bit requires the 3-byte VEX prefix. */
22348 if (GET_MODE (recog_data.operand[i]) == DImode
22349 && GENERAL_REG_P (recog_data.operand[i]))
22354 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
22355 if (MEM_P (recog_data.operand[i])
22356 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22363 /* Return the maximum number of instructions a cpu can issue. */
22366 ix86_issue_rate (void)
22370 case PROCESSOR_PENTIUM:
22371 case PROCESSOR_ATOM:
22375 case PROCESSOR_PENTIUMPRO:
22376 case PROCESSOR_PENTIUM4:
22377 case PROCESSOR_CORE2_32:
22378 case PROCESSOR_CORE2_64:
22379 case PROCESSOR_COREI7_32:
22380 case PROCESSOR_COREI7_64:
22381 case PROCESSOR_ATHLON:
22383 case PROCESSOR_AMDFAM10:
22384 case PROCESSOR_NOCONA:
22385 case PROCESSOR_GENERIC32:
22386 case PROCESSOR_GENERIC64:
22387 case PROCESSOR_BDVER1:
22388 case PROCESSOR_BTVER1:
22396 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
22397 by DEP_INSN and nothing else set by DEP_INSN. */
22400 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22404 /* Simplify the test for uninteresting insns. */
22405 if (insn_type != TYPE_SETCC
22406 && insn_type != TYPE_ICMOV
22407 && insn_type != TYPE_FCMOV
22408 && insn_type != TYPE_IBR)
22411 if ((set = single_set (dep_insn)) != 0)
22413 set = SET_DEST (set);
22416 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22417 && XVECLEN (PATTERN (dep_insn), 0) == 2
22418 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22419 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22421 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22422 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22427 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22430 /* This test is true if the dependent insn reads the flags but
22431 not any other potentially set register. */
22432 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22435 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22441 /* Return true iff USE_INSN has a memory address with operands set by
22442 SET_INSN. */
22445 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22448 extract_insn_cached (use_insn);
22449 for (i = recog_data.n_operands - 1; i >= 0; --i)
22450 if (MEM_P (recog_data.operand[i]))
22452 rtx addr = XEXP (recog_data.operand[i], 0);
22453 return modified_in_p (addr, set_insn) != 0;
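/* Illustrative AGI pair: if SET_INSN is "addl $4, %ebx" and USE_INSN is
   "movl (%ebx), %eax", the load address is modified by SET_INSN, so this
   returns true and the Pentium cost model below adds an extra cycle of
   latency.  */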
22459 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22461 enum attr_type insn_type, dep_insn_type;
22462 enum attr_memory memory;
22464 int dep_insn_code_number;
22466 /* Anti and output dependencies have zero cost on all CPUs. */
22467 if (REG_NOTE_KIND (link) != 0)
22470 dep_insn_code_number = recog_memoized (dep_insn);
22472 /* If we can't recognize the insns, we can't really do anything. */
22473 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22476 insn_type = get_attr_type (insn);
22477 dep_insn_type = get_attr_type (dep_insn);
22481 case PROCESSOR_PENTIUM:
22482 /* Address Generation Interlock adds a cycle of latency. */
22483 if (insn_type == TYPE_LEA)
22485 rtx addr = PATTERN (insn);
22487 if (GET_CODE (addr) == PARALLEL)
22488 addr = XVECEXP (addr, 0, 0);
22490 gcc_assert (GET_CODE (addr) == SET);
22492 addr = SET_SRC (addr);
22493 if (modified_in_p (addr, dep_insn))
22496 else if (ix86_agi_dependent (dep_insn, insn))
22499 /* ??? Compares pair with jump/setcc. */
22500 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22503 /* Floating point stores require the value to be ready one cycle earlier. */
22504 if (insn_type == TYPE_FMOV
22505 && get_attr_memory (insn) == MEMORY_STORE
22506 && !ix86_agi_dependent (dep_insn, insn))
22510 case PROCESSOR_PENTIUMPRO:
22511 memory = get_attr_memory (insn);
22513 /* INT->FP conversion is expensive. */
22514 if (get_attr_fp_int_src (dep_insn))
22517 /* There is one cycle of extra latency between an FP op and a store. */
22518 if (insn_type == TYPE_FMOV
22519 && (set = single_set (dep_insn)) != NULL_RTX
22520 && (set2 = single_set (insn)) != NULL_RTX
22521 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22522 && MEM_P (SET_DEST (set2)))
22525 /* The reorder buffer can hide the latency of a load by executing it
22526 in parallel with the previous instruction when that instruction
22527 is not needed to compute the address. */
22528 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22529 && !ix86_agi_dependent (dep_insn, insn))
22531 /* Claim that moves take one cycle, as the core can issue one load
22532 at a time and the next load can start a cycle later. */
22533 if (dep_insn_type == TYPE_IMOV
22534 || dep_insn_type == TYPE_FMOV)
22542 memory = get_attr_memory (insn);
22544 /* The esp dependency is resolved before the instruction is really
22545 finished. */
22546 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22547 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22550 /* INT->FP conversion is expensive. */
22551 if (get_attr_fp_int_src (dep_insn))
22554 /* The reorder buffer can hide the latency of a load by executing it
22555 in parallel with the previous instruction when that instruction
22556 is not needed to compute the address. */
22557 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22558 && !ix86_agi_dependent (dep_insn, insn))
22560 /* Claim that moves take one cycle, as the core can issue one load
22561 at a time and the next load can start a cycle later. */
22562 if (dep_insn_type == TYPE_IMOV
22563 || dep_insn_type == TYPE_FMOV)
22572 case PROCESSOR_ATHLON:
22574 case PROCESSOR_AMDFAM10:
22575 case PROCESSOR_BDVER1:
22576 case PROCESSOR_BTVER1:
22577 case PROCESSOR_ATOM:
22578 case PROCESSOR_GENERIC32:
22579 case PROCESSOR_GENERIC64:
22580 memory = get_attr_memory (insn);
22582 /* The reorder buffer can hide the latency of a load by executing it
22583 in parallel with the previous instruction when that instruction
22584 is not needed to compute the address. */
22585 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22586 && !ix86_agi_dependent (dep_insn, insn))
22588 enum attr_unit unit = get_attr_unit (insn);
22591 /* Because of the difference between the lengths of the integer and
22592 floating unit pipeline preparation stages, memory operands for
22593 floating point are cheaper.
22595 ??? For Athlon the difference is most probably 2. */
22596 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22599 loadcost = TARGET_ATHLON ? 2 : 0;
22601 if (cost >= loadcost)
22614 /* How many alternative schedules to try. This should be as wide as the
22615 scheduling freedom in the DFA, but no wider. Making this value too
22616 large results in extra work for the scheduler. */
22619 ia32_multipass_dfa_lookahead (void)
22623 case PROCESSOR_PENTIUM:
22626 case PROCESSOR_PENTIUMPRO:
22630 case PROCESSOR_CORE2_32:
22631 case PROCESSOR_CORE2_64:
22632 case PROCESSOR_COREI7_32:
22633 case PROCESSOR_COREI7_64:
22634 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22635 as many instructions can be executed on a cycle, i.e.,
22636 issue_rate. I wonder why tuning for many CPUs does not do this. */
22637 return ix86_issue_rate ();
22646 /* Model decoder of Core 2/i7.
22647 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22648 track the instruction fetch block boundaries and make sure that long
22649 (9+ bytes) instructions are assigned to D0. */
22651 /* Maximum length of an insn that can be handled by
22652 a secondary decoder unit. '8' for Core 2/i7. */
22653 static int core2i7_secondary_decoder_max_insn_size;
22655 /* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
22656 '16' for Core 2/i7. */
22657 static int core2i7_ifetch_block_size;
22659 /* Maximum number of instructions the decoder can handle per cycle.
22660 '6' for Core 2/i7. */
22661 static int core2i7_ifetch_block_max_insns;
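/* Taken together (illustrative): per cycle the model admits at most 6
   insns whose encoded sizes sum to at most 16 bytes, and an insn longer
   than 8 bytes can only be taken as the first insn of the cycle,
   matching the single complex (D0) decoder.  */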
22663 typedef struct ix86_first_cycle_multipass_data_ *
22664 ix86_first_cycle_multipass_data_t;
22665 typedef const struct ix86_first_cycle_multipass_data_ *
22666 const_ix86_first_cycle_multipass_data_t;
22668 /* A variable to store target state across calls to max_issue within
22670 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22671 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22673 /* Initialize DATA. */
22675 core2i7_first_cycle_multipass_init (void *_data)
22677 ix86_first_cycle_multipass_data_t data
22678 = (ix86_first_cycle_multipass_data_t) _data;
22680 data->ifetch_block_len = 0;
22681 data->ifetch_block_n_insns = 0;
22682 data->ready_try_change = NULL;
22683 data->ready_try_change_size = 0;
22686 /* Advancing the cycle; reset ifetch block counts. */
22688 core2i7_dfa_post_advance_cycle (void)
22690 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22692 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22694 data->ifetch_block_len = 0;
22695 data->ifetch_block_n_insns = 0;
22698 static int min_insn_size (rtx);
22700 /* Filter out insns from ready_try that the core will not be able to issue
22701 on the current cycle due to decoder restrictions. */
22703 core2i7_first_cycle_multipass_filter_ready_try
22704 (const_ix86_first_cycle_multipass_data_t data,
22705 char *ready_try, int n_ready, bool first_cycle_insn_p)
22712 if (ready_try[n_ready])
22715 insn = get_ready_element (n_ready);
22716 insn_size = min_insn_size (insn);
22718 if (/* If this insn is too long for a secondary decoder ... */
22719 (!first_cycle_insn_p
22720 && insn_size > core2i7_secondary_decoder_max_insn_size)
22721 /* ... or it would not fit into the ifetch block ... */
22722 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22723 /* ... or the decoder is full already ... */
22724 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22725 /* ... mask the insn out. */
22727 ready_try[n_ready] = 1;
22729 if (data->ready_try_change)
22730 SET_BIT (data->ready_try_change, n_ready);
22735 /* Prepare for a new round of multipass lookahead scheduling. */
22737 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22738 bool first_cycle_insn_p)
22740 ix86_first_cycle_multipass_data_t data
22741 = (ix86_first_cycle_multipass_data_t) _data;
22742 const_ix86_first_cycle_multipass_data_t prev_data
22743 = ix86_first_cycle_multipass_data;
22745 /* Restore the state from the end of the previous round. */
22746 data->ifetch_block_len = prev_data->ifetch_block_len;
22747 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22749 /* Filter instructions that cannot be issued on current cycle due to
22750 decoder restrictions. */
22751 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22752 first_cycle_insn_p);
22755 /* INSN is being issued in the current solution. Account for its impact on
22756 the decoder model. */
22758 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22759 rtx insn, const void *_prev_data)
22761 ix86_first_cycle_multipass_data_t data
22762 = (ix86_first_cycle_multipass_data_t) _data;
22763 const_ix86_first_cycle_multipass_data_t prev_data
22764 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22766 int insn_size = min_insn_size (insn);
22768 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22769 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22770 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22771 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22773 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22774 if (!data->ready_try_change)
22776 data->ready_try_change = sbitmap_alloc (n_ready);
22777 data->ready_try_change_size = n_ready;
22779 else if (data->ready_try_change_size < n_ready)
22781 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22783 data->ready_try_change_size = n_ready;
22785 sbitmap_zero (data->ready_try_change);
22787 /* Filter out insns from ready_try that the core will not be able to issue
22788 on the current cycle due to decoder restrictions. */
22789 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22793 /* Revert the effect on ready_try. */
22795 core2i7_first_cycle_multipass_backtrack (const void *_data,
22797 int n_ready ATTRIBUTE_UNUSED)
22799 const_ix86_first_cycle_multipass_data_t data
22800 = (const_ix86_first_cycle_multipass_data_t) _data;
22801 unsigned int i = 0;
22802 sbitmap_iterator sbi;
22804 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22805 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22811 /* Save the result of multipass lookahead scheduling for the next round. */
22813 core2i7_first_cycle_multipass_end (const void *_data)
22815 const_ix86_first_cycle_multipass_data_t data
22816 = (const_ix86_first_cycle_multipass_data_t) _data;
22817 ix86_first_cycle_multipass_data_t next_data
22818 = ix86_first_cycle_multipass_data;
22822 next_data->ifetch_block_len = data->ifetch_block_len;
22823 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22827 /* Deallocate target data. */
22829 core2i7_first_cycle_multipass_fini (void *_data)
22831 ix86_first_cycle_multipass_data_t data
22832 = (ix86_first_cycle_multipass_data_t) _data;
22834 if (data->ready_try_change)
22836 sbitmap_free (data->ready_try_change);
22837 data->ready_try_change = NULL;
22838 data->ready_try_change_size = 0;
22842 /* Prepare for scheduling pass. */
22844 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22845 int verbose ATTRIBUTE_UNUSED,
22846 int max_uid ATTRIBUTE_UNUSED)
22848 /* Install scheduling hooks for the current CPU. Some of these hooks are used
22849 in time-critical parts of the scheduler, so we only set them up when
22850 they are actually used. */
22853 case PROCESSOR_CORE2_32:
22854 case PROCESSOR_CORE2_64:
22855 case PROCESSOR_COREI7_32:
22856 case PROCESSOR_COREI7_64:
22857 targetm.sched.dfa_post_advance_cycle
22858 = core2i7_dfa_post_advance_cycle;
22859 targetm.sched.first_cycle_multipass_init
22860 = core2i7_first_cycle_multipass_init;
22861 targetm.sched.first_cycle_multipass_begin
22862 = core2i7_first_cycle_multipass_begin;
22863 targetm.sched.first_cycle_multipass_issue
22864 = core2i7_first_cycle_multipass_issue;
22865 targetm.sched.first_cycle_multipass_backtrack
22866 = core2i7_first_cycle_multipass_backtrack;
22867 targetm.sched.first_cycle_multipass_end
22868 = core2i7_first_cycle_multipass_end;
22869 targetm.sched.first_cycle_multipass_fini
22870 = core2i7_first_cycle_multipass_fini;
22872 /* Set decoder parameters. */
22873 core2i7_secondary_decoder_max_insn_size = 8;
22874 core2i7_ifetch_block_size = 16;
22875 core2i7_ifetch_block_max_insns = 6;
22879 targetm.sched.dfa_post_advance_cycle = NULL;
22880 targetm.sched.first_cycle_multipass_init = NULL;
22881 targetm.sched.first_cycle_multipass_begin = NULL;
22882 targetm.sched.first_cycle_multipass_issue = NULL;
22883 targetm.sched.first_cycle_multipass_backtrack = NULL;
22884 targetm.sched.first_cycle_multipass_end = NULL;
22885 targetm.sched.first_cycle_multipass_fini = NULL;
22891 /* Compute the alignment given to a constant that is being placed in memory.
22892 EXP is the constant and ALIGN is the alignment that the object would
22893 ordinarily have.
22894 The value of this function is used instead of that alignment to align
22895 the object. */
22898 ix86_constant_alignment (tree exp, int align)
22900 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22901 || TREE_CODE (exp) == INTEGER_CST)
22903 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22905 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22908 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22909 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22910 return BITS_PER_WORD;
22915 /* Compute the alignment for a static variable.
22916 TYPE is the data type, and ALIGN is the alignment that
22917 the object would ordinarily have. The value of this function is used
22918 instead of that alignment to align the object. */
22921 ix86_data_alignment (tree type, int align)
22923 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22925 if (AGGREGATE_TYPE_P (type)
22926 && TYPE_SIZE (type)
22927 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22928 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22929 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22930 && align < max_align)
22933 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
22934 to a 16-byte boundary. */
22937 if (AGGREGATE_TYPE_P (type)
22938 && TYPE_SIZE (type)
22939 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22940 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22941 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22945 if (TREE_CODE (type) == ARRAY_TYPE)
22947 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22949 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22952 else if (TREE_CODE (type) == COMPLEX_TYPE)
22955 if (TYPE_MODE (type) == DCmode && align < 64)
22957 if ((TYPE_MODE (type) == XCmode
22958 || TYPE_MODE (type) == TCmode) && align < 128)
22961 else if ((TREE_CODE (type) == RECORD_TYPE
22962 || TREE_CODE (type) == UNION_TYPE
22963 || TREE_CODE (type) == QUAL_UNION_TYPE)
22964 && TYPE_FIELDS (type))
22966 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22968 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22971 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22972 || TREE_CODE (type) == INTEGER_TYPE)
22974 if (TYPE_MODE (type) == DFmode && align < 64)
22976 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22983 /* Compute the alignment for a local variable or a stack slot. EXP is
22984 the data type or decl itself, MODE is the widest mode available and
22985 ALIGN is the alignment that the object would ordinarily have. The
22986 value of this macro is used instead of that alignment to align the
22987 object. */
22990 ix86_local_alignment (tree exp, enum machine_mode mode,
22991 unsigned int align)
22995 if (exp && DECL_P (exp))
22997 type = TREE_TYPE (exp);
23006 /* Don't do dynamic stack realignment for long long objects with
23007 -mpreferred-stack-boundary=2. */
23010 && ix86_preferred_stack_boundary < 64
23011 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
23012 && (!type || !TYPE_USER_ALIGN (type))
23013 && (!decl || !DECL_USER_ALIGN (decl)))
23016 /* If TYPE is NULL, we are allocating a stack slot for caller-save
23017 register in MODE. We will return the largest alignment of XF
23018 and DF. */
23021 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
23022 align = GET_MODE_ALIGNMENT (DFmode);
23026 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
23027 to a 16-byte boundary. The exact wording is:
23029 An array uses the same alignment as its elements, except that a local or
23030 global array variable of length at least 16 bytes or
23031 a C99 variable-length array variable always has alignment of at least 16 bytes.
23033 This was added to allow use of aligned SSE instructions on arrays. This
23034 rule is meant for static storage (where the compiler cannot do the
23035 analysis by itself). We follow it for automatic variables only when
23036 convenient; we fully control everything in the function being compiled,
23037 and functions from other units cannot rely on the alignment.
23039 Exclude the va_list type. It is the common case of a local array where
23040 we cannot benefit from the alignment. */
23041 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
23044 if (AGGREGATE_TYPE_P (type)
23045 && (va_list_type_node == NULL_TREE
23046 || (TYPE_MAIN_VARIANT (type)
23047 != TYPE_MAIN_VARIANT (va_list_type_node)))
23048 && TYPE_SIZE (type)
23049 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
23050 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
23051 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
23054 if (TREE_CODE (type) == ARRAY_TYPE)
23056 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
23058 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
23061 else if (TREE_CODE (type) == COMPLEX_TYPE)
23063 if (TYPE_MODE (type) == DCmode && align < 64)
23065 if ((TYPE_MODE (type) == XCmode
23066 || TYPE_MODE (type) == TCmode) && align < 128)
23069 else if ((TREE_CODE (type) == RECORD_TYPE
23070 || TREE_CODE (type) == UNION_TYPE
23071 || TREE_CODE (type) == QUAL_UNION_TYPE)
23072 && TYPE_FIELDS (type))
23074 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
23076 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
23079 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
23080 || TREE_CODE (type) == INTEGER_TYPE)
23083 if (TYPE_MODE (type) == DFmode && align < 64)
23085 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
23091 /* Compute the minimum required alignment for dynamic stack realignment
23092 purposes for a local variable, parameter or a stack slot. EXP is
23093 the data type or decl itself, MODE is its mode and ALIGN is the
23094 alignment that the object would ordinarily have. */
23097 ix86_minimum_alignment (tree exp, enum machine_mode mode,
23098 unsigned int align)
23102 if (exp && DECL_P (exp))
23104 type = TREE_TYPE (exp);
23113 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
23116 /* Don't do dynamic stack realignment for long long objects with
23117 -mpreferred-stack-boundary=2. */
23118 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
23119 && (!type || !TYPE_USER_ALIGN (type))
23120 && (!decl || !DECL_USER_ALIGN (decl)))
23126 /* Find a location for the static chain incoming to a nested function.
23127 This is a register, unless all free registers are used by arguments. */
23130 ix86_static_chain (const_tree fndecl, bool incoming_p)
23134 if (!DECL_STATIC_CHAIN (fndecl))
23139 /* We always use R10 in 64-bit mode. */
23145 /* By default in 32-bit mode we use ECX to pass the static chain. */
23148 fntype = TREE_TYPE (fndecl);
23149 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
23151 /* Fastcall functions use ecx/edx for arguments, which leaves
23152 us with EAX for the static chain. */
23155 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
23157 /* Thiscall functions use ecx for arguments, which leaves
23158 us with EAX for the static chain. */
23161 else if (ix86_function_regparm (fntype, fndecl) == 3)
23163 /* For regparm 3, we have no free call-clobbered registers in
23164 which to store the static chain. In order to implement this,
23165 we have the trampoline push the static chain to the stack.
23166 However, we can't push a value below the return address when
23167 we call the nested function directly, so we have to use an
23168 alternate entry point. For this we use ESI, and have the
23169 alternate entry point push ESI, so that things appear the
23170 same once we're executing the nested function. */
23173 if (fndecl == current_function_decl)
23174 ix86_static_chain_on_stack = true;
23175 return gen_frame_mem (SImode,
23176 plus_constant (arg_pointer_rtx, -8));
23182 return gen_rtx_REG (Pmode, regno);
23185 /* Emit RTL insns to initialize the variable parts of a trampoline.
23186 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23187 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23188 to be passed to the target function. */
23191 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23195 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23202 /* Depending on the static chain location, either load a register
23203 with a constant, or push the constant to the stack. All of the
23204 instructions are the same size. */
23205 chain = ix86_static_chain (fndecl, true);
23208 if (REGNO (chain) == CX_REG)
23210 else if (REGNO (chain) == AX_REG)
23213 gcc_unreachable ();
23218 mem = adjust_address (m_tramp, QImode, 0);
23219 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23221 mem = adjust_address (m_tramp, SImode, 1);
23222 emit_move_insn (mem, chain_value);
23224 /* Compute offset from the end of the jmp to the target function.
23225 In the case in which the trampoline stores the static chain on
23226 the stack, we need to skip the first insn which pushes the
23227 (call-saved) register static chain; this push is 1 byte. */
23228 disp = expand_binop (SImode, sub_optab, fnaddr,
23229 plus_constant (XEXP (m_tramp, 0),
23230 MEM_P (chain) ? 9 : 10),
23231 NULL_RTX, 1, OPTAB_DIRECT);
23233 mem = adjust_address (m_tramp, QImode, 5);
23234 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23236 mem = adjust_address (m_tramp, SImode, 6);
23237 emit_move_insn (mem, disp);
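/* Byte layout of the 32-bit trampoline built above (illustrative,
   register static chain case):
     offset 0: b9 or b8        movl $chain_value, %ecx or %eax
     offset 1: <chain_value>   4-byte immediate
     offset 5: e9              jmp rel32
     offset 6: <disp>          4-byte displacement to the target  */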
23243 /* Load the function address to r11. Try to load address using
23244 the shorter movl instead of movabs. We may want to support
23245 movq for kernel mode, but the kernel does not use trampolines at
23246 the moment. */
23247 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23249 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23251 mem = adjust_address (m_tramp, HImode, offset);
23252 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23254 mem = adjust_address (m_tramp, SImode, offset + 2);
23255 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23260 mem = adjust_address (m_tramp, HImode, offset);
23261 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23263 mem = adjust_address (m_tramp, DImode, offset + 2);
23264 emit_move_insn (mem, fnaddr);
23268 /* Load static chain using movabs to r10. */
23269 mem = adjust_address (m_tramp, HImode, offset);
23270 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23272 mem = adjust_address (m_tramp, DImode, offset + 2);
23273 emit_move_insn (mem, chain_value);
23276 /* Jump to r11; the last (unused) byte is a nop, only there to
23277 pad the write out to a single 32-bit store. */
23278 mem = adjust_address (m_tramp, SImode, offset);
23279 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
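/* Resulting 64-bit trampoline (illustrative, movabs variant):
     49 bb <8-byte fnaddr>   movabs $fnaddr, %r11
     49 ba <8-byte chain>    movabs $chain_value, %r10
     49 ff e3 90             jmpq *%r11; nop
   (0x90e3ff49 stored little-endian is exactly those last four bytes;
   when fnaddr fits in 32 bits, the shorter "41 bb <imm32>" movl form
   above is used for the first insn instead).  */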
23282 gcc_assert (offset <= TRAMPOLINE_SIZE);
23285 #ifdef ENABLE_EXECUTE_STACK
23286 #ifdef CHECK_EXECUTE_STACK_ENABLED
23287 if (CHECK_EXECUTE_STACK_ENABLED)
23289 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23290 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23294 /* The following file contains several enumerations and data structures
23295 built from the definitions in i386-builtin-types.def. */
23297 #include "i386-builtin-types.inc"
23299 /* Table for the ix86 builtin non-function types. */
23300 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23302 /* Retrieve an element from the above table, building some of
23303 the types lazily. */
23306 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23308 unsigned int index;
23311 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23313 type = ix86_builtin_type_tab[(int) tcode];
23317 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23318 if (tcode <= IX86_BT_LAST_VECT)
23320 enum machine_mode mode;
23322 index = tcode - IX86_BT_LAST_PRIM - 1;
23323 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23324 mode = ix86_builtin_type_vect_mode[index];
23326 type = build_vector_type_for_mode (itype, mode);
23332 index = tcode - IX86_BT_LAST_VECT - 1;
23333 if (tcode <= IX86_BT_LAST_PTR)
23334 quals = TYPE_UNQUALIFIED;
23336 quals = TYPE_QUAL_CONST;
23338 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23339 if (quals != TYPE_UNQUALIFIED)
23340 itype = build_qualified_type (itype, quals);
23342 type = build_pointer_type (itype);
23345 ix86_builtin_type_tab[(int) tcode] = type;
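/* Example of the lazy construction above (illustrative; the exact enum
   name is an assumption): the first lookup of a vector code such as
   IX86_BT_V4SF fetches its element type (float) and mode (V4SFmode)
   from the generated tables, builds the type with
   build_vector_type_for_mode, and caches it so later lookups are a
   single table read.  */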
23349 /* Table for the ix86 builtin function types. */
23350 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23352 /* Retrieve an element from the above table, building some of
23353 the types lazily. */
23356 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23360 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23362 type = ix86_builtin_func_type_tab[(int) tcode];
23366 if (tcode <= IX86_BT_LAST_FUNC)
23368 unsigned start = ix86_builtin_func_start[(int) tcode];
23369 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23370 tree rtype, atype, args = void_list_node;
23373 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23374 for (i = after - 1; i > start; --i)
23376 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23377 args = tree_cons (NULL, atype, args);
23380 type = build_function_type (rtype, args);
23384 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23385 enum ix86_builtin_func_type icode;
23387 icode = ix86_builtin_func_alias_base[index];
23388 type = ix86_get_builtin_func_type (icode);
23391 ix86_builtin_func_type_tab[(int) tcode] = type;
23396 /* Codes for all the SSE/MMX builtins. */
23399 IX86_BUILTIN_ADDPS,
23400 IX86_BUILTIN_ADDSS,
23401 IX86_BUILTIN_DIVPS,
23402 IX86_BUILTIN_DIVSS,
23403 IX86_BUILTIN_MULPS,
23404 IX86_BUILTIN_MULSS,
23405 IX86_BUILTIN_SUBPS,
23406 IX86_BUILTIN_SUBSS,
23408 IX86_BUILTIN_CMPEQPS,
23409 IX86_BUILTIN_CMPLTPS,
23410 IX86_BUILTIN_CMPLEPS,
23411 IX86_BUILTIN_CMPGTPS,
23412 IX86_BUILTIN_CMPGEPS,
23413 IX86_BUILTIN_CMPNEQPS,
23414 IX86_BUILTIN_CMPNLTPS,
23415 IX86_BUILTIN_CMPNLEPS,
23416 IX86_BUILTIN_CMPNGTPS,
23417 IX86_BUILTIN_CMPNGEPS,
23418 IX86_BUILTIN_CMPORDPS,
23419 IX86_BUILTIN_CMPUNORDPS,
23420 IX86_BUILTIN_CMPEQSS,
23421 IX86_BUILTIN_CMPLTSS,
23422 IX86_BUILTIN_CMPLESS,
23423 IX86_BUILTIN_CMPNEQSS,
23424 IX86_BUILTIN_CMPNLTSS,
23425 IX86_BUILTIN_CMPNLESS,
23426 IX86_BUILTIN_CMPNGTSS,
23427 IX86_BUILTIN_CMPNGESS,
23428 IX86_BUILTIN_CMPORDSS,
23429 IX86_BUILTIN_CMPUNORDSS,
23431 IX86_BUILTIN_COMIEQSS,
23432 IX86_BUILTIN_COMILTSS,
23433 IX86_BUILTIN_COMILESS,
23434 IX86_BUILTIN_COMIGTSS,
23435 IX86_BUILTIN_COMIGESS,
23436 IX86_BUILTIN_COMINEQSS,
23437 IX86_BUILTIN_UCOMIEQSS,
23438 IX86_BUILTIN_UCOMILTSS,
23439 IX86_BUILTIN_UCOMILESS,
23440 IX86_BUILTIN_UCOMIGTSS,
23441 IX86_BUILTIN_UCOMIGESS,
23442 IX86_BUILTIN_UCOMINEQSS,
23444 IX86_BUILTIN_CVTPI2PS,
23445 IX86_BUILTIN_CVTPS2PI,
23446 IX86_BUILTIN_CVTSI2SS,
23447 IX86_BUILTIN_CVTSI642SS,
23448 IX86_BUILTIN_CVTSS2SI,
23449 IX86_BUILTIN_CVTSS2SI64,
23450 IX86_BUILTIN_CVTTPS2PI,
23451 IX86_BUILTIN_CVTTSS2SI,
23452 IX86_BUILTIN_CVTTSS2SI64,
23454 IX86_BUILTIN_MAXPS,
23455 IX86_BUILTIN_MAXSS,
23456 IX86_BUILTIN_MINPS,
23457 IX86_BUILTIN_MINSS,
23459 IX86_BUILTIN_LOADUPS,
23460 IX86_BUILTIN_STOREUPS,
23461 IX86_BUILTIN_MOVSS,
23463 IX86_BUILTIN_MOVHLPS,
23464 IX86_BUILTIN_MOVLHPS,
23465 IX86_BUILTIN_LOADHPS,
23466 IX86_BUILTIN_LOADLPS,
23467 IX86_BUILTIN_STOREHPS,
23468 IX86_BUILTIN_STORELPS,
23470 IX86_BUILTIN_MASKMOVQ,
23471 IX86_BUILTIN_MOVMSKPS,
23472 IX86_BUILTIN_PMOVMSKB,
23474 IX86_BUILTIN_MOVNTPS,
23475 IX86_BUILTIN_MOVNTQ,
23477 IX86_BUILTIN_LOADDQU,
23478 IX86_BUILTIN_STOREDQU,
23480 IX86_BUILTIN_PACKSSWB,
23481 IX86_BUILTIN_PACKSSDW,
23482 IX86_BUILTIN_PACKUSWB,
23484 IX86_BUILTIN_PADDB,
23485 IX86_BUILTIN_PADDW,
23486 IX86_BUILTIN_PADDD,
23487 IX86_BUILTIN_PADDQ,
23488 IX86_BUILTIN_PADDSB,
23489 IX86_BUILTIN_PADDSW,
23490 IX86_BUILTIN_PADDUSB,
23491 IX86_BUILTIN_PADDUSW,
23492 IX86_BUILTIN_PSUBB,
23493 IX86_BUILTIN_PSUBW,
23494 IX86_BUILTIN_PSUBD,
23495 IX86_BUILTIN_PSUBQ,
23496 IX86_BUILTIN_PSUBSB,
23497 IX86_BUILTIN_PSUBSW,
23498 IX86_BUILTIN_PSUBUSB,
23499 IX86_BUILTIN_PSUBUSW,
23502 IX86_BUILTIN_PANDN,
23506 IX86_BUILTIN_PAVGB,
23507 IX86_BUILTIN_PAVGW,
23509 IX86_BUILTIN_PCMPEQB,
23510 IX86_BUILTIN_PCMPEQW,
23511 IX86_BUILTIN_PCMPEQD,
23512 IX86_BUILTIN_PCMPGTB,
23513 IX86_BUILTIN_PCMPGTW,
23514 IX86_BUILTIN_PCMPGTD,
23516 IX86_BUILTIN_PMADDWD,
23518 IX86_BUILTIN_PMAXSW,
23519 IX86_BUILTIN_PMAXUB,
23520 IX86_BUILTIN_PMINSW,
23521 IX86_BUILTIN_PMINUB,
23523 IX86_BUILTIN_PMULHUW,
23524 IX86_BUILTIN_PMULHW,
23525 IX86_BUILTIN_PMULLW,
23527 IX86_BUILTIN_PSADBW,
23528 IX86_BUILTIN_PSHUFW,
23530 IX86_BUILTIN_PSLLW,
23531 IX86_BUILTIN_PSLLD,
23532 IX86_BUILTIN_PSLLQ,
23533 IX86_BUILTIN_PSRAW,
23534 IX86_BUILTIN_PSRAD,
23535 IX86_BUILTIN_PSRLW,
23536 IX86_BUILTIN_PSRLD,
23537 IX86_BUILTIN_PSRLQ,
23538 IX86_BUILTIN_PSLLWI,
23539 IX86_BUILTIN_PSLLDI,
23540 IX86_BUILTIN_PSLLQI,
23541 IX86_BUILTIN_PSRAWI,
23542 IX86_BUILTIN_PSRADI,
23543 IX86_BUILTIN_PSRLWI,
23544 IX86_BUILTIN_PSRLDI,
23545 IX86_BUILTIN_PSRLQI,
23547 IX86_BUILTIN_PUNPCKHBW,
23548 IX86_BUILTIN_PUNPCKHWD,
23549 IX86_BUILTIN_PUNPCKHDQ,
23550 IX86_BUILTIN_PUNPCKLBW,
23551 IX86_BUILTIN_PUNPCKLWD,
23552 IX86_BUILTIN_PUNPCKLDQ,
23554 IX86_BUILTIN_SHUFPS,
23556 IX86_BUILTIN_RCPPS,
23557 IX86_BUILTIN_RCPSS,
23558 IX86_BUILTIN_RSQRTPS,
23559 IX86_BUILTIN_RSQRTPS_NR,
23560 IX86_BUILTIN_RSQRTSS,
23561 IX86_BUILTIN_RSQRTF,
23562 IX86_BUILTIN_SQRTPS,
23563 IX86_BUILTIN_SQRTPS_NR,
23564 IX86_BUILTIN_SQRTSS,
23566 IX86_BUILTIN_UNPCKHPS,
23567 IX86_BUILTIN_UNPCKLPS,
23569 IX86_BUILTIN_ANDPS,
23570 IX86_BUILTIN_ANDNPS,
23572 IX86_BUILTIN_XORPS,
23575 IX86_BUILTIN_LDMXCSR,
23576 IX86_BUILTIN_STMXCSR,
23577 IX86_BUILTIN_SFENCE,
23579 /* 3DNow! Original */
23580 IX86_BUILTIN_FEMMS,
23581 IX86_BUILTIN_PAVGUSB,
23582 IX86_BUILTIN_PF2ID,
23583 IX86_BUILTIN_PFACC,
23584 IX86_BUILTIN_PFADD,
23585 IX86_BUILTIN_PFCMPEQ,
23586 IX86_BUILTIN_PFCMPGE,
23587 IX86_BUILTIN_PFCMPGT,
23588 IX86_BUILTIN_PFMAX,
23589 IX86_BUILTIN_PFMIN,
23590 IX86_BUILTIN_PFMUL,
23591 IX86_BUILTIN_PFRCP,
23592 IX86_BUILTIN_PFRCPIT1,
23593 IX86_BUILTIN_PFRCPIT2,
23594 IX86_BUILTIN_PFRSQIT1,
23595 IX86_BUILTIN_PFRSQRT,
23596 IX86_BUILTIN_PFSUB,
23597 IX86_BUILTIN_PFSUBR,
23598 IX86_BUILTIN_PI2FD,
23599 IX86_BUILTIN_PMULHRW,
23601 /* 3DNow! Athlon Extensions */
23602 IX86_BUILTIN_PF2IW,
23603 IX86_BUILTIN_PFNACC,
23604 IX86_BUILTIN_PFPNACC,
23605 IX86_BUILTIN_PI2FW,
23606 IX86_BUILTIN_PSWAPDSI,
23607 IX86_BUILTIN_PSWAPDSF,
23610 IX86_BUILTIN_ADDPD,
23611 IX86_BUILTIN_ADDSD,
23612 IX86_BUILTIN_DIVPD,
23613 IX86_BUILTIN_DIVSD,
23614 IX86_BUILTIN_MULPD,
23615 IX86_BUILTIN_MULSD,
23616 IX86_BUILTIN_SUBPD,
23617 IX86_BUILTIN_SUBSD,
23619 IX86_BUILTIN_CMPEQPD,
23620 IX86_BUILTIN_CMPLTPD,
23621 IX86_BUILTIN_CMPLEPD,
23622 IX86_BUILTIN_CMPGTPD,
23623 IX86_BUILTIN_CMPGEPD,
23624 IX86_BUILTIN_CMPNEQPD,
23625 IX86_BUILTIN_CMPNLTPD,
23626 IX86_BUILTIN_CMPNLEPD,
23627 IX86_BUILTIN_CMPNGTPD,
23628 IX86_BUILTIN_CMPNGEPD,
23629 IX86_BUILTIN_CMPORDPD,
23630 IX86_BUILTIN_CMPUNORDPD,
23631 IX86_BUILTIN_CMPEQSD,
23632 IX86_BUILTIN_CMPLTSD,
23633 IX86_BUILTIN_CMPLESD,
23634 IX86_BUILTIN_CMPNEQSD,
23635 IX86_BUILTIN_CMPNLTSD,
23636 IX86_BUILTIN_CMPNLESD,
23637 IX86_BUILTIN_CMPORDSD,
23638 IX86_BUILTIN_CMPUNORDSD,
23640 IX86_BUILTIN_COMIEQSD,
23641 IX86_BUILTIN_COMILTSD,
23642 IX86_BUILTIN_COMILESD,
23643 IX86_BUILTIN_COMIGTSD,
23644 IX86_BUILTIN_COMIGESD,
23645 IX86_BUILTIN_COMINEQSD,
23646 IX86_BUILTIN_UCOMIEQSD,
23647 IX86_BUILTIN_UCOMILTSD,
23648 IX86_BUILTIN_UCOMILESD,
23649 IX86_BUILTIN_UCOMIGTSD,
23650 IX86_BUILTIN_UCOMIGESD,
23651 IX86_BUILTIN_UCOMINEQSD,
23653 IX86_BUILTIN_MAXPD,
23654 IX86_BUILTIN_MAXSD,
23655 IX86_BUILTIN_MINPD,
23656 IX86_BUILTIN_MINSD,
23658 IX86_BUILTIN_ANDPD,
23659 IX86_BUILTIN_ANDNPD,
23661 IX86_BUILTIN_XORPD,
23663 IX86_BUILTIN_SQRTPD,
23664 IX86_BUILTIN_SQRTSD,
23666 IX86_BUILTIN_UNPCKHPD,
23667 IX86_BUILTIN_UNPCKLPD,
23669 IX86_BUILTIN_SHUFPD,
23671 IX86_BUILTIN_LOADUPD,
23672 IX86_BUILTIN_STOREUPD,
23673 IX86_BUILTIN_MOVSD,
23675 IX86_BUILTIN_LOADHPD,
23676 IX86_BUILTIN_LOADLPD,
23678 IX86_BUILTIN_CVTDQ2PD,
23679 IX86_BUILTIN_CVTDQ2PS,
23681 IX86_BUILTIN_CVTPD2DQ,
23682 IX86_BUILTIN_CVTPD2PI,
23683 IX86_BUILTIN_CVTPD2PS,
23684 IX86_BUILTIN_CVTTPD2DQ,
23685 IX86_BUILTIN_CVTTPD2PI,
23687 IX86_BUILTIN_CVTPI2PD,
23688 IX86_BUILTIN_CVTSI2SD,
23689 IX86_BUILTIN_CVTSI642SD,
23691 IX86_BUILTIN_CVTSD2SI,
23692 IX86_BUILTIN_CVTSD2SI64,
23693 IX86_BUILTIN_CVTSD2SS,
23694 IX86_BUILTIN_CVTSS2SD,
23695 IX86_BUILTIN_CVTTSD2SI,
23696 IX86_BUILTIN_CVTTSD2SI64,
23698 IX86_BUILTIN_CVTPS2DQ,
23699 IX86_BUILTIN_CVTPS2PD,
23700 IX86_BUILTIN_CVTTPS2DQ,
23702 IX86_BUILTIN_MOVNTI,
23703 IX86_BUILTIN_MOVNTPD,
23704 IX86_BUILTIN_MOVNTDQ,
23706 IX86_BUILTIN_MOVQ128,
23709 IX86_BUILTIN_MASKMOVDQU,
23710 IX86_BUILTIN_MOVMSKPD,
23711 IX86_BUILTIN_PMOVMSKB128,
23713 IX86_BUILTIN_PACKSSWB128,
23714 IX86_BUILTIN_PACKSSDW128,
23715 IX86_BUILTIN_PACKUSWB128,
23717 IX86_BUILTIN_PADDB128,
23718 IX86_BUILTIN_PADDW128,
23719 IX86_BUILTIN_PADDD128,
23720 IX86_BUILTIN_PADDQ128,
23721 IX86_BUILTIN_PADDSB128,
23722 IX86_BUILTIN_PADDSW128,
23723 IX86_BUILTIN_PADDUSB128,
23724 IX86_BUILTIN_PADDUSW128,
23725 IX86_BUILTIN_PSUBB128,
23726 IX86_BUILTIN_PSUBW128,
23727 IX86_BUILTIN_PSUBD128,
23728 IX86_BUILTIN_PSUBQ128,
23729 IX86_BUILTIN_PSUBSB128,
23730 IX86_BUILTIN_PSUBSW128,
23731 IX86_BUILTIN_PSUBUSB128,
23732 IX86_BUILTIN_PSUBUSW128,
23734 IX86_BUILTIN_PAND128,
23735 IX86_BUILTIN_PANDN128,
23736 IX86_BUILTIN_POR128,
23737 IX86_BUILTIN_PXOR128,
23739 IX86_BUILTIN_PAVGB128,
23740 IX86_BUILTIN_PAVGW128,
23742 IX86_BUILTIN_PCMPEQB128,
23743 IX86_BUILTIN_PCMPEQW128,
23744 IX86_BUILTIN_PCMPEQD128,
23745 IX86_BUILTIN_PCMPGTB128,
23746 IX86_BUILTIN_PCMPGTW128,
23747 IX86_BUILTIN_PCMPGTD128,
23749 IX86_BUILTIN_PMADDWD128,
23751 IX86_BUILTIN_PMAXSW128,
23752 IX86_BUILTIN_PMAXUB128,
23753 IX86_BUILTIN_PMINSW128,
23754 IX86_BUILTIN_PMINUB128,
23756 IX86_BUILTIN_PMULUDQ,
23757 IX86_BUILTIN_PMULUDQ128,
23758 IX86_BUILTIN_PMULHUW128,
23759 IX86_BUILTIN_PMULHW128,
23760 IX86_BUILTIN_PMULLW128,
23762 IX86_BUILTIN_PSADBW128,
23763 IX86_BUILTIN_PSHUFHW,
23764 IX86_BUILTIN_PSHUFLW,
23765 IX86_BUILTIN_PSHUFD,
23767 IX86_BUILTIN_PSLLDQI128,
23768 IX86_BUILTIN_PSLLWI128,
23769 IX86_BUILTIN_PSLLDI128,
23770 IX86_BUILTIN_PSLLQI128,
23771 IX86_BUILTIN_PSRAWI128,
23772 IX86_BUILTIN_PSRADI128,
23773 IX86_BUILTIN_PSRLDQI128,
23774 IX86_BUILTIN_PSRLWI128,
23775 IX86_BUILTIN_PSRLDI128,
23776 IX86_BUILTIN_PSRLQI128,
23778 IX86_BUILTIN_PSLLDQ128,
23779 IX86_BUILTIN_PSLLW128,
23780 IX86_BUILTIN_PSLLD128,
23781 IX86_BUILTIN_PSLLQ128,
23782 IX86_BUILTIN_PSRAW128,
23783 IX86_BUILTIN_PSRAD128,
23784 IX86_BUILTIN_PSRLW128,
23785 IX86_BUILTIN_PSRLD128,
23786 IX86_BUILTIN_PSRLQ128,
23788 IX86_BUILTIN_PUNPCKHBW128,
23789 IX86_BUILTIN_PUNPCKHWD128,
23790 IX86_BUILTIN_PUNPCKHDQ128,
23791 IX86_BUILTIN_PUNPCKHQDQ128,
23792 IX86_BUILTIN_PUNPCKLBW128,
23793 IX86_BUILTIN_PUNPCKLWD128,
23794 IX86_BUILTIN_PUNPCKLDQ128,
23795 IX86_BUILTIN_PUNPCKLQDQ128,
23797 IX86_BUILTIN_CLFLUSH,
23798 IX86_BUILTIN_MFENCE,
23799 IX86_BUILTIN_LFENCE,
23801 IX86_BUILTIN_BSRSI,
23802 IX86_BUILTIN_BSRDI,
23803 IX86_BUILTIN_RDPMC,
23804 IX86_BUILTIN_RDTSC,
23805 IX86_BUILTIN_RDTSCP,
23806 IX86_BUILTIN_ROLQI,
23807 IX86_BUILTIN_ROLHI,
23808 IX86_BUILTIN_RORQI,
23809 IX86_BUILTIN_RORHI,
23812 IX86_BUILTIN_ADDSUBPS,
23813 IX86_BUILTIN_HADDPS,
23814 IX86_BUILTIN_HSUBPS,
23815 IX86_BUILTIN_MOVSHDUP,
23816 IX86_BUILTIN_MOVSLDUP,
23817 IX86_BUILTIN_ADDSUBPD,
23818 IX86_BUILTIN_HADDPD,
23819 IX86_BUILTIN_HSUBPD,
23820 IX86_BUILTIN_LDDQU,
23822 IX86_BUILTIN_MONITOR,
23823 IX86_BUILTIN_MWAIT,
23826 IX86_BUILTIN_PHADDW,
23827 IX86_BUILTIN_PHADDD,
23828 IX86_BUILTIN_PHADDSW,
23829 IX86_BUILTIN_PHSUBW,
23830 IX86_BUILTIN_PHSUBD,
23831 IX86_BUILTIN_PHSUBSW,
23832 IX86_BUILTIN_PMADDUBSW,
23833 IX86_BUILTIN_PMULHRSW,
23834 IX86_BUILTIN_PSHUFB,
23835 IX86_BUILTIN_PSIGNB,
23836 IX86_BUILTIN_PSIGNW,
23837 IX86_BUILTIN_PSIGND,
23838 IX86_BUILTIN_PALIGNR,
23839 IX86_BUILTIN_PABSB,
23840 IX86_BUILTIN_PABSW,
23841 IX86_BUILTIN_PABSD,
23843 IX86_BUILTIN_PHADDW128,
23844 IX86_BUILTIN_PHADDD128,
23845 IX86_BUILTIN_PHADDSW128,
23846 IX86_BUILTIN_PHSUBW128,
23847 IX86_BUILTIN_PHSUBD128,
23848 IX86_BUILTIN_PHSUBSW128,
23849 IX86_BUILTIN_PMADDUBSW128,
23850 IX86_BUILTIN_PMULHRSW128,
23851 IX86_BUILTIN_PSHUFB128,
23852 IX86_BUILTIN_PSIGNB128,
23853 IX86_BUILTIN_PSIGNW128,
23854 IX86_BUILTIN_PSIGND128,
23855 IX86_BUILTIN_PALIGNR128,
23856 IX86_BUILTIN_PABSB128,
23857 IX86_BUILTIN_PABSW128,
23858 IX86_BUILTIN_PABSD128,
23860 /* AMDFAM10 - SSE4A New Instructions. */
23861 IX86_BUILTIN_MOVNTSD,
23862 IX86_BUILTIN_MOVNTSS,
23863 IX86_BUILTIN_EXTRQI,
23864 IX86_BUILTIN_EXTRQ,
23865 IX86_BUILTIN_INSERTQI,
23866 IX86_BUILTIN_INSERTQ,
23869 IX86_BUILTIN_BLENDPD,
23870 IX86_BUILTIN_BLENDPS,
23871 IX86_BUILTIN_BLENDVPD,
23872 IX86_BUILTIN_BLENDVPS,
23873 IX86_BUILTIN_PBLENDVB128,
23874 IX86_BUILTIN_PBLENDW128,
23879 IX86_BUILTIN_INSERTPS128,
23881 IX86_BUILTIN_MOVNTDQA,
23882 IX86_BUILTIN_MPSADBW128,
23883 IX86_BUILTIN_PACKUSDW128,
23884 IX86_BUILTIN_PCMPEQQ,
23885 IX86_BUILTIN_PHMINPOSUW128,
23887 IX86_BUILTIN_PMAXSB128,
23888 IX86_BUILTIN_PMAXSD128,
23889 IX86_BUILTIN_PMAXUD128,
23890 IX86_BUILTIN_PMAXUW128,
23892 IX86_BUILTIN_PMINSB128,
23893 IX86_BUILTIN_PMINSD128,
23894 IX86_BUILTIN_PMINUD128,
23895 IX86_BUILTIN_PMINUW128,
23897 IX86_BUILTIN_PMOVSXBW128,
23898 IX86_BUILTIN_PMOVSXBD128,
23899 IX86_BUILTIN_PMOVSXBQ128,
23900 IX86_BUILTIN_PMOVSXWD128,
23901 IX86_BUILTIN_PMOVSXWQ128,
23902 IX86_BUILTIN_PMOVSXDQ128,
23904 IX86_BUILTIN_PMOVZXBW128,
23905 IX86_BUILTIN_PMOVZXBD128,
23906 IX86_BUILTIN_PMOVZXBQ128,
23907 IX86_BUILTIN_PMOVZXWD128,
23908 IX86_BUILTIN_PMOVZXWQ128,
23909 IX86_BUILTIN_PMOVZXDQ128,
23911 IX86_BUILTIN_PMULDQ128,
23912 IX86_BUILTIN_PMULLD128,
23914 IX86_BUILTIN_ROUNDPD,
23915 IX86_BUILTIN_ROUNDPS,
23916 IX86_BUILTIN_ROUNDSD,
23917 IX86_BUILTIN_ROUNDSS,
23919 IX86_BUILTIN_FLOORPD,
23920 IX86_BUILTIN_CEILPD,
23921 IX86_BUILTIN_TRUNCPD,
23922 IX86_BUILTIN_RINTPD,
23923 IX86_BUILTIN_FLOORPS,
23924 IX86_BUILTIN_CEILPS,
23925 IX86_BUILTIN_TRUNCPS,
23926 IX86_BUILTIN_RINTPS,
23928 IX86_BUILTIN_PTESTZ,
23929 IX86_BUILTIN_PTESTC,
23930 IX86_BUILTIN_PTESTNZC,
23932 IX86_BUILTIN_VEC_INIT_V2SI,
23933 IX86_BUILTIN_VEC_INIT_V4HI,
23934 IX86_BUILTIN_VEC_INIT_V8QI,
23935 IX86_BUILTIN_VEC_EXT_V2DF,
23936 IX86_BUILTIN_VEC_EXT_V2DI,
23937 IX86_BUILTIN_VEC_EXT_V4SF,
23938 IX86_BUILTIN_VEC_EXT_V4SI,
23939 IX86_BUILTIN_VEC_EXT_V8HI,
23940 IX86_BUILTIN_VEC_EXT_V2SI,
23941 IX86_BUILTIN_VEC_EXT_V4HI,
23942 IX86_BUILTIN_VEC_EXT_V16QI,
23943 IX86_BUILTIN_VEC_SET_V2DI,
23944 IX86_BUILTIN_VEC_SET_V4SF,
23945 IX86_BUILTIN_VEC_SET_V4SI,
23946 IX86_BUILTIN_VEC_SET_V8HI,
23947 IX86_BUILTIN_VEC_SET_V4HI,
23948 IX86_BUILTIN_VEC_SET_V16QI,
23950 IX86_BUILTIN_VEC_PACK_SFIX,
23953 IX86_BUILTIN_CRC32QI,
23954 IX86_BUILTIN_CRC32HI,
23955 IX86_BUILTIN_CRC32SI,
23956 IX86_BUILTIN_CRC32DI,
23958 IX86_BUILTIN_PCMPESTRI128,
23959 IX86_BUILTIN_PCMPESTRM128,
23960 IX86_BUILTIN_PCMPESTRA128,
23961 IX86_BUILTIN_PCMPESTRC128,
23962 IX86_BUILTIN_PCMPESTRO128,
23963 IX86_BUILTIN_PCMPESTRS128,
23964 IX86_BUILTIN_PCMPESTRZ128,
23965 IX86_BUILTIN_PCMPISTRI128,
23966 IX86_BUILTIN_PCMPISTRM128,
23967 IX86_BUILTIN_PCMPISTRA128,
23968 IX86_BUILTIN_PCMPISTRC128,
23969 IX86_BUILTIN_PCMPISTRO128,
23970 IX86_BUILTIN_PCMPISTRS128,
23971 IX86_BUILTIN_PCMPISTRZ128,
23973 IX86_BUILTIN_PCMPGTQ,
23975 /* AES instructions */
23976 IX86_BUILTIN_AESENC128,
23977 IX86_BUILTIN_AESENCLAST128,
23978 IX86_BUILTIN_AESDEC128,
23979 IX86_BUILTIN_AESDECLAST128,
23980 IX86_BUILTIN_AESIMC128,
23981 IX86_BUILTIN_AESKEYGENASSIST128,
23983 /* PCLMUL instruction */
23984 IX86_BUILTIN_PCLMULQDQ128,
23987 IX86_BUILTIN_ADDPD256,
23988 IX86_BUILTIN_ADDPS256,
23989 IX86_BUILTIN_ADDSUBPD256,
23990 IX86_BUILTIN_ADDSUBPS256,
23991 IX86_BUILTIN_ANDPD256,
23992 IX86_BUILTIN_ANDPS256,
23993 IX86_BUILTIN_ANDNPD256,
23994 IX86_BUILTIN_ANDNPS256,
23995 IX86_BUILTIN_BLENDPD256,
23996 IX86_BUILTIN_BLENDPS256,
23997 IX86_BUILTIN_BLENDVPD256,
23998 IX86_BUILTIN_BLENDVPS256,
23999 IX86_BUILTIN_DIVPD256,
24000 IX86_BUILTIN_DIVPS256,
24001 IX86_BUILTIN_DPPS256,
24002 IX86_BUILTIN_HADDPD256,
24003 IX86_BUILTIN_HADDPS256,
24004 IX86_BUILTIN_HSUBPD256,
24005 IX86_BUILTIN_HSUBPS256,
24006 IX86_BUILTIN_MAXPD256,
24007 IX86_BUILTIN_MAXPS256,
24008 IX86_BUILTIN_MINPD256,
24009 IX86_BUILTIN_MINPS256,
24010 IX86_BUILTIN_MULPD256,
24011 IX86_BUILTIN_MULPS256,
24012 IX86_BUILTIN_ORPD256,
24013 IX86_BUILTIN_ORPS256,
24014 IX86_BUILTIN_SHUFPD256,
24015 IX86_BUILTIN_SHUFPS256,
24016 IX86_BUILTIN_SUBPD256,
24017 IX86_BUILTIN_SUBPS256,
24018 IX86_BUILTIN_XORPD256,
24019 IX86_BUILTIN_XORPS256,
24020 IX86_BUILTIN_CMPSD,
24021 IX86_BUILTIN_CMPSS,
24022 IX86_BUILTIN_CMPPD,
24023 IX86_BUILTIN_CMPPS,
24024 IX86_BUILTIN_CMPPD256,
24025 IX86_BUILTIN_CMPPS256,
24026 IX86_BUILTIN_CVTDQ2PD256,
24027 IX86_BUILTIN_CVTDQ2PS256,
24028 IX86_BUILTIN_CVTPD2PS256,
24029 IX86_BUILTIN_CVTPS2DQ256,
24030 IX86_BUILTIN_CVTPS2PD256,
24031 IX86_BUILTIN_CVTTPD2DQ256,
24032 IX86_BUILTIN_CVTPD2DQ256,
24033 IX86_BUILTIN_CVTTPS2DQ256,
24034 IX86_BUILTIN_EXTRACTF128PD256,
24035 IX86_BUILTIN_EXTRACTF128PS256,
24036 IX86_BUILTIN_EXTRACTF128SI256,
24037 IX86_BUILTIN_VZEROALL,
24038 IX86_BUILTIN_VZEROUPPER,
24039 IX86_BUILTIN_VPERMILVARPD,
24040 IX86_BUILTIN_VPERMILVARPS,
24041 IX86_BUILTIN_VPERMILVARPD256,
24042 IX86_BUILTIN_VPERMILVARPS256,
24043 IX86_BUILTIN_VPERMILPD,
24044 IX86_BUILTIN_VPERMILPS,
24045 IX86_BUILTIN_VPERMILPD256,
24046 IX86_BUILTIN_VPERMILPS256,
24047 IX86_BUILTIN_VPERMIL2PD,
24048 IX86_BUILTIN_VPERMIL2PS,
24049 IX86_BUILTIN_VPERMIL2PD256,
24050 IX86_BUILTIN_VPERMIL2PS256,
24051 IX86_BUILTIN_VPERM2F128PD256,
24052 IX86_BUILTIN_VPERM2F128PS256,
24053 IX86_BUILTIN_VPERM2F128SI256,
24054 IX86_BUILTIN_VBROADCASTSS,
24055 IX86_BUILTIN_VBROADCASTSD256,
24056 IX86_BUILTIN_VBROADCASTSS256,
24057 IX86_BUILTIN_VBROADCASTPD256,
24058 IX86_BUILTIN_VBROADCASTPS256,
24059 IX86_BUILTIN_VINSERTF128PD256,
24060 IX86_BUILTIN_VINSERTF128PS256,
24061 IX86_BUILTIN_VINSERTF128SI256,
24062 IX86_BUILTIN_LOADUPD256,
24063 IX86_BUILTIN_LOADUPS256,
24064 IX86_BUILTIN_STOREUPD256,
24065 IX86_BUILTIN_STOREUPS256,
24066 IX86_BUILTIN_LDDQU256,
24067 IX86_BUILTIN_MOVNTDQ256,
24068 IX86_BUILTIN_MOVNTPD256,
24069 IX86_BUILTIN_MOVNTPS256,
24070 IX86_BUILTIN_LOADDQU256,
24071 IX86_BUILTIN_STOREDQU256,
24072 IX86_BUILTIN_MASKLOADPD,
24073 IX86_BUILTIN_MASKLOADPS,
24074 IX86_BUILTIN_MASKSTOREPD,
24075 IX86_BUILTIN_MASKSTOREPS,
24076 IX86_BUILTIN_MASKLOADPD256,
24077 IX86_BUILTIN_MASKLOADPS256,
24078 IX86_BUILTIN_MASKSTOREPD256,
24079 IX86_BUILTIN_MASKSTOREPS256,
24080 IX86_BUILTIN_MOVSHDUP256,
24081 IX86_BUILTIN_MOVSLDUP256,
24082 IX86_BUILTIN_MOVDDUP256,
24084 IX86_BUILTIN_SQRTPD256,
24085 IX86_BUILTIN_SQRTPS256,
24086 IX86_BUILTIN_SQRTPS_NR256,
24087 IX86_BUILTIN_RSQRTPS256,
24088 IX86_BUILTIN_RSQRTPS_NR256,
24090 IX86_BUILTIN_RCPPS256,
24092 IX86_BUILTIN_ROUNDPD256,
24093 IX86_BUILTIN_ROUNDPS256,
24095 IX86_BUILTIN_FLOORPD256,
24096 IX86_BUILTIN_CEILPD256,
24097 IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin.  */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;                           /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;          /* true if recorded here but the decl
                                        has not been built yet */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          /* Just record the builtin; it will be built later when the
             ISA it needs is enabled.  */
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
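
/* A minimal usage sketch (illustrative only -- the builtin names, the
   type code pairing and the enum values below are hypothetical
   placeholders, not definitions from this file): how builtins are
   registered through def_builtin.  */
#if 0
static void
example_def_builtins (void)
{
  /* Defined immediately (or deferred) whenever SSE2 is enabled.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
               V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_EXAMPLE);

  /* Additionally requires a 64-bit target; when !TARGET_64BIT,
     def_builtin records nothing and returns NULL_TREE.  */
  def_builtin (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_example64", INT64_FTYPE_INT64,
               IX86_BUILTIN_EXAMPLE64);
}
#endif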

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
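
/* Sketch (hypothetical names again): def_builtin_const is used for
   builtins with no side effects, so calls with identical arguments can
   be CSEd.  If the decl is deferred, const_p is remembered and applied
   later by ix86_add_new_builtins.  */
#if 0
static void
example_def_const_builtin (void)
{
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_example_min",
                     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_EXAMPLE_MIN);
}
#endif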

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
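
/* Sketch of the intended call pattern (a hypothetical driver; the real
   caller is the function-specific option handling elsewhere in this
   file): when function attributes turn on additional ISA bits, the
   deferred decls for those bits are materialized.  */
#if 0
static void
example_switch_isa (int old_isa, int new_isa)
{
  int added = new_isa & ~old_isa;
  if (added)
    ix86_add_new_builtins (added);
}
#endif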

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;		/* Flag bits; most tables store an (int)
				   ix86_builtin_func_type or machine mode
				   here instead.  */
};
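
/* Sketch (simplified, hypothetical helper): how a builtin_description
   table is typically walked at initialization time -- each named entry
   becomes a const builtin, with the flag field carrying the function
   type.  */
#if 0
static void
example_init_from_table (const struct builtin_description *d, size_t n)
{
  size_t i;

  for (i = 0; i < n; i++, d++)
    if (d->name)
      def_builtin_const (d->mask, d->name,
                         (enum ix86_builtin_func_type) d->flag, d->code);
}
#endif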

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
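
/* User-level sketch: the entry for "__builtin_ia32_loadups" above has
   type V4SF_FTYPE_PCFLOAT, i.e. a V4SF function taking a const float *.
   A hypothetical use (compiled with -msse):  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

static v4sf
example_load_unaligned (const float *p)
{
  /* Expands via CODE_FOR_sse_movups to an unaligned vector load.  */
  return __builtin_ia32_loadups (p);
}
#endif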

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24974 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24975 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24976 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24977 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24978 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24984 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24993 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24998 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24999 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25000 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25001 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25002 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25003 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
25006 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25007 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25008 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
25009 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25010 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25011 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
25013 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
25014 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
25015 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
25016 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
25018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
25019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
25022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
25024 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
25025 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
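
  /* SSE2 MMX */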
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
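
  /* SSE3 */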
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
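
  /* SSSE3 */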
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
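
  /* SSE4.1 */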
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
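
  /* SSE4.2 */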
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
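
  /* SSE4A */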
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
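
  /* AES */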
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
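
  /* PCLMUL */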
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
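
  /* AVX */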
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
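
  /* ABM */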
  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
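
  /* BMI */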
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
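
  /* TBM */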
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
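
  /* F16C */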
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
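/* The MULTI_ARG_* macros below are shorthand for the V..._FTYPE_...
   function types used by the FMA4/XOP table entries; each names the
   return vector mode and the argument modes.  */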
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
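
/* Each entry below records the required ISA mask, the insn code, the
   builtin name, the IX86_BUILTIN_* enumerator, a comparison code (or
   UNKNOWN) and the MULTI_ARG_* function type of the builtin.  */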
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
25497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
25498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
25499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
25500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
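/* For illustration (not part of the original table): an entry such as
   the vpcomltb one above means that a user-level call

       __v16qi r = __builtin_ia32_vpcomltb (a, b);

   is expanded through CODE_FOR_xop_maskcmpv16qi3 with LT passed along
   as the sub_code, so one insn pattern serves every comparison
   predicate (see ix86_expand_multi_arg_builtin below).  */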
25559 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25560 in the current target ISA, to allow the user to compile particular modules
25561 with target specific options that differ from the command line options.  */
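/* For example (hypothetical user code): a module compiled without
   -msse4.2 can still reach an SSE4.2 builtin from a function that
   carries a target attribute,

       __attribute__((target ("sse4.2")))
       unsigned int crc8 (unsigned int c, unsigned char v)
       {
         return __builtin_ia32_crc32qi (c, v);
       }

   so every builtin below is registered unconditionally and its ISA is
   checked later, at expansion time, in ix86_expand_builtin.  */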
25564 ix86_init_mmx_sse_builtins (void)
25566 const struct builtin_description * d;
25567 enum ix86_builtin_func_type ftype;
25570 /* Add all special builtins with a variable number of operands.  */
25571 for (i = 0, d = bdesc_special_args;
25572 i < ARRAY_SIZE (bdesc_special_args);
25578 ftype = (enum ix86_builtin_func_type) d->flag;
25579 def_builtin (d->mask, d->name, ftype, d->code);
25582 /* Add all builtins with a variable number of operands.  */
25583 for (i = 0, d = bdesc_args;
25584 i < ARRAY_SIZE (bdesc_args);
25590 ftype = (enum ix86_builtin_func_type) d->flag;
25591 def_builtin_const (d->mask, d->name, ftype, d->code);
25594 /* pcmpestr[im] insns. */
25595 for (i = 0, d = bdesc_pcmpestr;
25596 i < ARRAY_SIZE (bdesc_pcmpestr);
25599 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25600 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25602 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25603 def_builtin_const (d->mask, d->name, ftype, d->code);
25606 /* pcmpistr[im] insns. */
25607 for (i = 0, d = bdesc_pcmpistr;
25608 i < ARRAY_SIZE (bdesc_pcmpistr);
25611 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25612 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25614 ftype = INT_FTYPE_V16QI_V16QI_INT;
25615 def_builtin_const (d->mask, d->name, ftype, d->code);
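/* For illustration: the two function types above reflect the two
   result conventions; via <smmintrin.h>,

       int     idx = _mm_cmpistri (a, b, 0x0c);
       __m128i msk = _mm_cmpistrm (a, b, 0x0c);

   (the 0x0c mode byte is an arbitrary example) both funnel into the
   same underlying pcmpistr pattern with different outputs selected.  */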
25618 /* comi/ucomi insns. */
25619 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25621 if (d->mask == OPTION_MASK_ISA_SSE2)
25622 ftype = INT_FTYPE_V2DF_V2DF;
25624 ftype = INT_FTYPE_V4SF_V4SF;
25625 def_builtin_const (d->mask, d->name, ftype, d->code);
25629 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25630 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25631 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25632 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25634 /* SSE or 3DNow!A */
25635 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25636 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25637 IX86_BUILTIN_MASKMOVQ);
25640 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25641 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25643 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25644 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25645 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25646 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25649 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25650 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25651 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25652 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25655 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25656 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25657 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25658 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25659 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25660 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25661 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25662 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25663 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25664 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25665 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25666 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25669 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25670 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25673 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25674 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25675 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25676 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25677 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25678 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25679 IX86_BUILTIN_RDRAND64_STEP);
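/* For illustration (user-level code): the *_step builtins store the
   random value through the pointer and return nonzero on success, so
   a typical retry loop is

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         continue;

   which is why the function types take a pointer and return int.  */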
25681 /* MMX access to the vec_init patterns. */
25682 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25683 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25685 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25686 V4HI_FTYPE_HI_HI_HI_HI,
25687 IX86_BUILTIN_VEC_INIT_V4HI);
25689 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25690 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25691 IX86_BUILTIN_VEC_INIT_V8QI);
25693 /* Access to the vec_extract patterns. */
25694 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25695 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25696 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25697 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25698 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25699 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25700 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25701 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25702 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25703 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25705 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25706 "__builtin_ia32_vec_ext_v4hi",
25707 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25709 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25710 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25712 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25713 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25715 /* Access to the vec_set patterns. */
25716 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25717 "__builtin_ia32_vec_set_v2di",
25718 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25720 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25721 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25723 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25724 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25726 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25727 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25729 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25730 "__builtin_ia32_vec_set_v4hi",
25731 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25733 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25734 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
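/* For illustration (hypothetical user code): these builtins underlie
   the element-access intrinsics, e.g.

       float f = __builtin_ia32_vec_ext_v4sf (v, 2);
       w = __builtin_ia32_vec_set_v4si (w, 42, 0);

   where the selector must be an integer constant in range, as
   enforced by get_element_number below.  */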
25736 /* Add FMA4/XOP multi-arg builtin instructions.  */
25737 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25742 ftype = (enum ix86_builtin_func_type) d->flag;
25743 def_builtin_const (d->mask, d->name, ftype, d->code);
25747 /* Internal method for ix86_init_builtins. */
25750 ix86_init_builtins_va_builtins_abi (void)
25752 tree ms_va_ref, sysv_va_ref;
25753 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25754 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25755 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25756 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25760 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25761 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25762 ms_va_ref = build_reference_type (ms_va_list_type_node);
25764 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25767 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25768 fnvoid_va_start_ms =
25769 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25770 fnvoid_va_end_sysv =
25771 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25772 fnvoid_va_start_sysv =
25773 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25775 fnvoid_va_copy_ms =
25776 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25778 fnvoid_va_copy_sysv =
25779 build_function_type_list (void_type_node, sysv_va_ref,
25780 sysv_va_ref, NULL_TREE);
25782 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25783 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25784 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25785 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25786 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25787 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25788 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25789 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25790 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25791 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25792 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25793 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
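/* For illustration (hypothetical user code, 64-bit only): these let a
   program traverse varargs of the foreign ABI explicitly,

       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, last_named);
       ... __builtin_va_arg (ap, int) ...
       __builtin_ms_va_end (ap);

   inside a function declared __attribute__((ms_abi)); the identifier
   last_named is a placeholder for the last named parameter.  */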
25797 ix86_init_builtin_types (void)
25799 tree float128_type_node, float80_type_node;
25801 /* The __float80 type. */
25802 float80_type_node = long_double_type_node;
25803 if (TYPE_MODE (float80_type_node) != XFmode)
25805 /* long double is not the 80-bit type here; build __float80 explicitly.  */
25806 float80_type_node = make_node (REAL_TYPE);
25808 TYPE_PRECISION (float80_type_node) = 80;
25809 layout_type (float80_type_node);
25811 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25813 /* The __float128 type. */
25814 float128_type_node = make_node (REAL_TYPE);
25815 TYPE_PRECISION (float128_type_node) = 128;
25816 layout_type (float128_type_node);
25817 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25819 /* This macro is built by i386-builtin-types.awk. */
25820 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25824 ix86_init_builtins (void)
25828 ix86_init_builtin_types ();
25830 /* TFmode support builtins. */
25831 def_builtin_const (0, "__builtin_infq",
25832 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25833 def_builtin_const (0, "__builtin_huge_valq",
25834 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25836 /* We will expand them to a normal call if SSE2 isn't available, since
25837 they are used by libgcc.  */
25838 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25839 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25840 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25841 TREE_READONLY (t) = 1;
25842 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25844 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25845 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25846 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25847 TREE_READONLY (t) = 1;
25848 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
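/* For illustration: with the registrations above, user code such as

       __float128 x = __builtin_infq ();
       __float128 y = __builtin_fabsq (x);

   keeps working without SSE2, because __builtin_fabsq then degrades
   to an ordinary call to __fabstf2 in libgcc, matching the comment
   above.  */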
25850 ix86_init_mmx_sse_builtins ();
25853 ix86_init_builtins_va_builtins_abi ();
25855 #ifdef SUBTARGET_INIT_BUILTINS
25856 SUBTARGET_INIT_BUILTINS;
25860 /* Return the ix86 builtin for CODE. */
25863 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25865 if (code >= IX86_BUILTIN_MAX)
25866 return error_mark_node;
25868 return ix86_builtins[code];
25871 /* Errors in the source file can cause expand_expr to return const0_rtx
25872 where we expect a vector. To avoid crashing, use one of the vector
25873 clear instructions. */
25875 safe_vector_operand (rtx x, enum machine_mode mode)
25877 if (x == const0_rtx)
25878 x = CONST0_RTX (mode);
25882 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25885 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25888 tree arg0 = CALL_EXPR_ARG (exp, 0);
25889 tree arg1 = CALL_EXPR_ARG (exp, 1);
25890 rtx op0 = expand_normal (arg0);
25891 rtx op1 = expand_normal (arg1);
25892 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25893 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25894 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25896 if (VECTOR_MODE_P (mode0))
25897 op0 = safe_vector_operand (op0, mode0);
25898 if (VECTOR_MODE_P (mode1))
25899 op1 = safe_vector_operand (op1, mode1);
25901 if (optimize || !target
25902 || GET_MODE (target) != tmode
25903 || !insn_data[icode].operand[0].predicate (target, tmode))
25904 target = gen_reg_rtx (tmode);
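/* The builtin delivered a 32-bit integer where the insn wants a
   TImode operand: load the value into the low element of an XMM
   register and reinterpret that register in TImode.  */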
25906 if (GET_MODE (op1) == SImode && mode1 == TImode)
25908 rtx x = gen_reg_rtx (V4SImode);
25909 emit_insn (gen_sse2_loadd (x, op1));
25910 op1 = gen_lowpart (TImode, x);
25913 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25914 op0 = copy_to_mode_reg (mode0, op0);
25915 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25916 op1 = copy_to_mode_reg (mode1, op1);
25918 pat = GEN_FCN (icode) (target, op0, op1);
25927 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25930 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25931 enum ix86_builtin_func_type m_type,
25932 enum rtx_code sub_code)
25937 bool comparison_p = false;
25939 bool last_arg_constant = false;
25940 int num_memory = 0;
25943 enum machine_mode mode;
25946 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25950 case MULTI_ARG_4_DF2_DI_I:
25951 case MULTI_ARG_4_DF2_DI_I1:
25952 case MULTI_ARG_4_SF2_SI_I:
25953 case MULTI_ARG_4_SF2_SI_I1:
25955 last_arg_constant = true;
25958 case MULTI_ARG_3_SF:
25959 case MULTI_ARG_3_DF:
25960 case MULTI_ARG_3_SF2:
25961 case MULTI_ARG_3_DF2:
25962 case MULTI_ARG_3_DI:
25963 case MULTI_ARG_3_SI:
25964 case MULTI_ARG_3_SI_DI:
25965 case MULTI_ARG_3_HI:
25966 case MULTI_ARG_3_HI_SI:
25967 case MULTI_ARG_3_QI:
25968 case MULTI_ARG_3_DI2:
25969 case MULTI_ARG_3_SI2:
25970 case MULTI_ARG_3_HI2:
25971 case MULTI_ARG_3_QI2:
25975 case MULTI_ARG_2_SF:
25976 case MULTI_ARG_2_DF:
25977 case MULTI_ARG_2_DI:
25978 case MULTI_ARG_2_SI:
25979 case MULTI_ARG_2_HI:
25980 case MULTI_ARG_2_QI:
25984 case MULTI_ARG_2_DI_IMM:
25985 case MULTI_ARG_2_SI_IMM:
25986 case MULTI_ARG_2_HI_IMM:
25987 case MULTI_ARG_2_QI_IMM:
25989 last_arg_constant = true;
25992 case MULTI_ARG_1_SF:
25993 case MULTI_ARG_1_DF:
25994 case MULTI_ARG_1_SF2:
25995 case MULTI_ARG_1_DF2:
25996 case MULTI_ARG_1_DI:
25997 case MULTI_ARG_1_SI:
25998 case MULTI_ARG_1_HI:
25999 case MULTI_ARG_1_QI:
26000 case MULTI_ARG_1_SI_DI:
26001 case MULTI_ARG_1_HI_DI:
26002 case MULTI_ARG_1_HI_SI:
26003 case MULTI_ARG_1_QI_DI:
26004 case MULTI_ARG_1_QI_SI:
26005 case MULTI_ARG_1_QI_HI:
26009 case MULTI_ARG_2_DI_CMP:
26010 case MULTI_ARG_2_SI_CMP:
26011 case MULTI_ARG_2_HI_CMP:
26012 case MULTI_ARG_2_QI_CMP:
26014 comparison_p = true;
26017 case MULTI_ARG_2_SF_TF:
26018 case MULTI_ARG_2_DF_TF:
26019 case MULTI_ARG_2_DI_TF:
26020 case MULTI_ARG_2_SI_TF:
26021 case MULTI_ARG_2_HI_TF:
26022 case MULTI_ARG_2_QI_TF:
26028 gcc_unreachable ();
26031 if (optimize || !target
26032 || GET_MODE (target) != tmode
26033 || !insn_data[icode].operand[0].predicate (target, tmode))
26034 target = gen_reg_rtx (tmode);
26036 gcc_assert (nargs <= 4);
26038 for (i = 0; i < nargs; i++)
26040 tree arg = CALL_EXPR_ARG (exp, i);
26041 rtx op = expand_normal (arg);
26042 int adjust = (comparison_p) ? 1 : 0;
26043 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
26045 if (last_arg_constant && i == nargs-1)
26047 if (!CONST_INT_P (op))
26049 error ("last argument must be an immediate");
26050 return gen_reg_rtx (tmode);
26055 if (VECTOR_MODE_P (mode))
26056 op = safe_vector_operand (op, mode);
26058 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26060 if (memory_operand (op, mode))
26063 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
26066 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
26068 op = force_reg (mode, op);
26072 args[i].mode = mode;
26078 pat = GEN_FCN (icode) (target, args[0].op);
26083 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
26084 GEN_INT ((int)sub_code));
26085 else if (! comparison_p)
26086 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26089 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
26093 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
26098 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26102 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
26106 gcc_unreachable ();
26116 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
26117 insns with vec_merge. */
26120 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
26124 tree arg0 = CALL_EXPR_ARG (exp, 0);
26125 rtx op1, op0 = expand_normal (arg0);
26126 enum machine_mode tmode = insn_data[icode].operand[0].mode;
26127 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
26129 if (optimize || !target
26130 || GET_MODE (target) != tmode
26131 || !insn_data[icode].operand[0].predicate (target, tmode))
26132 target = gen_reg_rtx (tmode);
26134 if (VECTOR_MODE_P (mode0))
26135 op0 = safe_vector_operand (op0, mode0);
26137 if ((optimize && !register_operand (op0, mode0))
26138 || !insn_data[icode].operand[1].predicate (op0, mode0))
26139 op0 = copy_to_mode_reg (mode0, op0);
26142 if (!insn_data[icode].operand[2].predicate (op1, mode0))
26143 op1 = copy_to_mode_reg (mode0, op1);
26145 pat = GEN_FCN (icode) (target, op0, op1);
26152 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
26155 ix86_expand_sse_compare (const struct builtin_description *d,
26156 tree exp, rtx target, bool swap)
26159 tree arg0 = CALL_EXPR_ARG (exp, 0);
26160 tree arg1 = CALL_EXPR_ARG (exp, 1);
26161 rtx op0 = expand_normal (arg0);
26162 rtx op1 = expand_normal (arg1);
26164 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26165 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26166 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26167 enum rtx_code comparison = d->comparison;
26169 if (VECTOR_MODE_P (mode0))
26170 op0 = safe_vector_operand (op0, mode0);
26171 if (VECTOR_MODE_P (mode1))
26172 op1 = safe_vector_operand (op1, mode1);
26174 /* Swap operands if we have a comparison that isn't available in hardware.  */
26178 rtx tmp = gen_reg_rtx (mode1);
26179 emit_move_insn (tmp, op1);
26184 if (optimize || !target
26185 || GET_MODE (target) != tmode
26186 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26187 target = gen_reg_rtx (tmode);
26189 if ((optimize && !register_operand (op0, mode0))
26190 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26191 op0 = copy_to_mode_reg (mode0, op0);
26192 if ((optimize && !register_operand (op1, mode1))
26193 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26194 op1 = copy_to_mode_reg (mode1, op1);
26196 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26197 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26204 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26207 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26211 tree arg0 = CALL_EXPR_ARG (exp, 0);
26212 tree arg1 = CALL_EXPR_ARG (exp, 1);
26213 rtx op0 = expand_normal (arg0);
26214 rtx op1 = expand_normal (arg1);
26215 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26216 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26217 enum rtx_code comparison = d->comparison;
26219 if (VECTOR_MODE_P (mode0))
26220 op0 = safe_vector_operand (op0, mode0);
26221 if (VECTOR_MODE_P (mode1))
26222 op1 = safe_vector_operand (op1, mode1);
26224 /* Swap operands if we have a comparison that isn't available in hardware.  */
26226 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26233 target = gen_reg_rtx (SImode);
26234 emit_move_insn (target, const0_rtx);
26235 target = gen_rtx_SUBREG (QImode, target, 0);
26237 if ((optimize && !register_operand (op0, mode0))
26238 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26239 op0 = copy_to_mode_reg (mode0, op0);
26240 if ((optimize && !register_operand (op1, mode1))
26241 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26242 op1 = copy_to_mode_reg (mode1, op1);
26244 pat = GEN_FCN (d->icode) (op0, op1);
26248 emit_insn (gen_rtx_SET (VOIDmode,
26249 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26250 gen_rtx_fmt_ee (comparison, QImode,
26254 return SUBREG_REG (target);
26257 /* Subroutine of ix86_expand_args_builtin to take care of round insns. */
26260 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
26264 tree arg0 = CALL_EXPR_ARG (exp, 0);
26265 rtx op1, op0 = expand_normal (arg0);
26266 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26267 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26269 if (optimize || target == 0
26270 || GET_MODE (target) != tmode
26271 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26272 target = gen_reg_rtx (tmode);
26274 if (VECTOR_MODE_P (mode0))
26275 op0 = safe_vector_operand (op0, mode0);
26277 if ((optimize && !register_operand (op0, mode0))
26278 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26279 op0 = copy_to_mode_reg (mode0, op0);
26281 op1 = GEN_INT (d->comparison);
26283 pat = GEN_FCN (d->icode) (target, op0, op1);
26290 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26293 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26297 tree arg0 = CALL_EXPR_ARG (exp, 0);
26298 tree arg1 = CALL_EXPR_ARG (exp, 1);
26299 rtx op0 = expand_normal (arg0);
26300 rtx op1 = expand_normal (arg1);
26301 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26302 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26303 enum rtx_code comparison = d->comparison;
26305 if (VECTOR_MODE_P (mode0))
26306 op0 = safe_vector_operand (op0, mode0);
26307 if (VECTOR_MODE_P (mode1))
26308 op1 = safe_vector_operand (op1, mode1);
26310 target = gen_reg_rtx (SImode);
26311 emit_move_insn (target, const0_rtx);
26312 target = gen_rtx_SUBREG (QImode, target, 0);
26314 if ((optimize && !register_operand (op0, mode0))
26315 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26316 op0 = copy_to_mode_reg (mode0, op0);
26317 if ((optimize && !register_operand (op1, mode1))
26318 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26319 op1 = copy_to_mode_reg (mode1, op1);
26321 pat = GEN_FCN (d->icode) (op0, op1);
26325 emit_insn (gen_rtx_SET (VOIDmode,
26326 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26327 gen_rtx_fmt_ee (comparison, QImode,
26331 return SUBREG_REG (target);
26334 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26337 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26338 tree exp, rtx target)
26341 tree arg0 = CALL_EXPR_ARG (exp, 0);
26342 tree arg1 = CALL_EXPR_ARG (exp, 1);
26343 tree arg2 = CALL_EXPR_ARG (exp, 2);
26344 tree arg3 = CALL_EXPR_ARG (exp, 3);
26345 tree arg4 = CALL_EXPR_ARG (exp, 4);
26346 rtx scratch0, scratch1;
26347 rtx op0 = expand_normal (arg0);
26348 rtx op1 = expand_normal (arg1);
26349 rtx op2 = expand_normal (arg2);
26350 rtx op3 = expand_normal (arg3);
26351 rtx op4 = expand_normal (arg4);
26352 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26354 tmode0 = insn_data[d->icode].operand[0].mode;
26355 tmode1 = insn_data[d->icode].operand[1].mode;
26356 modev2 = insn_data[d->icode].operand[2].mode;
26357 modei3 = insn_data[d->icode].operand[3].mode;
26358 modev4 = insn_data[d->icode].operand[4].mode;
26359 modei5 = insn_data[d->icode].operand[5].mode;
26360 modeimm = insn_data[d->icode].operand[6].mode;
26362 if (VECTOR_MODE_P (modev2))
26363 op0 = safe_vector_operand (op0, modev2);
26364 if (VECTOR_MODE_P (modev4))
26365 op2 = safe_vector_operand (op2, modev4);
26367 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26368 op0 = copy_to_mode_reg (modev2, op0);
26369 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26370 op1 = copy_to_mode_reg (modei3, op1);
26371 if ((optimize && !register_operand (op2, modev4))
26372 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26373 op2 = copy_to_mode_reg (modev4, op2);
26374 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26375 op3 = copy_to_mode_reg (modei5, op3);
26377 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26379 error ("the fifth argument must be an 8-bit immediate");
26383 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26385 if (optimize || !target
26386 || GET_MODE (target) != tmode0
26387 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26388 target = gen_reg_rtx (tmode0);
26390 scratch1 = gen_reg_rtx (tmode1);
26392 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26394 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26396 if (optimize || !target
26397 || GET_MODE (target) != tmode1
26398 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26399 target = gen_reg_rtx (tmode1);
26401 scratch0 = gen_reg_rtx (tmode0);
26403 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26407 gcc_assert (d->flag);
26409 scratch0 = gen_reg_rtx (tmode0);
26410 scratch1 = gen_reg_rtx (tmode1);
26412 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26422 target = gen_reg_rtx (SImode);
26423 emit_move_insn (target, const0_rtx);
26424 target = gen_rtx_SUBREG (QImode, target, 0);
26427 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26428 gen_rtx_fmt_ee (EQ, QImode,
26429 gen_rtx_REG ((enum machine_mode) d->flag,
26432 return SUBREG_REG (target);
26439 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26442 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26443 tree exp, rtx target)
26446 tree arg0 = CALL_EXPR_ARG (exp, 0);
26447 tree arg1 = CALL_EXPR_ARG (exp, 1);
26448 tree arg2 = CALL_EXPR_ARG (exp, 2);
26449 rtx scratch0, scratch1;
26450 rtx op0 = expand_normal (arg0);
26451 rtx op1 = expand_normal (arg1);
26452 rtx op2 = expand_normal (arg2);
26453 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26455 tmode0 = insn_data[d->icode].operand[0].mode;
26456 tmode1 = insn_data[d->icode].operand[1].mode;
26457 modev2 = insn_data[d->icode].operand[2].mode;
26458 modev3 = insn_data[d->icode].operand[3].mode;
26459 modeimm = insn_data[d->icode].operand[4].mode;
26461 if (VECTOR_MODE_P (modev2))
26462 op0 = safe_vector_operand (op0, modev2);
26463 if (VECTOR_MODE_P (modev3))
26464 op1 = safe_vector_operand (op1, modev3);
26466 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26467 op0 = copy_to_mode_reg (modev2, op0);
26468 if ((optimize && !register_operand (op1, modev3))
26469 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26470 op1 = copy_to_mode_reg (modev3, op1);
26472 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26474 error ("the third argument must be an 8-bit immediate");
26478 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26480 if (optimize || !target
26481 || GET_MODE (target) != tmode0
26482 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26483 target = gen_reg_rtx (tmode0);
26485 scratch1 = gen_reg_rtx (tmode1);
26487 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26489 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26491 if (optimize || !target
26492 || GET_MODE (target) != tmode1
26493 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26494 target = gen_reg_rtx (tmode1);
26496 scratch0 = gen_reg_rtx (tmode0);
26498 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26502 gcc_assert (d->flag);
26504 scratch0 = gen_reg_rtx (tmode0);
26505 scratch1 = gen_reg_rtx (tmode1);
26507 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26517 target = gen_reg_rtx (SImode);
26518 emit_move_insn (target, const0_rtx);
26519 target = gen_rtx_SUBREG (QImode, target, 0);
26522 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26523 gen_rtx_fmt_ee (EQ, QImode,
26524 gen_rtx_REG ((enum machine_mode) d->flag,
26527 return SUBREG_REG (target);
26533 /* Subroutine of ix86_expand_builtin to take care of insns with
26534 a variable number of operands.  */
26537 ix86_expand_args_builtin (const struct builtin_description *d,
26538 tree exp, rtx target)
26540 rtx pat, real_target;
26541 unsigned int i, nargs;
26542 unsigned int nargs_constant = 0;
26543 int num_memory = 0;
26547 enum machine_mode mode;
26549 bool last_arg_count = false;
26550 enum insn_code icode = d->icode;
26551 const struct insn_data_d *insn_p = &insn_data[icode];
26552 enum machine_mode tmode = insn_p->operand[0].mode;
26553 enum machine_mode rmode = VOIDmode;
26555 enum rtx_code comparison = d->comparison;
26557 switch ((enum ix86_builtin_func_type) d->flag)
26559 case V2DF_FTYPE_V2DF_ROUND:
26560 case V4DF_FTYPE_V4DF_ROUND:
26561 case V4SF_FTYPE_V4SF_ROUND:
26562 case V8SF_FTYPE_V8SF_ROUND:
26563 return ix86_expand_sse_round (d, exp, target);
26564 case INT_FTYPE_V8SF_V8SF_PTEST:
26565 case INT_FTYPE_V4DI_V4DI_PTEST:
26566 case INT_FTYPE_V4DF_V4DF_PTEST:
26567 case INT_FTYPE_V4SF_V4SF_PTEST:
26568 case INT_FTYPE_V2DI_V2DI_PTEST:
26569 case INT_FTYPE_V2DF_V2DF_PTEST:
26570 return ix86_expand_sse_ptest (d, exp, target);
26571 case FLOAT128_FTYPE_FLOAT128:
26572 case FLOAT_FTYPE_FLOAT:
26573 case INT_FTYPE_INT:
26574 case UINT64_FTYPE_INT:
26575 case UINT16_FTYPE_UINT16:
26576 case INT64_FTYPE_INT64:
26577 case INT64_FTYPE_V4SF:
26578 case INT64_FTYPE_V2DF:
26579 case INT_FTYPE_V16QI:
26580 case INT_FTYPE_V8QI:
26581 case INT_FTYPE_V8SF:
26582 case INT_FTYPE_V4DF:
26583 case INT_FTYPE_V4SF:
26584 case INT_FTYPE_V2DF:
26585 case V16QI_FTYPE_V16QI:
26586 case V8SI_FTYPE_V8SF:
26587 case V8SI_FTYPE_V4SI:
26588 case V8HI_FTYPE_V8HI:
26589 case V8HI_FTYPE_V16QI:
26590 case V8QI_FTYPE_V8QI:
26591 case V8SF_FTYPE_V8SF:
26592 case V8SF_FTYPE_V8SI:
26593 case V8SF_FTYPE_V4SF:
26594 case V8SF_FTYPE_V8HI:
26595 case V4SI_FTYPE_V4SI:
26596 case V4SI_FTYPE_V16QI:
26597 case V4SI_FTYPE_V4SF:
26598 case V4SI_FTYPE_V8SI:
26599 case V4SI_FTYPE_V8HI:
26600 case V4SI_FTYPE_V4DF:
26601 case V4SI_FTYPE_V2DF:
26602 case V4HI_FTYPE_V4HI:
26603 case V4DF_FTYPE_V4DF:
26604 case V4DF_FTYPE_V4SI:
26605 case V4DF_FTYPE_V4SF:
26606 case V4DF_FTYPE_V2DF:
26607 case V4SF_FTYPE_V4SF:
26608 case V4SF_FTYPE_V4SI:
26609 case V4SF_FTYPE_V8SF:
26610 case V4SF_FTYPE_V4DF:
26611 case V4SF_FTYPE_V8HI:
26612 case V4SF_FTYPE_V2DF:
26613 case V2DI_FTYPE_V2DI:
26614 case V2DI_FTYPE_V16QI:
26615 case V2DI_FTYPE_V8HI:
26616 case V2DI_FTYPE_V4SI:
26617 case V2DF_FTYPE_V2DF:
26618 case V2DF_FTYPE_V4SI:
26619 case V2DF_FTYPE_V4DF:
26620 case V2DF_FTYPE_V4SF:
26621 case V2DF_FTYPE_V2SI:
26622 case V2SI_FTYPE_V2SI:
26623 case V2SI_FTYPE_V4SF:
26624 case V2SI_FTYPE_V2SF:
26625 case V2SI_FTYPE_V2DF:
26626 case V2SF_FTYPE_V2SF:
26627 case V2SF_FTYPE_V2SI:
26630 case V4SF_FTYPE_V4SF_VEC_MERGE:
26631 case V2DF_FTYPE_V2DF_VEC_MERGE:
26632 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26633 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26634 case V16QI_FTYPE_V16QI_V16QI:
26635 case V16QI_FTYPE_V8HI_V8HI:
26636 case V8QI_FTYPE_V8QI_V8QI:
26637 case V8QI_FTYPE_V4HI_V4HI:
26638 case V8HI_FTYPE_V8HI_V8HI:
26639 case V8HI_FTYPE_V16QI_V16QI:
26640 case V8HI_FTYPE_V4SI_V4SI:
26641 case V8SF_FTYPE_V8SF_V8SF:
26642 case V8SF_FTYPE_V8SF_V8SI:
26643 case V4SI_FTYPE_V4SI_V4SI:
26644 case V4SI_FTYPE_V8HI_V8HI:
26645 case V4SI_FTYPE_V4SF_V4SF:
26646 case V4SI_FTYPE_V2DF_V2DF:
26647 case V4HI_FTYPE_V4HI_V4HI:
26648 case V4HI_FTYPE_V8QI_V8QI:
26649 case V4HI_FTYPE_V2SI_V2SI:
26650 case V4DF_FTYPE_V4DF_V4DF:
26651 case V4DF_FTYPE_V4DF_V4DI:
26652 case V4SF_FTYPE_V4SF_V4SF:
26653 case V4SF_FTYPE_V4SF_V4SI:
26654 case V4SF_FTYPE_V4SF_V2SI:
26655 case V4SF_FTYPE_V4SF_V2DF:
26656 case V4SF_FTYPE_V4SF_DI:
26657 case V4SF_FTYPE_V4SF_SI:
26658 case V2DI_FTYPE_V2DI_V2DI:
26659 case V2DI_FTYPE_V16QI_V16QI:
26660 case V2DI_FTYPE_V4SI_V4SI:
26661 case V2DI_FTYPE_V2DI_V16QI:
26662 case V2DI_FTYPE_V2DF_V2DF:
26663 case V2SI_FTYPE_V2SI_V2SI:
26664 case V2SI_FTYPE_V4HI_V4HI:
26665 case V2SI_FTYPE_V2SF_V2SF:
26666 case V2DF_FTYPE_V2DF_V2DF:
26667 case V2DF_FTYPE_V2DF_V4SF:
26668 case V2DF_FTYPE_V2DF_V2DI:
26669 case V2DF_FTYPE_V2DF_DI:
26670 case V2DF_FTYPE_V2DF_SI:
26671 case V2SF_FTYPE_V2SF_V2SF:
26672 case V1DI_FTYPE_V1DI_V1DI:
26673 case V1DI_FTYPE_V8QI_V8QI:
26674 case V1DI_FTYPE_V2SI_V2SI:
26675 if (comparison == UNKNOWN)
26676 return ix86_expand_binop_builtin (icode, exp, target);
26679 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26680 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26681 gcc_assert (comparison != UNKNOWN);
26685 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26686 case V8HI_FTYPE_V8HI_SI_COUNT:
26687 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26688 case V4SI_FTYPE_V4SI_SI_COUNT:
26689 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26690 case V4HI_FTYPE_V4HI_SI_COUNT:
26691 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26692 case V2DI_FTYPE_V2DI_SI_COUNT:
26693 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26694 case V2SI_FTYPE_V2SI_SI_COUNT:
26695 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26696 case V1DI_FTYPE_V1DI_SI_COUNT:
26698 last_arg_count = true;
26700 case UINT64_FTYPE_UINT64_UINT64:
26701 case UINT_FTYPE_UINT_UINT:
26702 case UINT_FTYPE_UINT_USHORT:
26703 case UINT_FTYPE_UINT_UCHAR:
26704 case UINT16_FTYPE_UINT16_INT:
26705 case UINT8_FTYPE_UINT8_INT:
26708 case V2DI_FTYPE_V2DI_INT_CONVERT:
26711 nargs_constant = 1;
26713 case V8HI_FTYPE_V8HI_INT:
26714 case V8HI_FTYPE_V8SF_INT:
26715 case V8HI_FTYPE_V4SF_INT:
26716 case V8SF_FTYPE_V8SF_INT:
26717 case V4SI_FTYPE_V4SI_INT:
26718 case V4SI_FTYPE_V8SI_INT:
26719 case V4HI_FTYPE_V4HI_INT:
26720 case V4DF_FTYPE_V4DF_INT:
26721 case V4SF_FTYPE_V4SF_INT:
26722 case V4SF_FTYPE_V8SF_INT:
26723 case V2DI_FTYPE_V2DI_INT:
26724 case V2DF_FTYPE_V2DF_INT:
26725 case V2DF_FTYPE_V4DF_INT:
26727 nargs_constant = 1;
26729 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26730 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26731 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26732 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26733 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26736 case V16QI_FTYPE_V16QI_V16QI_INT:
26737 case V8HI_FTYPE_V8HI_V8HI_INT:
26738 case V8SI_FTYPE_V8SI_V8SI_INT:
26739 case V8SI_FTYPE_V8SI_V4SI_INT:
26740 case V8SF_FTYPE_V8SF_V8SF_INT:
26741 case V8SF_FTYPE_V8SF_V4SF_INT:
26742 case V4SI_FTYPE_V4SI_V4SI_INT:
26743 case V4DF_FTYPE_V4DF_V4DF_INT:
26744 case V4DF_FTYPE_V4DF_V2DF_INT:
26745 case V4SF_FTYPE_V4SF_V4SF_INT:
26746 case V2DI_FTYPE_V2DI_V2DI_INT:
26747 case V2DF_FTYPE_V2DF_V2DF_INT:
26749 nargs_constant = 1;
26751 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26754 nargs_constant = 1;
26756 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26759 nargs_constant = 1;
26761 case V2DI_FTYPE_V2DI_UINT_UINT:
26763 nargs_constant = 2;
26765 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26766 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26767 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26768 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26770 nargs_constant = 1;
26772 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26774 nargs_constant = 2;
26777 gcc_unreachable ();
26780 gcc_assert (nargs <= ARRAY_SIZE (args));
26782 if (comparison != UNKNOWN)
26784 gcc_assert (nargs == 2);
26785 return ix86_expand_sse_compare (d, exp, target, swap);
26788 if (rmode == VOIDmode || rmode == tmode)
26792 || GET_MODE (target) != tmode
26793 || !insn_p->operand[0].predicate (target, tmode))
26794 target = gen_reg_rtx (tmode);
26795 real_target = target;
26799 target = gen_reg_rtx (rmode);
26800 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26803 for (i = 0; i < nargs; i++)
26805 tree arg = CALL_EXPR_ARG (exp, i);
26806 rtx op = expand_normal (arg);
26807 enum machine_mode mode = insn_p->operand[i + 1].mode;
26808 bool match = insn_p->operand[i + 1].predicate (op, mode);
26810 if (last_arg_count && (i + 1) == nargs)
26812 /* SIMD shift insns take either an 8-bit immediate or a
26813 register as the count.  But builtin functions take an int as
26814 the count.  If the count doesn't match, we put it in a register.  */
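/* For illustration: a call such as _mm_slli_epi32 (x, n) arrives here
   with the count as a plain int; when n fails the insn's predicate
   (e.g. it is not a suitable constant), the count is copied into a
   register below instead of being rejected.  */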
26817 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26818 if (!insn_p->operand[i + 1].predicate (op, mode))
26819 op = copy_to_reg (op);
26822 else if ((nargs - i) <= nargs_constant)
26827 case CODE_FOR_sse4_1_roundpd:
26828 case CODE_FOR_sse4_1_roundps:
26829 case CODE_FOR_sse4_1_roundsd:
26830 case CODE_FOR_sse4_1_roundss:
26831 case CODE_FOR_sse4_1_blendps:
26832 case CODE_FOR_avx_blendpd256:
26833 case CODE_FOR_avx_vpermilv4df:
26834 case CODE_FOR_avx_roundpd256:
26835 case CODE_FOR_avx_roundps256:
26836 error ("the last argument must be a 4-bit immediate");
26839 case CODE_FOR_sse4_1_blendpd:
26840 case CODE_FOR_avx_vpermilv2df:
26841 case CODE_FOR_xop_vpermil2v2df3:
26842 case CODE_FOR_xop_vpermil2v4sf3:
26843 case CODE_FOR_xop_vpermil2v4df3:
26844 case CODE_FOR_xop_vpermil2v8sf3:
26845 error ("the last argument must be a 2-bit immediate");
26848 case CODE_FOR_avx_vextractf128v4df:
26849 case CODE_FOR_avx_vextractf128v8sf:
26850 case CODE_FOR_avx_vextractf128v8si:
26851 case CODE_FOR_avx_vinsertf128v4df:
26852 case CODE_FOR_avx_vinsertf128v8sf:
26853 case CODE_FOR_avx_vinsertf128v8si:
26854 error ("the last argument must be a 1-bit immediate");
26857 case CODE_FOR_avx_cmpsdv2df3:
26858 case CODE_FOR_avx_cmpssv4sf3:
26859 case CODE_FOR_avx_cmppdv2df3:
26860 case CODE_FOR_avx_cmppsv4sf3:
26861 case CODE_FOR_avx_cmppdv4df3:
26862 case CODE_FOR_avx_cmppsv8sf3:
26863 error ("the last argument must be a 5-bit immediate");
26867 switch (nargs_constant)
26870 if ((nargs - i) == nargs_constant)
26872 error ("the next to last argument must be an 8-bit immediate");
26876 error ("the last argument must be an 8-bit immediate");
26879 gcc_unreachable ();
26886 if (VECTOR_MODE_P (mode))
26887 op = safe_vector_operand (op, mode);
26889 /* If we aren't optimizing, only allow one memory operand to be generated.  */
26891 if (memory_operand (op, mode))
26894 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26896 if (optimize || !match || num_memory > 1)
26897 op = copy_to_mode_reg (mode, op);
26901 op = copy_to_reg (op);
26902 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26907 args[i].mode = mode;
26913 pat = GEN_FCN (icode) (real_target, args[0].op);
26916 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26919 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26923 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26924 args[2].op, args[3].op);
26927 gcc_unreachable ();
26937 /* Subroutine of ix86_expand_builtin to take care of special insns
26938 with a variable number of operands.  */
26941 ix86_expand_special_args_builtin (const struct builtin_description *d,
26942 tree exp, rtx target)
26946 unsigned int i, nargs, arg_adjust, memory;
26950 enum machine_mode mode;
26952 enum insn_code icode = d->icode;
26953 bool last_arg_constant = false;
26954 const struct insn_data_d *insn_p = &insn_data[icode];
26955 enum machine_mode tmode = insn_p->operand[0].mode;
26956 enum { load, store } klass;
26958 switch ((enum ix86_builtin_func_type) d->flag)
26960 case VOID_FTYPE_VOID:
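/* For the vzeroupper intrinsic, pass vzeroupper_intrinsic as the
   pattern's operand so the insn can be told apart from
   compiler-generated vzeroupper insns later on.  */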
26961 if (icode == CODE_FOR_avx_vzeroupper)
26962 target = GEN_INT (vzeroupper_intrinsic);
26963 emit_insn (GEN_FCN (icode) (target));
26965 case VOID_FTYPE_UINT64:
26966 case VOID_FTYPE_UNSIGNED:
26972 case UINT64_FTYPE_VOID:
26973 case UNSIGNED_FTYPE_VOID:
26978 case UINT64_FTYPE_PUNSIGNED:
26979 case V2DI_FTYPE_PV2DI:
26980 case V32QI_FTYPE_PCCHAR:
26981 case V16QI_FTYPE_PCCHAR:
26982 case V8SF_FTYPE_PCV4SF:
26983 case V8SF_FTYPE_PCFLOAT:
26984 case V4SF_FTYPE_PCFLOAT:
26985 case V4DF_FTYPE_PCV2DF:
26986 case V4DF_FTYPE_PCDOUBLE:
26987 case V2DF_FTYPE_PCDOUBLE:
26988 case VOID_FTYPE_PVOID:
26993 case VOID_FTYPE_PV2SF_V4SF:
26994 case VOID_FTYPE_PV4DI_V4DI:
26995 case VOID_FTYPE_PV2DI_V2DI:
26996 case VOID_FTYPE_PCHAR_V32QI:
26997 case VOID_FTYPE_PCHAR_V16QI:
26998 case VOID_FTYPE_PFLOAT_V8SF:
26999 case VOID_FTYPE_PFLOAT_V4SF:
27000 case VOID_FTYPE_PDOUBLE_V4DF:
27001 case VOID_FTYPE_PDOUBLE_V2DF:
27002 case VOID_FTYPE_PULONGLONG_ULONGLONG:
27003 case VOID_FTYPE_PINT_INT:
27006 /* Reserve memory operand for target. */
27007 memory = ARRAY_SIZE (args);
27009 case V4SF_FTYPE_V4SF_PCV2SF:
27010 case V2DF_FTYPE_V2DF_PCDOUBLE:
27015 case V8SF_FTYPE_PCV8SF_V8SI:
27016 case V4DF_FTYPE_PCV4DF_V4DI:
27017 case V4SF_FTYPE_PCV4SF_V4SI:
27018 case V2DF_FTYPE_PCV2DF_V2DI:
27023 case VOID_FTYPE_PV8SF_V8SI_V8SF:
27024 case VOID_FTYPE_PV4DF_V4DI_V4DF:
27025 case VOID_FTYPE_PV4SF_V4SI_V4SF:
27026 case VOID_FTYPE_PV2DF_V2DI_V2DF:
27029 /* Reserve memory operand for target. */
27030 memory = ARRAY_SIZE (args);
27032 case VOID_FTYPE_UINT_UINT_UINT:
27033 case VOID_FTYPE_UINT64_UINT_UINT:
27034 case UCHAR_FTYPE_UINT_UINT_UINT:
27035 case UCHAR_FTYPE_UINT64_UINT_UINT:
27038 memory = ARRAY_SIZE (args);
27039 last_arg_constant = true;
27042 gcc_unreachable ();
27045 gcc_assert (nargs <= ARRAY_SIZE (args));
27047 if (klass == store)
27049 arg = CALL_EXPR_ARG (exp, 0);
27050 op = expand_normal (arg);
27051 gcc_assert (target == 0);
27053 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
27055 target = force_reg (tmode, op);
27063 || GET_MODE (target) != tmode
27064 || !insn_p->operand[0].predicate (target, tmode))
27065 target = gen_reg_rtx (tmode);
27068 for (i = 0; i < nargs; i++)
27070 enum machine_mode mode = insn_p->operand[i + 1].mode;
27073 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
27074 op = expand_normal (arg);
27075 match = insn_p->operand[i + 1].predicate (op, mode);
27077 if (last_arg_constant && (i + 1) == nargs)
27081 if (icode == CODE_FOR_lwp_lwpvalsi3
27082 || icode == CODE_FOR_lwp_lwpinssi3
27083 || icode == CODE_FOR_lwp_lwpvaldi3
27084 || icode == CODE_FOR_lwp_lwpinsdi3)
27085 error ("the last argument must be a 32-bit immediate");
27087 error ("the last argument must be an 8-bit immediate");
27095 /* This must be the memory operand. */
27096 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
27097 gcc_assert (GET_MODE (op) == mode
27098 || GET_MODE (op) == VOIDmode);
27102 /* This must be a register.  */
27103 if (VECTOR_MODE_P (mode))
27104 op = safe_vector_operand (op, mode);
27106 gcc_assert (GET_MODE (op) == mode
27107 || GET_MODE (op) == VOIDmode);
27108 op = copy_to_mode_reg (mode, op);
27113 args[i].mode = mode;
27119 pat = GEN_FCN (icode) (target);
27122 pat = GEN_FCN (icode) (target, args[0].op);
27125 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
27128 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
27131 gcc_unreachable ();
27137 return klass == store ? 0 : target;
27140 /* Return the integer constant in ARG. Constrain it to be in the range
27141 of the subparts of VEC_TYPE; issue an error if not. */
27144 get_element_number (tree vec_type, tree arg)
27146 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
27148 if (!host_integerp (arg, 1)
27149 || (elt = tree_low_cst (arg, 1), elt > max))
27151 error ("selector must be an integer constant in the range 0..%wi", max);
27158 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27159 ix86_expand_vector_init. We DO have language-level syntax for this, in
27160 the form of (type){ init-list }. Except that since we can't place emms
27161 instructions from inside the compiler, we can't allow the use of MMX
27162 registers unless the user explicitly asks for it. So we do *not* define
27163 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
27164 we have builtins invoked by mmintrin.h that give us license to emit
27165 these sorts of instructions. */
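/* For illustration (assuming the usual GCC header definitions),
   <mmintrin.h> wraps these builtins, e.g.

       _mm_set_pi32 (i1, i0)  ->  __builtin_ia32_vec_init_v2si (i0, i1)

   so MMX register usage remains opt-in through the headers.  */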
27168 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
27170 enum machine_mode tmode = TYPE_MODE (type);
27171 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
27172 int i, n_elt = GET_MODE_NUNITS (tmode);
27173 rtvec v = rtvec_alloc (n_elt);
27175 gcc_assert (VECTOR_MODE_P (tmode));
27176 gcc_assert (call_expr_nargs (exp) == n_elt);
27178 for (i = 0; i < n_elt; ++i)
27180 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
27181 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
27184 if (!target || !register_operand (target, tmode))
27185 target = gen_reg_rtx (tmode);
27187 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
27191 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27192 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
27193 had a language-level syntax for referencing vector elements. */
27196 ix86_expand_vec_ext_builtin (tree exp, rtx target)
27198 enum machine_mode tmode, mode0;
27203 arg0 = CALL_EXPR_ARG (exp, 0);
27204 arg1 = CALL_EXPR_ARG (exp, 1);
27206 op0 = expand_normal (arg0);
27207 elt = get_element_number (TREE_TYPE (arg0), arg1);
27209 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27210 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27211 gcc_assert (VECTOR_MODE_P (mode0));
27213 op0 = force_reg (mode0, op0);
27215 if (optimize || !target || !register_operand (target, tmode))
27216 target = gen_reg_rtx (tmode);
27218 ix86_expand_vector_extract (true, target, op0, elt);
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
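/* Illustrative sketch, not part of this file: the copy-on-write behaviour
   implemented above.  This is the same path _mm_insert_epi16 takes; the
   input vector X is left untouched and a modified copy is returned.  */
#if 0
typedef short __v8hi __attribute__ ((__vector_size__ (16)));

__v8hi
set_element_3 (__v8hi x, int val)
{
  return __builtin_ia32_vec_set_v8hi (x, val, 3);
}
#endif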
27262 /* Expand an expression EXP that calls a built-in function,
27263 with result going to TARGET if that's convenient
27264 (and in mode MODE if that's convenient).
27265 SUBTARGET may be used as the target for computing one of EXP's operands.
27266 IGNORE is nonzero if the value is to be ignored. */
27269 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27270 enum machine_mode mode ATTRIBUTE_UNUSED,
27271 int ignore ATTRIBUTE_UNUSED)
27273 const struct builtin_description *d;
27275 enum insn_code icode;
27276 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27277 tree arg0, arg1, arg2;
27278 rtx op0, op1, op2, pat;
27279 enum machine_mode mode0, mode1, mode2;
27280 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27282 /* Determine whether the builtin function is available under the current ISA.
27283 Originally the builtin was not created if it wasn't applicable to the
27284 current ISA based on the command line switches. With function specific
27285 options, we need to check in the context of the function making the call
27286 whether it is supported. */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, NULL, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }
27306 case IX86_BUILTIN_MASKMOVQ:
27307 case IX86_BUILTIN_MASKMOVDQU:
27308 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27309 ? CODE_FOR_mmx_maskmovq
27310 : CODE_FOR_sse2_maskmovdqu);
27311 /* Note the arg order is different from the operand order. */
27312 arg1 = CALL_EXPR_ARG (exp, 0);
27313 arg2 = CALL_EXPR_ARG (exp, 1);
27314 arg0 = CALL_EXPR_ARG (exp, 2);
27315 op0 = expand_normal (arg0);
27316 op1 = expand_normal (arg1);
27317 op2 = expand_normal (arg2);
27318 mode0 = insn_data[icode].operand[0].mode;
27319 mode1 = insn_data[icode].operand[1].mode;
27320 mode2 = insn_data[icode].operand[2].mode;
27322 op0 = force_reg (Pmode, op0);
27323 op0 = gen_rtx_MEM (mode1, op0);
27325 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27326 op0 = copy_to_mode_reg (mode0, op0);
27327 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27328 op1 = copy_to_mode_reg (mode1, op1);
27329 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27330 op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (pat)
	emit_insn (pat);
      return 0;
27337 case IX86_BUILTIN_LDMXCSR:
27338 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27339 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27340 emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;
27344 case IX86_BUILTIN_STMXCSR:
27345 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27346 emit_insn (gen_sse_stmxcsr (target));
27347 return copy_to_mode_reg (SImode, target);
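/* Illustrative sketch, not part of this file: the LDMXCSR/STMXCSR pair
   above backs _mm_setcsr/_mm_getcsr.  Both expansions go through a stack
   temporary because the instructions only take memory operands.  */
#if 0
unsigned int
enable_ftz (void)
{
  unsigned int csr = __builtin_ia32_stmxcsr ();
  __builtin_ia32_ldmxcsr (csr | 0x8000);	/* set the FTZ bit */
  return csr;
}
#endif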
27349 case IX86_BUILTIN_CLFLUSH:
27350 arg0 = CALL_EXPR_ARG (exp, 0);
27351 op0 = expand_normal (arg0);
27352 icode = CODE_FOR_sse2_clflush;
27353 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27354 op0 = copy_to_mode_reg (Pmode, op0);
      emit_insn (gen_sse2_clflush (op0));
      return 0;
27359 case IX86_BUILTIN_MONITOR:
27360 arg0 = CALL_EXPR_ARG (exp, 0);
27361 arg1 = CALL_EXPR_ARG (exp, 1);
27362 arg2 = CALL_EXPR_ARG (exp, 2);
27363 op0 = expand_normal (arg0);
27364 op1 = expand_normal (arg1);
27365 op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;
27375 case IX86_BUILTIN_MWAIT:
27376 arg0 = CALL_EXPR_ARG (exp, 0);
27377 arg1 = CALL_EXPR_ARG (exp, 1);
27378 op0 = expand_normal (arg0);
27379 op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
27387 case IX86_BUILTIN_VEC_INIT_V2SI:
27388 case IX86_BUILTIN_VEC_INIT_V4HI:
27389 case IX86_BUILTIN_VEC_INIT_V8QI:
27390 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27392 case IX86_BUILTIN_VEC_EXT_V2DF:
27393 case IX86_BUILTIN_VEC_EXT_V2DI:
27394 case IX86_BUILTIN_VEC_EXT_V4SF:
27395 case IX86_BUILTIN_VEC_EXT_V4SI:
27396 case IX86_BUILTIN_VEC_EXT_V8HI:
27397 case IX86_BUILTIN_VEC_EXT_V2SI:
27398 case IX86_BUILTIN_VEC_EXT_V4HI:
27399 case IX86_BUILTIN_VEC_EXT_V16QI:
27400 return ix86_expand_vec_ext_builtin (exp, target);
27402 case IX86_BUILTIN_VEC_SET_V2DI:
27403 case IX86_BUILTIN_VEC_SET_V4SF:
27404 case IX86_BUILTIN_VEC_SET_V4SI:
27405 case IX86_BUILTIN_VEC_SET_V8HI:
27406 case IX86_BUILTIN_VEC_SET_V4HI:
27407 case IX86_BUILTIN_VEC_SET_V16QI:
27408 return ix86_expand_vec_set_builtin (exp);
27410 case IX86_BUILTIN_VEC_PERM_V2DF:
27411 case IX86_BUILTIN_VEC_PERM_V4SF:
27412 case IX86_BUILTIN_VEC_PERM_V2DI:
27413 case IX86_BUILTIN_VEC_PERM_V4SI:
27414 case IX86_BUILTIN_VEC_PERM_V8HI:
27415 case IX86_BUILTIN_VEC_PERM_V16QI:
27416 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27417 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27418 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27419 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27420 case IX86_BUILTIN_VEC_PERM_V4DF:
27421 case IX86_BUILTIN_VEC_PERM_V8SF:
27422 return ix86_expand_vec_perm_builtin (exp);
27424 case IX86_BUILTIN_INFQ:
27425 case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
27442 case IX86_BUILTIN_LLWPCB:
27443 arg0 = CALL_EXPR_ARG (exp, 0);
27444 op0 = expand_normal (arg0);
27445 icode = CODE_FOR_lwp_llwpcb;
27446 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27447 op0 = copy_to_mode_reg (Pmode, op0);
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;
27451 case IX86_BUILTIN_SLWPCB:
27452 icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
27455 target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
27459 case IX86_BUILTIN_BEXTRI32:
27460 case IX86_BUILTIN_BEXTRI64:
27461 arg0 = CALL_EXPR_ARG (exp, 0);
27462 arg1 = CALL_EXPR_ARG (exp, 1);
27463 op0 = expand_normal (arg0);
27464 op1 = expand_normal (arg1);
27465 icode = (fcode == IX86_BUILTIN_BEXTRI32
27466 ? CODE_FOR_tbm_bextri_si
27467 : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));
27503 op1 = gen_reg_rtx (SImode);
27504 emit_move_insn (op1, CONST1_RTX (SImode));
27506 /* Emit SImode conditional move. */
27507 if (mode0 == HImode)
27509 op2 = gen_reg_rtx (SImode);
27510 emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
27519 emit_insn (gen_rtx_SET (VOIDmode, op1,
27520 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27521 emit_move_insn (target, op1);
27523 arg0 = CALL_EXPR_ARG (exp, 0);
27524 op1 = expand_normal (arg0);
27525 if (!address_operand (op1, VOIDmode))
27526 op1 = copy_addr_to_reg (op1);
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
      return target;

    default:
      break;
    }
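/* Illustrative sketch, not part of this file: the *_step builtins handled
   above store the random value through the pointer argument and return
   the carry flag, so a failure (no entropy available) can be retried.  */
#if 0
int
get_random (unsigned int *out)
{
  /* Same as _rdrand32_step (out) from immintrin.h; requires -mrdrnd.  */
  return __builtin_ia32_rdrand32_step (out);
}
#endif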
27534 for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
27537 if (d->code == fcode)
27538 return ix86_expand_special_args_builtin (d, exp, target);
27540 for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE2)
	    /* Emit a normal call if SSE2 isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}
27555 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27556 if (d->code == fcode)
27557 return ix86_expand_sse_comi (d, exp, target);
27559 for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
27562 if (d->code == fcode)
27563 return ix86_expand_sse_pcmpestr (d, exp, target);
27565 for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
27568 if (d->code == fcode)
27569 return ix86_expand_sse_pcmpistr (d, exp, target);
27571 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27572 if (d->code == fcode)
27573 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27574 (enum ix86_builtin_func_type)
27575 d->flag, d->comparison);
27577 gcc_unreachable ();
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27592 if (TREE_CODE (type_out) != VECTOR_TYPE
27593 || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;
27597 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27598 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27599 in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
27604 case BUILT_IN_SQRT:
27605 if (out_mode == DFmode && in_mode == DFmode)
27607 if (out_n == 2 && in_n == 2)
27608 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27609 else if (out_n == 4 && in_n == 4)
27610 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27614 case BUILT_IN_SQRTF:
27615 if (out_mode == SFmode && in_mode == SFmode)
27617 if (out_n == 4 && in_n == 4)
27618 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27619 else if (out_n == 8 && in_n == 8)
27620 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27624 case BUILT_IN_LRINT:
27625 if (out_mode == SImode && out_n == 4
27626 && in_mode == DFmode && in_n == 2)
27627 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27630 case BUILT_IN_LRINTF:
27631 if (out_mode == SImode && in_mode == SFmode)
27633 if (out_n == 4 && in_n == 4)
27634 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27635 else if (out_n == 8 && in_n == 8)
27636 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27640 case BUILT_IN_COPYSIGN:
27641 if (out_mode == DFmode && in_mode == DFmode)
27643 if (out_n == 2 && in_n == 2)
27644 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27645 else if (out_n == 4 && in_n == 4)
27646 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27650 case BUILT_IN_COPYSIGNF:
27651 if (out_mode == SFmode && in_mode == SFmode)
27653 if (out_n == 4 && in_n == 4)
27654 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27655 else if (out_n == 8 && in_n == 8)
27656 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27660 case BUILT_IN_FLOOR:
27661 /* The round insn does not trap on denormals. */
27662 if (flag_trapping_math)
27665 if (out_mode == DFmode && in_mode == DFmode)
27667 if (out_n == 2 && in_n == 2)
27668 return ix86_builtins[IX86_BUILTIN_FLOORPD];
27669 else if (out_n == 4 && in_n == 4)
27670 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
27674 case BUILT_IN_FLOORF:
27675 /* The round insn does not trap on denormals. */
27676 if (flag_trapping_math)
27679 if (out_mode == SFmode && in_mode == SFmode)
27681 if (out_n == 4 && in_n == 4)
27682 return ix86_builtins[IX86_BUILTIN_FLOORPS];
27683 else if (out_n == 8 && in_n == 8)
27684 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
27688 case BUILT_IN_CEIL:
27689 /* The round insn does not trap on denormals. */
27690 if (flag_trapping_math)
27693 if (out_mode == DFmode && in_mode == DFmode)
27695 if (out_n == 2 && in_n == 2)
27696 return ix86_builtins[IX86_BUILTIN_CEILPD];
27697 else if (out_n == 4 && in_n == 4)
27698 return ix86_builtins[IX86_BUILTIN_CEILPD256];
27702 case BUILT_IN_CEILF:
27703 /* The round insn does not trap on denormals. */
27704 if (flag_trapping_math)
27707 if (out_mode == SFmode && in_mode == SFmode)
27709 if (out_n == 4 && in_n == 4)
27710 return ix86_builtins[IX86_BUILTIN_CEILPS];
27711 else if (out_n == 8 && in_n == 8)
27712 return ix86_builtins[IX86_BUILTIN_CEILPS256];
27716 case BUILT_IN_TRUNC:
27717 /* The round insn does not trap on denormals. */
27718 if (flag_trapping_math)
27721 if (out_mode == DFmode && in_mode == DFmode)
27723 if (out_n == 2 && in_n == 2)
27724 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
27725 else if (out_n == 4 && in_n == 4)
27726 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
27730 case BUILT_IN_TRUNCF:
27731 /* The round insn does not trap on denormals. */
27732 if (flag_trapping_math)
27735 if (out_mode == SFmode && in_mode == SFmode)
27737 if (out_n == 4 && in_n == 4)
27738 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
27739 else if (out_n == 8 && in_n == 8)
27740 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
27744 case BUILT_IN_RINT:
27745 /* The round insn does not trap on denormals. */
27746 if (flag_trapping_math)
27749 if (out_mode == DFmode && in_mode == DFmode)
27751 if (out_n == 2 && in_n == 2)
27752 return ix86_builtins[IX86_BUILTIN_RINTPD];
27753 else if (out_n == 4 && in_n == 4)
27754 return ix86_builtins[IX86_BUILTIN_RINTPD256];
27758 case BUILT_IN_RINTF:
27759 /* The round insn does not trap on denormals. */
27760 if (flag_trapping_math)
27763 if (out_mode == SFmode && in_mode == SFmode)
27765 if (out_n == 4 && in_n == 4)
27766 return ix86_builtins[IX86_BUILTIN_RINTPS];
27767 else if (out_n == 8 && in_n == 8)
27768 return ix86_builtins[IX86_BUILTIN_RINTPS256];
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
27775 if (out_n == 2 && in_n == 2)
27776 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27777 if (out_n == 4 && in_n == 4)
27778 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27782 case BUILT_IN_FMAF:
27783 if (out_mode == SFmode && in_mode == SFmode)
27785 if (out_n == 4 && in_n == 4)
27786 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27787 if (out_n == 8 && in_n == 8)
27788 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
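/* Illustrative sketch, not part of this file: a loop the vectorizer can
   transform through the mapping above.  With SSE2 math enabled, the
   scalar sqrt calls become the SQRTPD builtin (two lanes per insn).  */
#if 0
double a[1024], b[1024];

void
vector_sqrt (void)
{
  int i;
  for (i = 0; i < 1024; i++)		/* e.g. -O3 -ffast-math */
    a[i] = __builtin_sqrt (b[i]);
}
#endif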
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;
27817 /* The SVML is suitable for unsafe math only. */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;
27821 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27822 n = TYPE_VECTOR_SUBPARTS (type_out);
27823 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27824 in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
27837 case BUILT_IN_ATAN:
27838 case BUILT_IN_ATAN2:
27839 case BUILT_IN_ATANH:
27840 case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
27843 case BUILT_IN_ASINH:
27844 case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
27847 case BUILT_IN_ACOSH:
27848 case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;
27853 case BUILT_IN_EXPF:
27854 case BUILT_IN_LOGF:
27855 case BUILT_IN_LOG10F:
27856 case BUILT_IN_POWF:
27857 case BUILT_IN_TANHF:
27858 case BUILT_IN_TANF:
27859 case BUILT_IN_ATANF:
27860 case BUILT_IN_ATAN2F:
27861 case BUILT_IN_ATANHF:
27862 case BUILT_IN_CBRTF:
27863 case BUILT_IN_SINHF:
27864 case BUILT_IN_SINF:
27865 case BUILT_IN_ASINHF:
27866 case BUILT_IN_ASINF:
27867 case BUILT_IN_COSHF:
27868 case BUILT_IN_COSF:
27869 case BUILT_IN_ACOSHF:
27870 case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }
  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);
  /* Convert to uppercase.  */
  name[4] &= ~0x20;
  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27906 /* Build a function declaration for the vectorized function. */
27907 new_fndecl = build_decl (BUILTINS_LOCATION,
27908 FUNCTION_DECL, get_identifier (name), fntype);
27909 TREE_PUBLIC (new_fndecl) = 1;
27910 DECL_EXTERNAL (new_fndecl) = 1;
27911 DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
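/* Illustrative sketch, not part of this file: the SVML mangling built
   above.  With -mveclibabi=svml, vectorized math calls resolve to
   external routines such as these (log is the irregular case).  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef double __v2df __attribute__ ((__vector_size__ (16)));

extern __v4sf vmlsSin4 (__v4sf);	/* sinf, 4 float lanes  */
extern __v2df vmldSin2 (__v2df);	/* sin, 2 double lanes  */
extern __v4sf vmlsLn4 (__v4sf);		/* logf, special-cased   */
#endif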
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;
27930 /* The ACML is 64bits only and suitable for unsafe math only as
27931 it does not correctly support parts of IEEE with the required
27932 precision such as denormals. */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;
27937 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27938 n = TYPE_VECTOR_SUBPARTS (type_out);
27939 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27940 in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
27951 case BUILT_IN_LOG2:
27952 case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;
27960 case BUILT_IN_SINF:
27961 case BUILT_IN_COSF:
27962 case BUILT_IN_EXPF:
27963 case BUILT_IN_POWF:
27964 case BUILT_IN_LOGF:
27965 case BUILT_IN_LOG2F:
27966 case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }
27978 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27979 sprintf (name + 7, "%s", bname+10);
  arity = 0;
  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27991 /* Build a function declaration for the vectorized function. */
27992 new_fndecl = build_decl (BUILTINS_LOCATION,
27993 FUNCTION_DECL, get_identifier (name), fntype);
27994 TREE_PUBLIC (new_fndecl) = 1;
27995 DECL_EXTERNAL (new_fndecl) = 1;
27996 DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
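/* Illustrative sketch, not part of this file: the ACML mangling built
   above, "__vr" + 'd'/'s' + lane count + '_' + function name.  With
   -mveclibabi=acml the vectorizer emits calls such as:  */
#if 0
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef double __v2df __attribute__ ((__vector_size__ (16)));

extern __v2df __vrd2_sin (__v2df);	/* sin, 2 double lanes  */
extern __v4sf __vrs4_sinf (__v4sf);	/* sinf, 4 float lanes  */
#endif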
28003 /* Returns a decl of a function that implements conversion of an integer vector
28004 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
28005 are the types involved when converting according to CODE.
28006 Return NULL_TREE if it is not available. */
28009 ix86_vectorize_builtin_conversion (unsigned int code,
28010 tree dest_type, tree src_type)
28018 switch (TYPE_MODE (src_type))
28021 switch (TYPE_MODE (dest_type))
28024 return (TYPE_UNSIGNED (src_type)
28025 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
28026 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
28028 return (TYPE_UNSIGNED (src_type)
28030 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
28036 switch (TYPE_MODE (dest_type))
28039 return (TYPE_UNSIGNED (src_type)
28041 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
28050 case FIX_TRUNC_EXPR:
28051 switch (TYPE_MODE (dest_type))
28054 switch (TYPE_MODE (src_type))
28057 return (TYPE_UNSIGNED (dest_type)
28059 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
28061 return (TYPE_UNSIGNED (dest_type)
28063 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
28070 switch (TYPE_MODE (src_type))
28073 return (TYPE_UNSIGNED (dest_type)
28075 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
28092 /* Returns a code for a target-specific builtin that implements
28093 reciprocal of the function, or NULL_TREE if not available. */
28096 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
28097 bool sqrt ATTRIBUTE_UNUSED)
28099 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
28100 && flag_finite_math_only && !flag_trapping_math
28101 && flag_unsafe_math_optimizations))
28105 /* Machine dependent builtins. */
28108 /* Vectorized version of sqrt to rsqrt conversion. */
28109 case IX86_BUILTIN_SQRTPS_NR:
28110 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
28112 case IX86_BUILTIN_SQRTPS_NR256:
28113 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
28119 /* Normal builtins. */
28122 /* Sqrt to rsqrt conversion. */
28123 case BUILT_IN_SQRTF:
28124 return ix86_builtins[IX86_BUILTIN_RSQRTF];
28131 /* Helper for avx_vpermilps256_operand et al. This is also used by
28132 the expansion functions to turn the parallel back into a mask.
28133 The return value is 0 for no match and the imm8+1 for a match. */
28136 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
28138 unsigned i, nelt = GET_MODE_NUNITS (mode);
28140 unsigned char ipar[8];
28142 if (XVECLEN (par, 0) != (int) nelt)
28145 /* Validate that all of the elements are constants, and not totally
28146 out of range. Copy the data into an integral array to make the
28147 subsequent checks easier. */
28148 for (i = 0; i < nelt; ++i)
28150 rtx er = XVECEXP (par, 0, i);
28151 unsigned HOST_WIDE_INT ei;
      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }
  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;
    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
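/* Worked example, not part of this file: for V4SFmode the parallel
   (1 0 3 2) -- swap adjacent pairs -- packs two bits per element:
   1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, so the function returns 0xb1 + 1
   and a caller recovers the vpermilps immediate by subtracting one.  */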
28206 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
28207 the expansion functions to turn the parallel back into a mask.
28208 The return value is 0 for no match and the imm8+1 for a match. */
28211 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
28213 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
28215 unsigned char ipar[8];
28217 if (XVECLEN (par, 0) != (int) nelt)
28220 /* Validate that all of the elements are constants, and not totally
28221 out of range. Copy the data into an integral array to make the
28222 subsequent checks easier. */
28223 for (i = 0; i < nelt; ++i)
28225 rtx er = XVECEXP (par, 0, i);
28226 unsigned HOST_WIDE_INT ei;
      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }
28236 /* Validate that the halves of the permute are halves. */
28237 for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
28240 for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
28244 /* Reconstruct the mask. */
28245 for (i = 0; i < 2; ++i)
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }
  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
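/* Worked example, not part of this file: for V4DFmode (nelt2 = 2) the
   parallel (4 5 2 3) selects the low lane of operand 2 and the high
   lane of operand 1: e = 4/2 = 2 for the low half, e = 2/2 = 1 for the
   high half, mask = 2 | 1<<4 = 0x12, returned as 0x12 + 1.  */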
28259 /* Store OPERAND to the memory after reload is completed. This means
28260 that we can't easily use assign_stack_local. */
28262 ix86_force_to_memory (enum machine_mode mode, rtx operand)
28266 gcc_assert (reload_completed);
28267 if (ix86_using_red_zone ())
28269 result = gen_rtx_MEM (mode,
28270 gen_rtx_PLUS (Pmode,
28272 GEN_INT (-RED_ZONE_SIZE)));
28273 emit_move_insn (result, operand);
28275 else if (TARGET_64BIT)
28281 operand = gen_lowpart (DImode, operand);
28285 gen_rtx_SET (VOIDmode,
28286 gen_rtx_MEM (DImode,
28287 gen_rtx_PRE_DEC (DImode,
28288 stack_pointer_rtx)),
28292 gcc_unreachable ();
28294 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28303 split_double_mode (mode, &operand, 1, operands, operands + 1);
28305 gen_rtx_SET (VOIDmode,
28306 gen_rtx_MEM (SImode,
28307 gen_rtx_PRE_DEC (Pmode,
28308 stack_pointer_rtx)),
28311 gen_rtx_SET (VOIDmode,
28312 gen_rtx_MEM (SImode,
28313 gen_rtx_PRE_DEC (Pmode,
28314 stack_pointer_rtx)),
28319 /* Store HImodes as SImodes. */
28320 operand = gen_lowpart (SImode, operand);
28324 gen_rtx_SET (VOIDmode,
28325 gen_rtx_MEM (GET_MODE (operand),
28326 gen_rtx_PRE_DEC (SImode,
28327 stack_pointer_rtx)),
28331 gcc_unreachable ();
28333 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28338 /* Free operand from the memory. */
28340 ix86_free_from_memory (enum machine_mode mode)
28342 if (!ix86_using_red_zone ())
28346 if (mode == DImode || TARGET_64BIT)
28350 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28351 to pop or add instruction if registers are available. */
28352 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28353 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28358 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
28359 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
28361 static const reg_class_t *
28362 i386_ira_cover_classes (void)
28364 static const reg_class_t sse_fpmath_classes[] = {
28365 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
28367 static const reg_class_t no_sse_fpmath_classes[] = {
28368 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
28371 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
28374 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28376 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28377 QImode must go into class Q_REGS.
28378 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28379 movdf to do mem-to-mem moves through integer regs. */
28382 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28384 enum machine_mode mode = GET_MODE (x);
28386 /* We're only allowed to return a subclass of CLASS. Many of the
28387 following checks fail for NO_REGS, so eliminate that early. */
28388 if (regclass == NO_REGS)
28391 /* All classes can load zeros. */
28392 if (x == CONST0_RTX (mode))
28395 /* Force constants into memory if we are loading a (nonzero) constant into
28396 an MMX or SSE register. This is because there are no MMX/SSE instructions
28397 to load from a constant. */
28399 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28402 /* Prefer SSE regs only, if we can use them for math. */
28403 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28404 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28406 /* Floating-point constants need more complex checks. */
28407 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28409 /* General regs can load everything. */
28410 if (reg_class_subset_p (regclass, GENERAL_REGS))
28413 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28414 zero above. We only want to wind up preferring 80387 registers if
28415 we plan on doing computation with them. */
      if (TARGET_80387
	  && standard_80387_constant_p (x))
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }
28433 /* Generally when we see PLUS here, it's the function invariant
28434 (plus soft-fp const_int). Which can only be computed into general
28436 if (GET_CODE (x) == PLUS)
28437 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28439 /* QImode constants are easy to load, but non-constant QImode data
28440 must go into Q_REGS. */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
28453 /* Discourage putting floating-point values in SSE registers unless
28454 SSE math is being used, and likewise for the 387 registers. */
28456 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28458 enum machine_mode mode = GET_MODE (x);
28460 /* Restrict the output reload class to the register bank that we are doing
28461 math on. If we would like not to return a subset of CLASS, reject this
28462 alternative: if reload cannot do this, it will still use its choice. */
28464 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28465 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28467 if (X87_FLOAT_MODE_P (mode))
28469 if (regclass == FP_TOP_SSE_REGS)
28471 else if (regclass == FP_SECOND_SSE_REGS)
28472 return FP_SECOND_REG;
28474 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28481 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28482 enum machine_mode mode,
28483 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28485 /* QImode spills from non-QI registers require
28486 intermediate register on 32bit targets. */
28488 && !in_p && mode == QImode
28489 && (rclass == GENERAL_REGS
28490 || rclass == LEGACY_REGS
28491 || rclass == INDEX_REGS))
28500 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28501 regno = true_regnum (x);
28503 /* Return Q_REGS if the operand is in memory. */
28508 /* This condition handles corner case where an expression involving
28509 pointers gets vectorized. We're trying to use the address of a
28510 stack slot as a vector initializer.
28512 (set (reg:V2DI 74 [ vect_cst_.2 ])
28513 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
28515 Eventually frame gets turned into sp+offset like this:
28517 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28518 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28519 (const_int 392 [0x188]))))
28521 That later gets turned into:
28523 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28524 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
28525 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
28527 We'll have the following reload recorded:
28529 Reload 0: reload_in (DI) =
28530 (plus:DI (reg/f:DI 7 sp)
28531 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
28532 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28533 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
28534 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
28535 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
28536 reload_reg_rtx: (reg:V2DI 22 xmm1)
28538 Which isn't going to work since SSE instructions can't handle scalar
28539 additions. Returning GENERAL_REGS forces the addition into integer
28540 register and reload can handle subsequent reloads without problems. */
28542 if (in_p && GET_CODE (x) == PLUS
28543 && SSE_CLASS_P (rclass)
28544 && SCALAR_INT_MODE_P (mode))
28545 return GENERAL_REGS;
28550 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28553 ix86_class_likely_spilled_p (reg_class_t rclass)
28564 case SSE_FIRST_REG:
28566 case FP_SECOND_REG:
28576 /* If we are copying between general and FP registers, we need a memory
28577 location. The same is true for SSE and MMX registers.
28579 To optimize register_move_cost performance, allow inline variant.
28581 The macro can't work reliably when one of the CLASSES is class containing
28582 registers from multiple units (SSE, MMX, integer). We avoid this by never
28583 combining those units in single alternative in the machine description.
28584 Ensure that this constraint holds to avoid unexpected surprises.
28586 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28587 enforce these sanity checks. */
28590 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28591 enum machine_mode mode, int strict)
28593 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28594 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28595 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28596 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28597 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28598 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28600 gcc_assert (!strict);
28604 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28607 /* ??? This is a lie. We do have moves between mmx/general, and for
28608 mmx/sse2. But by saying we need secondary memory we discourage the
28609 register allocator from using the mmx registers unless needed. */
28610 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28613 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28615 /* SSE1 doesn't have any direct moves from other classes. */
28619 /* If the target says that inter-unit moves are more expensive
28620 than moving through memory, then don't generate them. */
28621 if (!TARGET_INTER_UNIT_MOVES)
28624 /* Between SSE and general, we have moves no larger than word size. */
28625 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28633 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28634 enum machine_mode mode, int strict)
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
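/* Illustrative sketch, not part of this file: what "secondary memory"
   means in practice.  On 32-bit, a DImode value moving between a general
   register pair and an SSE register has no single wide-enough
   register-to-register instruction, so reload bounces it through a stack
   slot, roughly:

	movl	%eax, slot	; low half
	movl	%edx, slot+4	; high half
	movq	slot, %xmm0	; one 64-bit load

   (AT&T-syntax sketch; the actual slot addressing is reload's choice.)  */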
28639 /* Return true if the registers in CLASS cannot represent the change from
28640 modes FROM to TO. */
28643 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28644 enum reg_class regclass)
28649 /* x87 registers can't do subreg at all, as all values are reformatted
28650 to extended precision. */
28651 if (MAYBE_FLOAT_CLASS_P (regclass))
28654 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28656 /* Vector registers do not support QI or HImode loads. If we don't
28657 disallow a change to these modes, reload will assume it's ok to
28658 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28659 the vec_dupv4hi pattern. */
28660 if (GET_MODE_SIZE (from) < 4)
28663 /* Vector registers do not support subreg with nonzero offsets, which
28664 are otherwise valid for integer registers. Since we can't see
28665 whether we have a nonzero offset from here, prohibit all
28666 nonparadoxical subregs changing size. */
28667 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28674 /* Return the cost of moving data of mode M between a
28675 register and memory. A value of 2 is the default; this cost is
28676 relative to those in `REGISTER_MOVE_COST'.
28678 This function is used extensively by register_move_cost that is used to
28679 build tables at startup. Make it inline in this case.
28680 When IN is 2, return maximum of in and out move cost.
28682 If moving between registers and memory is more expensive than
28683 between two registers, you should define this macro to express the
   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
28690 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28694 if (FLOAT_CLASS_P (regclass))
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
28715 if (SSE_CLASS_P (regclass))
28718 switch (GET_MODE_SIZE (mode))
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
28736 if (MMX_CLASS_P (regclass))
28739 switch (GET_MODE_SIZE (mode))
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
28754 switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
28800 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28803 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28807 /* Return the cost of moving data from a register in class CLASS1 to
28808 one in class CLASS2.
28810 It is not required that the cost always equal 2 when FROM is the same as TO;
28811 on some machines it is expensive to move between registers if they are not
28812 general registers. */
28815 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28816 reg_class_t class2_i)
28818 enum reg_class class1 = (enum reg_class) class1_i;
28819 enum reg_class class2 = (enum reg_class) class2_i;
28821 /* In case we require secondary memory, compute cost of the store followed
28822 by load. In order to avoid bad register allocation choices, we need
28823 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28825 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28829 cost += inline_memory_move_cost (mode, class1, 2);
28830 cost += inline_memory_move_cost (mode, class2, 2);
28832 /* In case of copying from general_purpose_register we may emit multiple
28833 stores followed by single load causing memory size mismatch stall.
28834 Count this as arbitrarily high cost of 20. */
28835 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28838 /* In the case of FP/MMX moves, the registers actually overlap, and we
28839 have to switch modes in order to treat them differently. */
28840 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28841 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28847 /* Moves between SSE/MMX and integer unit are expensive. */
28848 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28849 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28851 /* ??? By keeping returned value relatively high, we limit the number
28852 of moves between integer and MMX/SSE registers for all targets.
28853 Additionally, high value prevents problem with x86_modes_tieable_p(),
28854 where integer modes in MMX/SSE registers are not tieable
28855 because of missing QImode and HImode moves to, from or between
28856 MMX/SSE registers. */
28857 return MAX (8, ix86_cost->mmxsse_to_integer);
28859 if (MAYBE_FLOAT_CLASS_P (class1))
28860 return ix86_cost->fp_move;
28861 if (MAYBE_SSE_CLASS_P (class1))
28862 return ix86_cost->sse_move;
28863 if (MAYBE_MMX_CLASS_P (class1))
28864 return ix86_cost->mmx_move;
28868 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28871 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28873 /* Flags and only flags can only hold CCmode values. */
28874 if (CC_REGNO_P (regno))
28875 return GET_MODE_CLASS (mode) == MODE_CC;
28876 if (GET_MODE_CLASS (mode) == MODE_CC
28877 || GET_MODE_CLASS (mode) == MODE_RANDOM
28878 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28880 if (FP_REGNO_P (regno))
28881 return VALID_FP_MODE_P (mode);
28882 if (SSE_REGNO_P (regno))
28884 /* We implement the move patterns for all vector modes into and
28885 out of SSE registers, even when no operation instructions
28886 are available. OImode move is available only when AVX is
28888 return ((TARGET_AVX && mode == OImode)
28889 || VALID_AVX256_REG_MODE (mode)
28890 || VALID_SSE_REG_MODE (mode)
28891 || VALID_SSE2_REG_MODE (mode)
28892 || VALID_MMX_REG_MODE (mode)
28893 || VALID_MMX_REG_MODE_3DNOW (mode));
28895 if (MMX_REGNO_P (regno))
28897 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28898 so if the register is available at all, then we can move data of
28899 the given mode into or out of it. */
28900 return (VALID_MMX_REG_MODE (mode)
28901 || VALID_MMX_REG_MODE_3DNOW (mode));
28904 if (mode == QImode)
28906 /* Take care for QImode values - they can be in non-QI regs,
28907 but then they do cause partial register stalls. */
28908 if (regno <= BX_REG || TARGET_64BIT)
28910 if (!TARGET_PARTIAL_REG_STALL)
28912 return reload_in_progress || reload_completed;
28914 /* We handle both integer and floats in the general purpose registers. */
28915 else if (VALID_INT_MODE_P (mode))
28917 else if (VALID_FP_MODE_P (mode))
28919 else if (VALID_DFP_MODE_P (mode))
28921 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28922 on to use that value in smaller contexts, this can easily force a
28923 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28924 supporting DImode, allow it. */
28925 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28931 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28932 tieable integer mode. */
28935 ix86_tieable_integer_mode_p (enum machine_mode mode)
28944 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28947 return TARGET_64BIT;
28954 /* Return true if MODE1 is accessible in a register that can hold MODE2
28955 without copying. That is, all register classes that can hold MODE2
28956 can also hold MODE1. */
28959 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28961 if (mode1 == mode2)
28964 if (ix86_tieable_integer_mode_p (mode1)
28965 && ix86_tieable_integer_mode_p (mode2))
28968 /* MODE2 being XFmode implies fp stack or general regs, which means we
28969 can tie any smaller floating point modes to it. Note that we do not
28970 tie this with TFmode. */
28971 if (mode2 == XFmode)
28972 return mode1 == SFmode || mode1 == DFmode;
28974 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28975 that we can tie it with SFmode. */
28976 if (mode2 == DFmode)
28977 return mode1 == SFmode;
28979 /* If MODE2 is only appropriate for an SSE register, then tie with
28980 any other mode acceptable to SSE registers. */
28981 if (GET_MODE_SIZE (mode2) == 16
28982 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28983 return (GET_MODE_SIZE (mode1) == 16
28984 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28986 /* If MODE2 is appropriate for an MMX register, then tie
28987 with any other mode acceptable to MMX registers. */
28988 if (GET_MODE_SIZE (mode2) == 8
28989 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28990 return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
28996 /* Compute a (partial) cost for rtx X. Return true if the complete
28997 cost has been computed, and false if subexpressions should be
28998 scanned. In either case, *TOTAL contains the cost result. */
29001 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
29003 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
29004 enum machine_mode mode = GET_MODE (x);
29005 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
29013 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
29015 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
29017 else if (flag_pic && SYMBOLIC_CONST (x)
		   || (GET_CODE (x) != LABEL_REF
29020 && (GET_CODE (x) != SYMBOL_REF
29021 || !SYMBOL_REF_LOCAL_P (x)))))
29028 if (mode == VOIDmode)
29031 switch (standard_80387_constant_p (x))
29036 default: /* Other constants */
29041 /* Start with (MEM (SYMBOL_REF)), since that's where
29042 it'll probably end up. Add a penalty for size. */
29043 *total = (COSTS_N_INSNS (1)
29044 + (flag_pic != 0 && !TARGET_64BIT)
29045 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
29051 /* The zero extensions is often completely free on x86_64, so make
29052 it as cheap as possible. */
29053 if (TARGET_64BIT && mode == DImode
29054 && GET_MODE (XEXP (x, 0)) == SImode)
29056 else if (TARGET_ZERO_EXTEND_WITH_AND)
29057 *total = cost->add;
29059 *total = cost->movzx;
29063 *total = cost->movsx;
29067 if (CONST_INT_P (XEXP (x, 1))
29068 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
29070 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29073 *total = cost->add;
29076 if ((value == 2 || value == 3)
29077 && cost->lea <= cost->shift_const)
29079 *total = cost->lea;
29089 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
29091 if (CONST_INT_P (XEXP (x, 1)))
29093 if (INTVAL (XEXP (x, 1)) > 32)
29094 *total = cost->shift_const + COSTS_N_INSNS (2);
29096 *total = cost->shift_const * 2;
29100 if (GET_CODE (XEXP (x, 1)) == AND)
29101 *total = cost->shift_var * 2;
29103 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
29108 if (CONST_INT_P (XEXP (x, 1)))
29109 *total = cost->shift_const;
29111 *total = cost->shift_var;
29119 gcc_assert (FLOAT_MODE_P (mode));
29120 gcc_assert (TARGET_FMA || TARGET_FMA4);
29122 /* ??? SSE scalar/vector cost should be used here. */
29123 /* ??? Bald assumption that fma has the same cost as fmul. */
29124 *total = cost->fmul;
29125 *total += rtx_cost (XEXP (x, 1), FMA, speed);
29127 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
29129 if (GET_CODE (sub) == NEG)
29131 *total += rtx_cost (sub, FMA, speed);
29134 if (GET_CODE (sub) == NEG)
29136 *total += rtx_cost (sub, FMA, speed);
29141 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29143 /* ??? SSE scalar cost should be used here. */
29144 *total = cost->fmul;
29147 else if (X87_FLOAT_MODE_P (mode))
29149 *total = cost->fmul;
29152 else if (FLOAT_MODE_P (mode))
29154 /* ??? SSE vector cost should be used here. */
29155 *total = cost->fmul;
29160 rtx op0 = XEXP (x, 0);
29161 rtx op1 = XEXP (x, 1);
29163 if (CONST_INT_P (XEXP (x, 1)))
29165 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
29166 for (nbits = 0; value != 0; value &= value - 1)
29170 /* This is arbitrary. */
29173 /* Compute costs correctly for widening multiplication. */
29174 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
29175 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
29176 == GET_MODE_SIZE (mode))
29178 int is_mulwiden = 0;
29179 enum machine_mode inner_mode = GET_MODE (op0);
29181 if (GET_CODE (op0) == GET_CODE (op1))
29182 is_mulwiden = 1, op1 = XEXP (op1, 0);
29183 else if (CONST_INT_P (op1))
29185 if (GET_CODE (op0) == SIGN_EXTEND)
29186 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
29189 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
29193 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
29196 *total = (cost->mult_init[MODE_INDEX (mode)]
29197 + nbits * cost->mult_bit
29198 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
29207 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29208 /* ??? SSE cost should be used here. */
29209 *total = cost->fdiv;
29210 else if (X87_FLOAT_MODE_P (mode))
29211 *total = cost->fdiv;
29212 else if (FLOAT_MODE_P (mode))
29213 /* ??? SSE vector cost should be used here. */
29214 *total = cost->fdiv;
29216 *total = cost->divide[MODE_INDEX (mode)];
29220 if (GET_MODE_CLASS (mode) == MODE_INT
29221 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
29223 if (GET_CODE (XEXP (x, 0)) == PLUS
29224 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
29225 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
29226 && CONSTANT_P (XEXP (x, 1)))
29228 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
29229 if (val == 2 || val == 4 || val == 8)
29231 *total = cost->lea;
29232 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29233 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
29234 outer_code, speed);
29235 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29239 else if (GET_CODE (XEXP (x, 0)) == MULT
29240 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
29242 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
29243 if (val == 2 || val == 4 || val == 8)
29245 *total = cost->lea;
29246 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29247 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29251 else if (GET_CODE (XEXP (x, 0)) == PLUS)
29253 *total = cost->lea;
29254 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
29255 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
29256 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
29263 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29265 /* ??? SSE cost should be used here. */
29266 *total = cost->fadd;
29269 else if (X87_FLOAT_MODE_P (mode))
29271 *total = cost->fadd;
29274 else if (FLOAT_MODE_P (mode))
29276 /* ??? SSE vector cost should be used here. */
29277 *total = cost->fadd;
29285 if (!TARGET_64BIT && mode == DImode)
29287 *total = (cost->add * 2
29288 + (rtx_cost (XEXP (x, 0), outer_code, speed)
29289 << (GET_MODE (XEXP (x, 0)) != DImode))
29290 + (rtx_cost (XEXP (x, 1), outer_code, speed)
29291 << (GET_MODE (XEXP (x, 1)) != DImode)));
29297 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29299 /* ??? SSE cost should be used here. */
29300 *total = cost->fchs;
29303 else if (X87_FLOAT_MODE_P (mode))
29305 *total = cost->fchs;
29308 else if (FLOAT_MODE_P (mode))
29310 /* ??? SSE vector cost should be used here. */
29311 *total = cost->fchs;
29317 if (!TARGET_64BIT && mode == DImode)
29318 *total = cost->add * 2;
29320 *total = cost->add;
29324 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
29325 && XEXP (XEXP (x, 0), 1) == const1_rtx
29326 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
29327 && XEXP (x, 1) == const0_rtx)
29329 /* This kind of construct is implemented using test[bwl].
29330 Treat it as if we had an AND. */
29331 *total = (cost->add
29332 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
29333 + rtx_cost (const1_rtx, outer_code, speed));
29339 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
29344 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29345 /* ??? SSE cost should be used here. */
29346 *total = cost->fabs;
29347 else if (X87_FLOAT_MODE_P (mode))
29348 *total = cost->fabs;
29349 else if (FLOAT_MODE_P (mode))
29350 /* ??? SSE vector cost should be used here. */
29351 *total = cost->fabs;
29355 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29356 /* ??? SSE cost should be used here. */
29357 *total = cost->fsqrt;
29358 else if (X87_FLOAT_MODE_P (mode))
29359 *total = cost->fsqrt;
29360 else if (FLOAT_MODE_P (mode))
29361 /* ??? SSE vector cost should be used here. */
29362 *total = cost->fsqrt;
29366 if (XINT (x, 1) == UNSPEC_TP)
29373 case VEC_DUPLICATE:
29374 /* ??? Assume all of these vector manipulation patterns are
29375 recognizable. In which case they all pretty much have the
29377 *total = COSTS_N_INSNS (1);
29387 static int current_machopic_label_num;
29389 /* Given a symbol name and its associated stub, write out the
29390 definition of the stub. */
29393 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29395 unsigned int length;
29396 char *binder_name, *symbol_name, lazy_ptr_name[32];
29397 int label = ++current_machopic_label_num;
29399 /* For 64-bit we shouldn't get here. */
29400 gcc_assert (!TARGET_64BIT);
29402 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29403 symb = targetm.strip_name_encoding (symb);
29405 length = strlen (stub);
29406 binder_name = XALLOCAVEC (char, length + 32);
29407 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29409 length = strlen (symb);
29410 symbol_name = XALLOCAVEC (char, length + 32);
29411 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29413 sprintf (lazy_ptr_name, "L%d$lz", label);
29415 if (MACHOPIC_ATT_STUB)
29416 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29417 else if (MACHOPIC_PURE)
29419 if (TARGET_DEEP_BRANCH_PREDICTION)
29420 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29422 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29425 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29427 fprintf (file, "%s:\n", stub);
29428 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29430 if (MACHOPIC_ATT_STUB)
29432 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29434 else if (MACHOPIC_PURE)
29437 if (TARGET_DEEP_BRANCH_PREDICTION)
29439 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29440 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29441 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29442 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29446 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29447 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29448 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29450 fprintf (file, "\tjmp\t*%%ecx\n");
29453 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29455 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29456 it needs no stub-binding-helper. */
29457 if (MACHOPIC_ATT_STUB)
29460 fprintf (file, "%s:\n", binder_name);
29464 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29465 fprintf (file, "\tpushl\t%%ecx\n");
29468 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29470 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29472 /* N.B. Keep the correspondence of these
29473 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29474 old-pic/new-pic/non-pic stubs; altering this will break
29475 compatibility with existing dylibs. */
29479 if (TARGET_DEEP_BRANCH_PREDICTION)
29480 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29481 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29483 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29484 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29487 /* 16-byte -mdynamic-no-pic stub. */
29488 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29490 fprintf (file, "%s:\n", lazy_ptr_name);
29491 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29492 fprintf (file, ASM_LONG "%s\n", binder_name);
29494 #endif /* TARGET_MACHO */
29496 /* Order the registers for register allocator. */
29499 x86_order_regs_for_local_alloc (void)
29504 /* First allocate the local general purpose registers. */
29505 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29506 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29507 reg_alloc_order [pos++] = i;
29509 /* Global general purpose registers. */
29510 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29511 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29512 reg_alloc_order [pos++] = i;
29514 /* x87 registers come first in case we are doing FP math
29516 if (!TARGET_SSE_MATH)
29517 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29518 reg_alloc_order [pos++] = i;
29520 /* SSE registers. */
29521 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29522 reg_alloc_order [pos++] = i;
29523 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29524 reg_alloc_order [pos++] = i;
29526 /* x87 registers. */
29527 if (TARGET_SSE_MATH)
29528 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29529 reg_alloc_order [pos++] = i;
29531 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29532 reg_alloc_order [pos++] = i;
29534 /* Initialize the rest of array as we do not allocate some registers
29536 while (pos < FIRST_PSEUDO_REGISTER)
29537 reg_alloc_order [pos++] = 0;
29540 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29541 in struct attribute_spec handler. */
29543 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29545 int flags ATTRIBUTE_UNUSED,
29546 bool *no_add_attrs)
29548 if (TREE_CODE (*node) != FUNCTION_TYPE
29549 && TREE_CODE (*node) != METHOD_TYPE
29550 && TREE_CODE (*node) != FIELD_DECL
29551 && TREE_CODE (*node) != TYPE_DECL)
29553 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29555 *no_add_attrs = true;
29560 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29562 *no_add_attrs = true;
29565 if (is_attribute_p ("callee_pop_aggregate_return", name))
29569 cst = TREE_VALUE (args);
29570 if (TREE_CODE (cst) != INTEGER_CST)
29572 warning (OPT_Wattributes,
29573 "%qE attribute requires an integer constant argument",
29575 *no_add_attrs = true;
29577 else if (compare_tree_int (cst, 0) != 0
29578 && compare_tree_int (cst, 1) != 0)
29580 warning (OPT_Wattributes,
29581 "argument to %qE attribute is neither zero, nor one",
29583 *no_add_attrs = true;
29592 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29593 struct attribute_spec.handler. */
29595 ix86_handle_abi_attribute (tree *node, tree name,
29596 tree args ATTRIBUTE_UNUSED,
29597 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29599 if (TREE_CODE (*node) != FUNCTION_TYPE
29600 && TREE_CODE (*node) != METHOD_TYPE
29601 && TREE_CODE (*node) != FIELD_DECL
29602 && TREE_CODE (*node) != TYPE_DECL)
29604 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29606 *no_add_attrs = true;
29611 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29613 *no_add_attrs = true;
29617 /* Can combine regparm with all attributes but fastcall. */
29618 if (is_attribute_p ("ms_abi", name))
29620 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29622 error ("ms_abi and sysv_abi attributes are not compatible");
29627 else if (is_attribute_p ("sysv_abi", name))
29629 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29631 error ("ms_abi and sysv_abi attributes are not compatible");
29640 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29641 struct attribute_spec.handler. */
29643 ix86_handle_struct_attribute (tree *node, tree name,
29644 tree args ATTRIBUTE_UNUSED,
29645 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29648 if (DECL_P (*node))
29650 if (TREE_CODE (*node) == TYPE_DECL)
29651 type = &TREE_TYPE (*node);
29656 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29657 || TREE_CODE (*type) == UNION_TYPE)))
29659 warning (OPT_Wattributes, "%qE attribute ignored",
29661 *no_add_attrs = true;
29664 else if ((is_attribute_p ("ms_struct", name)
29665 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29666 || ((is_attribute_p ("gcc_struct", name)
29667 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29669 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29671 *no_add_attrs = true;
29678 ix86_handle_fndecl_attribute (tree *node, tree name,
29679 tree args ATTRIBUTE_UNUSED,
29680 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29682 if (TREE_CODE (*node) != FUNCTION_DECL)
29684 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29686 *no_add_attrs = true;
29692 ix86_ms_bitfield_layout_p (const_tree record_type)
29694 return ((TARGET_MS_BITFIELD_LAYOUT
29695 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29696 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29699 /* Returns an expression indicating where the this parameter is
29700 located on entry to the FUNCTION. */
29703 x86_this_parameter (tree function)
29705 tree type = TREE_TYPE (function);
29706 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29711 const int *parm_regs;
29713 if (ix86_function_type_abi (type) == MS_ABI)
29714 parm_regs = x86_64_ms_abi_int_parameter_registers;
29716 parm_regs = x86_64_int_parameter_registers;
29717 return gen_rtx_REG (DImode, parm_regs[aggr]);
29720 nregs = ix86_function_regparm (type, function);
29722 if (nregs > 0 && !stdarg_p (type))
29726 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
29727 regno = aggr ? DX_REG : CX_REG;
29728 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
29732 return gen_rtx_MEM (SImode,
29733 plus_constant (stack_pointer_rtx, 4));
29742 return gen_rtx_MEM (SImode,
29743 plus_constant (stack_pointer_rtx, 4));
29746 return gen_rtx_REG (SImode, regno);
29749 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29752 /* Determine whether x86_output_mi_thunk can succeed. */
29755 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29756 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29757 HOST_WIDE_INT vcall_offset, const_tree function)
29759 /* 64-bit can handle anything. */
29763 /* For 32-bit, everything's fine if we have one free register. */
29764 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29767 /* Need a free register for vcall_offset. */
29771 /* Need a free register for GOT references. */
29772 if (flag_pic && !targetm.binds_local_p (function))
29775 /* Otherwise ok. */
29779 /* Output the assembler code for a thunk function. THUNK_DECL is the
29780 declaration for the thunk function itself, FUNCTION is the decl for
29781 the target function. DELTA is an immediate constant offset to be
29782 added to THIS. If VCALL_OFFSET is nonzero, the word at
29783 *(*this + vcall_offset) should be added to THIS. */
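/* (Explanatory note.)  The 64-bit paths below may clobber %r10 and
   %r11: neither is a parameter register in either ABI, %r11 is a pure
   scratch register, and %r10 only carries a static chain, which the
   C++ methods being thunked to do not use.  */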
29786 x86_output_mi_thunk (FILE *file,
29787 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29788 HOST_WIDE_INT vcall_offset, tree function)
29791 rtx this_param = x86_this_parameter (function);
29794 /* Make sure unwind info is emitted for the thunk if needed. */
29795 final_start_function (emit_barrier (), file, 1);
29797 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29798 pull it in now and let DELTA benefit. */
29799 if (REG_P (this_param))
29800 this_reg = this_param;
29801 else if (vcall_offset)
29803 /* Put the this parameter into %eax. */
29804 xops[0] = this_param;
29805 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29806 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29809 this_reg = NULL_RTX;
29811 /* Adjust the this parameter by a fixed constant. */
29814 xops[0] = GEN_INT (delta);
29815 xops[1] = this_reg ? this_reg : this_param;
29818 if (!x86_64_general_operand (xops[0], DImode))
29820 tmp = gen_rtx_REG (DImode, R10_REG);
29822 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29824 xops[1] = this_param;
29826 if (x86_maybe_negate_const_int (&xops[0], DImode))
29827 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29829 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29831 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29832 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29834 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29837 /* Adjust the this parameter by a value stored in the vtable. */
29841 tmp = gen_rtx_REG (DImode, R10_REG);
29844 int tmp_regno = CX_REG;
29845 if (lookup_attribute ("fastcall",
29846 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29847 || lookup_attribute ("thiscall",
29848 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29849 tmp_regno = AX_REG;
29850 tmp = gen_rtx_REG (SImode, tmp_regno);
29853 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29855 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29857 /* Adjust the this parameter. */
29858 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29859 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29861 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29862 xops[0] = GEN_INT (vcall_offset);
29864 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29865 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29867 xops[1] = this_reg;
29868 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29871 /* If necessary, drop THIS back to its stack slot. */
29872 if (this_reg && this_reg != this_param)
29874 xops[0] = this_reg;
29875 xops[1] = this_param;
29876 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29879 xops[0] = XEXP (DECL_RTL (function), 0);
29882 if (!flag_pic || targetm.binds_local_p (function)
29883 || DEFAULT_ABI == MS_ABI)
29884 output_asm_insn ("jmp\t%P0", xops);
29885 /* All thunks should be in the same object as their target,
29886 and thus binds_local_p should be true. */
29887 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29888 gcc_unreachable ();
29891 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29892 tmp = gen_rtx_CONST (Pmode, tmp);
29893 tmp = gen_rtx_MEM (QImode, tmp);
29895 output_asm_insn ("jmp\t%A0", xops);
29900 if (!flag_pic || targetm.binds_local_p (function))
29901 output_asm_insn ("jmp\t%P0", xops);
29906 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29907 if (TARGET_MACHO_BRANCH_ISLANDS)
29908 sym_ref = (gen_rtx_SYMBOL_REF
29910 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29911 tmp = gen_rtx_MEM (QImode, sym_ref);
29913 output_asm_insn ("jmp\t%0", xops);
29916 #endif /* TARGET_MACHO */
29918 tmp = gen_rtx_REG (SImode, CX_REG);
29919 output_set_got (tmp, NULL_RTX);
29922 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29923 output_asm_insn ("jmp\t{*}%1", xops);
29926 final_end_function ();
29930 x86_file_start (void)
29932 default_file_start ();
29934 darwin_file_start ();
29936 if (X86_FILE_START_VERSION_DIRECTIVE)
29937 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29938 if (X86_FILE_START_FLTUSED)
29939 fputs ("\t.global\t__fltused\n", asm_out_file);
29940 if (ix86_asm_dialect == ASM_INTEL)
29941 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29945 x86_field_alignment (tree field, int computed)
29947 enum machine_mode mode;
29948 tree type = TREE_TYPE (field);
29950 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29952 mode = TYPE_MODE (strip_array_types (type));
29953 if (mode == DFmode || mode == DCmode
29954 || GET_MODE_CLASS (mode) == MODE_INT
29955 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29956 return MIN (32, computed);
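/* (Explanatory note.)  The traditional 32-bit System V psABI aligns
   double, long long and their complex variants to only 4 bytes inside
   structures, hence the cap at 32 bits above unless -m64 or
   -malign-double is in effect.  */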
29960 /* Output assembler code to FILE to increment profiler label # LABELNO
29961 for profiling a function entry. */
29963 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29965 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29970 #ifndef NO_PROFILE_COUNTERS
29971 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29974 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29975 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29977 fprintf (file, "\tcall\t%s\n", mcount_name);
29981 #ifndef NO_PROFILE_COUNTERS
29982 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29985 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29989 #ifndef NO_PROFILE_COUNTERS
29990 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29993 fprintf (file, "\tcall\t%s\n", mcount_name);
29997 /* We don't have exact information about the insn sizes, but we may assume
29998 quite safely that we are informed about all 1 byte insns and memory
29999 address sizes. This is enough to eliminate unnecessary padding in
30003 min_insn_size (rtx insn)
30007 if (!INSN_P (insn) || !active_insn_p (insn))
30010 /* Discard the alignments we have emitted, and jump instructions. */
30011 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
30012 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
30014 if (JUMP_TABLE_DATA_P (insn))
30017 /* Important case - calls are always 5 bytes.
30018 It is common to have many calls in a row. */
30020 && symbolic_reference_mentioned_p (PATTERN (insn))
30021 && !SIBLING_CALL_P (insn))
30023 len = get_attr_length (insn);
30027 /* For normal instructions we rely on get_attr_length being exact,
30028 with a few exceptions. */
30029 if (!JUMP_P (insn))
30031 enum attr_type type = get_attr_type (insn);
30036 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
30037 || asm_noperands (PATTERN (insn)) >= 0)
30044 /* Otherwise trust get_attr_length. */
30048 l = get_attr_length_address (insn);
30049 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
30058 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30060 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
30064 ix86_avoid_jump_mispredicts (void)
30066 rtx insn, start = get_insns ();
30067 int nbytes = 0, njumps = 0;
30070 /* Look for all minimal intervals of instructions containing 4 jumps.
30071 The intervals are bounded by START and INSN. NBYTES is the total
30072 size of the instructions in the interval, including INSN and not including
30073 START. When NBYTES is smaller than 16, it is possible
30074 that the ends of START and INSN land in the same 16-byte page.
30076 The smallest offset in the page at which INSN can start is the case where
30077 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
30078 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
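For illustration (an added example, not in the original comment): with
NBYTES = 12 and a 2-byte INSN, maxskip is 15 - 12 + 2 = 5, so the
emitted ".p2align 4,,5" pads only when at most 5 bytes are needed to
reach the next 16-byte boundary; in exactly the dangerous placements
this pushes INSN across the boundary, so the four jumps cannot share
a single 16-byte window.  */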
30080 for (insn = start; insn; insn = NEXT_INSN (insn))
30084 if (LABEL_P (insn))
30086 int align = label_to_alignment (insn);
30087 int max_skip = label_to_max_skip (insn);
30091 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
30092 already in the current 16 byte page, because otherwise
30093 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
30094 bytes to reach 16 byte boundary. */
30096 || (align <= 3 && max_skip != (1 << align) - 1))
30099 fprintf (dump_file, "Label %i with max_skip %i\n",
30100 INSN_UID (insn), max_skip);
30103 while (nbytes + max_skip >= 16)
30105 start = NEXT_INSN (start);
30106 if ((JUMP_P (start)
30107 && GET_CODE (PATTERN (start)) != ADDR_VEC
30108 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30110 njumps--, isjump = 1;
30113 nbytes -= min_insn_size (start);
30119 min_size = min_insn_size (insn);
30120 nbytes += min_size;
30122 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
30123 INSN_UID (insn), min_size);
30125 && GET_CODE (PATTERN (insn)) != ADDR_VEC
30126 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
30134 start = NEXT_INSN (start);
30135 if ((JUMP_P (start)
30136 && GET_CODE (PATTERN (start)) != ADDR_VEC
30137 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
30139 njumps--, isjump = 1;
30142 nbytes -= min_insn_size (start);
30144 gcc_assert (njumps >= 0);
30146 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
30147 INSN_UID (start), INSN_UID (insn), nbytes);
30149 if (njumps == 3 && isjump && nbytes < 16)
30151 int padsize = 15 - nbytes + min_insn_size (insn);
30154 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
30155 INSN_UID (insn), padsize);
30156 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
30162 /* AMD Athlon works faster
30163 when RET is not the destination of a conditional jump or directly preceded
30164 by another jump instruction. We avoid the penalty by inserting a NOP just
30165 before the RET instruction in such cases. */
30167 ix86_pad_returns (void)
30172 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30174 basic_block bb = e->src;
30175 rtx ret = BB_END (bb);
30177 bool replace = false;
30179 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
30180 || optimize_bb_for_size_p (bb))
30182 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
30183 if (active_insn_p (prev) || LABEL_P (prev))
30185 if (prev && LABEL_P (prev))
30190 FOR_EACH_EDGE (e, ei, bb->preds)
30191 if (EDGE_FREQUENCY (e) && e->src->index >= 0
30192 && !(e->flags & EDGE_FALLTHRU))
30197 prev = prev_active_insn (ret);
30199 && ((JUMP_P (prev) && any_condjump_p (prev))
30202 /* Empty functions get a branch misprediction even when
30203 the jump destination is not visible to us. */
30204 if (!prev && !optimize_function_for_size_p (cfun))
30209 emit_jump_insn_before (gen_return_internal_long (), ret);
30215 /* Count the minimum number of instructions in BB. Return 4 if the
30216 number of instructions >= 4. */
30219 ix86_count_insn_bb (basic_block bb)
30222 int insn_count = 0;
30224 /* Count number of instructions in this block. Return 4 if the number
30225 of instructions >= 4. */
30226 FOR_BB_INSNS (bb, insn)
30228 /* This happens only in exit blocks. */
30230 && GET_CODE (PATTERN (insn)) == RETURN)
30233 if (NONDEBUG_INSN_P (insn)
30234 && GET_CODE (PATTERN (insn)) != USE
30235 && GET_CODE (PATTERN (insn)) != CLOBBER)
30238 if (insn_count >= 4)
30247 /* Count the minimum number of instructions in a code path in BB.
30248 Return 4 if the number of instructions >= 4. */
30251 ix86_count_insn (basic_block bb)
30255 int min_prev_count;
30257 /* Only bother counting instructions along paths with no
30258 more than 2 basic blocks between entry and exit. Given
30259 that BB has an edge to exit, determine if a predecessor
30260 of BB has an edge from entry. If so, compute the number
30261 of instructions in the predecessor block. If there
30262 happen to be multiple such blocks, compute the minimum. */
30263 min_prev_count = 4;
30264 FOR_EACH_EDGE (e, ei, bb->preds)
30267 edge_iterator prev_ei;
30269 if (e->src == ENTRY_BLOCK_PTR)
30271 min_prev_count = 0;
30274 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
30276 if (prev_e->src == ENTRY_BLOCK_PTR)
30278 int count = ix86_count_insn_bb (e->src);
30279 if (count < min_prev_count)
30280 min_prev_count = count;
30286 if (min_prev_count < 4)
30287 min_prev_count += ix86_count_insn_bb (bb);
30289 return min_prev_count;
30292 /* Pad short functions to 4 instructions. */
30295 ix86_pad_short_function (void)
30300 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
30302 rtx ret = BB_END (e->src);
30303 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
30305 int insn_count = ix86_count_insn (e->src);
30307 /* Pad short function. */
30308 if (insn_count < 4)
30312 /* Find epilogue. */
30315 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
30316 insn = PREV_INSN (insn);
30321 /* Two NOPs count as one instruction. */
30322 insn_count = 2 * (4 - insn_count);
30323 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
30329 /* Implement machine specific optimizations. We implement padding of returns
30330 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
30334 /* We are freeing block_for_insn in the toplev to keep compatibility
30335 with old MDEP_REORGS that are not CFG based. Recompute it now. */
30336 compute_bb_for_insn ();
30338 if (optimize && optimize_function_for_speed_p (cfun))
30340 if (TARGET_PAD_SHORT_FUNCTION)
30341 ix86_pad_short_function ();
30342 else if (TARGET_PAD_RETURNS)
30343 ix86_pad_returns ();
30344 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
30345 if (TARGET_FOUR_JUMP_LIMIT)
30346 ix86_avoid_jump_mispredicts ();
30350 /* Run the vzeroupper optimization if needed. */
30351 if (TARGET_VZEROUPPER)
30352 move_or_delete_vzeroupper ();
30355 /* Return nonzero when a QImode register that must be represented via a REX prefix
30358 x86_extended_QIreg_mentioned_p (rtx insn)
30361 extract_insn_cached (insn);
30362 for (i = 0; i < recog_data.n_operands; i++)
30363 if (REG_P (recog_data.operand[i])
30364 && REGNO (recog_data.operand[i]) > BX_REG)
30369 /* Return nonzero when P points to a register encoded via a REX prefix.
30370 Called via for_each_rtx. */
30372 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30374 unsigned int regno;
30377 regno = REGNO (*p);
30378 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30381 /* Return true when INSN mentions register that must be encoded using REX
30384 x86_extended_reg_mentioned_p (rtx insn)
30386 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30387 extended_reg_mentioned_1, NULL);
30390 /* If profitable, negate (without causing overflow) integer constant
30391 of mode MODE at location LOC. Return true in this case. */
30393 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30397 if (!CONST_INT_P (*loc))
30403 /* DImode x86_64 constants must fit in 32 bits. */
30404 gcc_assert (x86_64_immediate_operand (*loc, mode));
30415 gcc_unreachable ();
30418 /* Avoid overflows. */
30419 if (mode_signbit_p (mode, *loc))
30422 val = INTVAL (*loc);
30424 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
30425 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
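/* For illustration (an added example): "addl $-4, %eax" and
   "subl $4, %eax" both fit the sign-extended 8-bit immediate form and
   are the same size, so the sub form wins purely on readability.  But
   128 does not fit in a signed 8-bit immediate while -128 does, so an
   add of -128 is kept as is, and conversely an add of 128 is better
   emitted as a subtract of -128.  */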
30426 if ((val < 0 && val != -128)
30429 *loc = GEN_INT (-val);
30436 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30437 optabs would emit if we didn't have TFmode patterns. */
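/* Roughly, the C code below (an explanatory sketch, using a 64-bit
   input for concreteness; the same applies to SImode):

	if ((int64_t) x >= 0)
	  return (double) x;
	return 2.0 * (double) ((x >> 1) | (x & 1));

   Halving a too-large value keeps the shifted-out bit OR-ed into bit 0
   as a "sticky" bit, so the rounding of the conversion is unaffected,
   and the result is doubled afterwards.  */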
30440 x86_emit_floatuns (rtx operands[2])
30442 rtx neglab, donelab, i0, i1, f0, in, out;
30443 enum machine_mode mode, inmode;
30445 inmode = GET_MODE (operands[1]);
30446 gcc_assert (inmode == SImode || inmode == DImode);
30449 in = force_reg (inmode, operands[1]);
30450 mode = GET_MODE (out);
30451 neglab = gen_label_rtx ();
30452 donelab = gen_label_rtx ();
30453 f0 = gen_reg_rtx (mode);
30455 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30457 expand_float (out, in, 0);
30459 emit_jump_insn (gen_jump (donelab));
30462 emit_label (neglab);
30464 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30466 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30468 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30470 expand_float (f0, i0, 0);
30472 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30474 emit_label (donelab);
30477 /* AVX does not support 32-byte integer vector operations,
30478 thus the longest vector we are faced with is V16QImode. */
30479 #define MAX_VECT_LEN 16
30481 struct expand_vec_perm_d
30483 rtx target, op0, op1;
30484 unsigned char perm[MAX_VECT_LEN];
30485 enum machine_mode vmode;
30486 unsigned char nelt;
30490 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30491 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30493 /* Get a vector mode of the same size as the original but with elements
30494 twice as wide. This is only guaranteed to apply to integral vectors. */
30496 static inline enum machine_mode
30497 get_mode_wider_vector (enum machine_mode o)
30499 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30500 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30501 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30502 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30506 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30507 with all elements equal to VAR. Return true if successful. */
30510 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30511 rtx target, rtx val)
30534 /* First attempt to recognize VAL as-is. */
30535 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30536 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30537 if (recog_memoized (insn) < 0)
30540 /* If that fails, force VAL into a register. */
30543 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30544 seq = get_insns ();
30547 emit_insn_before (seq, insn);
30549 ok = recog_memoized (insn) >= 0;
30558 if (TARGET_SSE || TARGET_3DNOW_A)
30562 val = gen_lowpart (SImode, val);
30563 x = gen_rtx_TRUNCATE (HImode, val);
30564 x = gen_rtx_VEC_DUPLICATE (mode, x);
30565 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30578 struct expand_vec_perm_d dperm;
30582 memset (&dperm, 0, sizeof (dperm));
30583 dperm.target = target;
30584 dperm.vmode = mode;
30585 dperm.nelt = GET_MODE_NUNITS (mode);
30586 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30588 /* Extend to SImode using a paradoxical SUBREG. */
30589 tmp1 = gen_reg_rtx (SImode);
30590 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30592 /* Insert the SImode value as low element of a V4SImode vector. */
30593 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30594 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30596 ok = (expand_vec_perm_1 (&dperm)
30597 || expand_vec_perm_broadcast_1 (&dperm));
30609 /* Replicate the value once into the next wider mode and recurse. */
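/* For example (an added illustration), a QImode value x becomes the
   HImode value (x << 8) | x; broadcasting that pair over the wider
   vector mode produces the same bit pattern as broadcasting x over
   the original mode.  */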
30611 enum machine_mode smode, wsmode, wvmode;
30614 smode = GET_MODE_INNER (mode);
30615 wvmode = get_mode_wider_vector (mode);
30616 wsmode = GET_MODE_INNER (wvmode);
30618 val = convert_modes (wsmode, smode, val, true);
30619 x = expand_simple_binop (wsmode, ASHIFT, val,
30620 GEN_INT (GET_MODE_BITSIZE (smode)),
30621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30622 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30624 x = gen_lowpart (wvmode, target);
30625 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30633 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30634 rtx x = gen_reg_rtx (hvmode);
30636 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30639 x = gen_rtx_VEC_CONCAT (mode, x, x);
30640 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30649 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30650 whose ONE_VAR element is VAR, and other elements are zero. Return true
30654 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30655 rtx target, rtx var, int one_var)
30657 enum machine_mode vsimode;
30660 bool use_vector_set = false;
30665 /* For SSE4.1, we normally use vector set. But if the second
30666 element is zero and inter-unit moves are OK, we use movq
30668 use_vector_set = (TARGET_64BIT
30670 && !(TARGET_INTER_UNIT_MOVES
30676 use_vector_set = TARGET_SSE4_1;
30679 use_vector_set = TARGET_SSE2;
30682 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30689 use_vector_set = TARGET_AVX;
30692 /* Use ix86_expand_vector_set in 64bit mode only. */
30693 use_vector_set = TARGET_AVX && TARGET_64BIT;
30699 if (use_vector_set)
30701 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30702 var = force_reg (GET_MODE_INNER (mode), var);
30703 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30719 var = force_reg (GET_MODE_INNER (mode), var);
30720 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30721 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30726 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30727 new_target = gen_reg_rtx (mode);
30729 new_target = target;
30730 var = force_reg (GET_MODE_INNER (mode), var);
30731 x = gen_rtx_VEC_DUPLICATE (mode, var);
30732 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30733 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30736 /* We need to shuffle the value to the correct position, so
30737 create a new pseudo to store the intermediate result. */
30739 /* With SSE2, we can use the integer shuffle insns. */
30740 if (mode != V4SFmode && TARGET_SSE2)
30742 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30744 GEN_INT (one_var == 1 ? 0 : 1),
30745 GEN_INT (one_var == 2 ? 0 : 1),
30746 GEN_INT (one_var == 3 ? 0 : 1)));
30747 if (target != new_target)
30748 emit_move_insn (target, new_target);
30752 /* Otherwise convert the intermediate result to V4SFmode and
30753 use the SSE1 shuffle instructions. */
30754 if (mode != V4SFmode)
30756 tmp = gen_reg_rtx (V4SFmode);
30757 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30762 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30764 GEN_INT (one_var == 1 ? 0 : 1),
30765 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30766 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30768 if (mode != V4SFmode)
30769 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30770 else if (tmp != target)
30771 emit_move_insn (target, tmp);
30773 else if (target != new_target)
30774 emit_move_insn (target, new_target);
30779 vsimode = V4SImode;
30785 vsimode = V2SImode;
30791 /* Zero extend the variable element to SImode and recurse. */
30792 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30794 x = gen_reg_rtx (vsimode);
30795 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30797 gcc_unreachable ();
30799 emit_move_insn (target, gen_lowpart (mode, x));
30807 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30808 consisting of the values in VALS. It is known that all elements
30809 except ONE_VAR are constants. Return true if successful. */
30812 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30813 rtx target, rtx vals, int one_var)
30815 rtx var = XVECEXP (vals, 0, one_var);
30816 enum machine_mode wmode;
30819 const_vec = copy_rtx (vals);
30820 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30821 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30829 /* For the two element vectors, it's just as easy to use
30830 the general case. */
30834 /* Use ix86_expand_vector_set in 64bit mode only. */
30857 /* There's no way to set one QImode entry easily. Combine
30858 the variable value with its adjacent constant value, and
30859 promote to an HImode set. */
30860 x = XVECEXP (vals, 0, one_var ^ 1);
30863 var = convert_modes (HImode, QImode, var, true);
30864 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30865 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30866 x = GEN_INT (INTVAL (x) & 0xff);
30870 var = convert_modes (HImode, QImode, var, true);
30871 x = gen_int_mode (INTVAL (x) << 8, HImode);
30873 if (x != const0_rtx)
30874 var = expand_simple_binop (HImode, IOR, var, x, var,
30875 1, OPTAB_LIB_WIDEN);
30877 x = gen_reg_rtx (wmode);
30878 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30879 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30881 emit_move_insn (target, gen_lowpart (mode, x));
30888 emit_move_insn (target, const_vec);
30889 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30893 /* A subroutine of ix86_expand_vector_init_general. Use vector
30894 concatenate to handle the most general case: all values variable,
30895 and none identical. */
30898 ix86_expand_vector_init_concat (enum machine_mode mode,
30899 rtx target, rtx *ops, int n)
30901 enum machine_mode cmode, hmode = VOIDmode;
30902 rtx first[8], second[4];
30942 gcc_unreachable ();
30945 if (!register_operand (ops[1], cmode))
30946 ops[1] = force_reg (cmode, ops[1]);
30947 if (!register_operand (ops[0], cmode))
30948 ops[0] = force_reg (cmode, ops[0]);
30949 emit_insn (gen_rtx_SET (VOIDmode, target,
30950 gen_rtx_VEC_CONCAT (mode, ops[0],
30970 gcc_unreachable ();
30986 gcc_unreachable ();
30991 /* FIXME: We process inputs backward to help RA. PR 36222. */
30994 for (; i > 0; i -= 2, j--)
30996 first[j] = gen_reg_rtx (cmode);
30997 v = gen_rtvec (2, ops[i - 1], ops[i]);
30998 ix86_expand_vector_init (false, first[j],
30999 gen_rtx_PARALLEL (cmode, v));
31005 gcc_assert (hmode != VOIDmode);
31006 for (i = j = 0; i < n; i += 2, j++)
31008 second[j] = gen_reg_rtx (hmode);
31009 ix86_expand_vector_init_concat (hmode, second [j],
31013 ix86_expand_vector_init_concat (mode, target, second, n);
31016 ix86_expand_vector_init_concat (mode, target, first, n);
31020 gcc_unreachable ();
31024 /* A subroutine of ix86_expand_vector_init_general. Use vector
31025 interleave to handle the most general case: all values variable,
31026 and none identical. */
31029 ix86_expand_vector_init_interleave (enum machine_mode mode,
31030 rtx target, rtx *ops, int n)
31032 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
31035 rtx (*gen_load_even) (rtx, rtx, rtx);
31036 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
31037 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
31042 gen_load_even = gen_vec_setv8hi;
31043 gen_interleave_first_low = gen_vec_interleave_lowv4si;
31044 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31045 inner_mode = HImode;
31046 first_imode = V4SImode;
31047 second_imode = V2DImode;
31048 third_imode = VOIDmode;
31051 gen_load_even = gen_vec_setv16qi;
31052 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
31053 gen_interleave_second_low = gen_vec_interleave_lowv4si;
31054 inner_mode = QImode;
31055 first_imode = V8HImode;
31056 second_imode = V4SImode;
31057 third_imode = V2DImode;
31060 gcc_unreachable ();
31063 for (i = 0; i < n; i++)
31065 /* Extend the odd element to SImode using a paradoxical SUBREG. */
31066 op0 = gen_reg_rtx (SImode);
31067 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
31069 /* Insert the SImode value as low element of V4SImode vector. */
31070 op1 = gen_reg_rtx (V4SImode);
31071 op0 = gen_rtx_VEC_MERGE (V4SImode,
31072 gen_rtx_VEC_DUPLICATE (V4SImode,
31074 CONST0_RTX (V4SImode),
31076 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
31078 /* Cast the V4SImode vector back to a vector in the original mode. */
31079 op0 = gen_reg_rtx (mode);
31080 emit_move_insn (op0, gen_lowpart (mode, op1));
31082 /* Load even elements into the second position. */
31083 emit_insn (gen_load_even (op0,
31084 force_reg (inner_mode,
31088 /* Cast vector to FIRST_IMODE vector. */
31089 ops[i] = gen_reg_rtx (first_imode);
31090 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
31093 /* Interleave low FIRST_IMODE vectors. */
31094 for (i = j = 0; i < n; i += 2, j++)
31096 op0 = gen_reg_rtx (first_imode);
31097 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
31099 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
31100 ops[j] = gen_reg_rtx (second_imode);
31101 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
31104 /* Interleave low SECOND_IMODE vectors. */
31105 switch (second_imode)
31108 for (i = j = 0; i < n / 2; i += 2, j++)
31110 op0 = gen_reg_rtx (second_imode);
31111 emit_insn (gen_interleave_second_low (op0, ops[i],
31114 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
31116 ops[j] = gen_reg_rtx (third_imode);
31117 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
31119 second_imode = V2DImode;
31120 gen_interleave_second_low = gen_vec_interleave_lowv2di;
31124 op0 = gen_reg_rtx (second_imode);
31125 emit_insn (gen_interleave_second_low (op0, ops[0],
31128 /* Cast the SECOND_IMODE vector back to a vector in the original
31130 emit_insn (gen_rtx_SET (VOIDmode, target,
31131 gen_lowpart (mode, op0)));
31135 gcc_unreachable ();
31139 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
31140 all values variable, and none identical. */
31143 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
31144 rtx target, rtx vals)
31146 rtx ops[32], op0, op1;
31147 enum machine_mode half_mode = VOIDmode;
31154 if (!mmx_ok && !TARGET_SSE)
31166 n = GET_MODE_NUNITS (mode);
31167 for (i = 0; i < n; i++)
31168 ops[i] = XVECEXP (vals, 0, i);
31169 ix86_expand_vector_init_concat (mode, target, ops, n);
31173 half_mode = V16QImode;
31177 half_mode = V8HImode;
31181 n = GET_MODE_NUNITS (mode);
31182 for (i = 0; i < n; i++)
31183 ops[i] = XVECEXP (vals, 0, i);
31184 op0 = gen_reg_rtx (half_mode);
31185 op1 = gen_reg_rtx (half_mode);
31186 ix86_expand_vector_init_interleave (half_mode, op0, ops,
31188 ix86_expand_vector_init_interleave (half_mode, op1,
31189 &ops [n >> 1], n >> 2);
31190 emit_insn (gen_rtx_SET (VOIDmode, target,
31191 gen_rtx_VEC_CONCAT (mode, op0, op1)));
31195 if (!TARGET_SSE4_1)
31203 /* Don't use ix86_expand_vector_init_interleave if we can't
31204 move from GPR to SSE register directly. */
31205 if (!TARGET_INTER_UNIT_MOVES)
31208 n = GET_MODE_NUNITS (mode);
31209 for (i = 0; i < n; i++)
31210 ops[i] = XVECEXP (vals, 0, i);
31211 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
31219 gcc_unreachable ();
31223 int i, j, n_elts, n_words, n_elt_per_word;
31224 enum machine_mode inner_mode;
31225 rtx words[4], shift;
31227 inner_mode = GET_MODE_INNER (mode);
31228 n_elts = GET_MODE_NUNITS (mode);
31229 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
31230 n_elt_per_word = n_elts / n_words;
31231 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
31233 for (i = 0; i < n_words; ++i)
31235 rtx word = NULL_RTX;
31237 for (j = 0; j < n_elt_per_word; ++j)
31239 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
31240 elt = convert_modes (word_mode, inner_mode, elt, true);
31246 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
31247 word, 1, OPTAB_LIB_WIDEN);
31248 word = expand_simple_binop (word_mode, IOR, word, elt,
31249 word, 1, OPTAB_LIB_WIDEN);
31257 emit_move_insn (target, gen_lowpart (mode, words[0]));
31258 else if (n_words == 2)
31260 rtx tmp = gen_reg_rtx (mode);
31261 emit_clobber (tmp);
31262 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
31263 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
31264 emit_move_insn (target, tmp);
31266 else if (n_words == 4)
31268 rtx tmp = gen_reg_rtx (V4SImode);
31269 gcc_assert (word_mode == SImode);
31270 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
31271 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
31272 emit_move_insn (target, gen_lowpart (mode, tmp));
31275 gcc_unreachable ();
31279 /* Initialize vector TARGET via VALS. Suppress the use of MMX
31280 instructions unless MMX_OK is true. */
31283 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
31285 enum machine_mode mode = GET_MODE (target);
31286 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31287 int n_elts = GET_MODE_NUNITS (mode);
31288 int n_var = 0, one_var = -1;
31289 bool all_same = true, all_const_zero = true;
31293 for (i = 0; i < n_elts; ++i)
31295 x = XVECEXP (vals, 0, i);
31296 if (!(CONST_INT_P (x)
31297 || GET_CODE (x) == CONST_DOUBLE
31298 || GET_CODE (x) == CONST_FIXED))
31299 n_var++, one_var = i;
31300 else if (x != CONST0_RTX (inner_mode))
31301 all_const_zero = false;
31302 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
31306 /* Constants are best loaded from the constant pool. */
31309 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
31313 /* If all values are identical, broadcast the value. */
31315 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
31316 XVECEXP (vals, 0, 0)))
31319 /* Values where only one field is non-constant are best loaded from
31320 the pool and overwritten via move later. */
31324 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
31325 XVECEXP (vals, 0, one_var),
31329 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
31333 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
31337 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
31339 enum machine_mode mode = GET_MODE (target);
31340 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31341 enum machine_mode half_mode;
31342 bool use_vec_merge = false;
31344 static rtx (*gen_extract[6][2]) (rtx, rtx)
31346 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
31347 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
31348 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
31349 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
31350 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31351 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31353 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31355 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31356 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31357 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31358 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31359 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31360 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31370 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31371 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31373 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31375 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31376 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31382 use_vec_merge = TARGET_SSE4_1;
31390 /* For the two element vectors, we implement a VEC_CONCAT with
31391 the extraction of the other element. */
31393 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31394 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31397 op0 = val, op1 = tmp;
31399 op0 = tmp, op1 = val;
31401 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31402 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31407 use_vec_merge = TARGET_SSE4_1;
31414 use_vec_merge = true;
31418 /* tmp = target = A B C D */
31419 tmp = copy_to_reg (target);
31420 /* target = A A B B */
31421 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31422 /* target = X A B B */
31423 ix86_expand_vector_set (false, target, val, 0);
31424 /* target = A X C D */
31425 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31426 const1_rtx, const0_rtx,
31427 GEN_INT (2+4), GEN_INT (3+4)));
31431 /* tmp = target = A B C D */
31432 tmp = copy_to_reg (target);
31433 /* tmp = X B C D */
31434 ix86_expand_vector_set (false, tmp, val, 0);
31435 /* target = A B X D */
31436 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31437 const0_rtx, const1_rtx,
31438 GEN_INT (0+4), GEN_INT (3+4)));
31442 /* tmp = target = A B C D */
31443 tmp = copy_to_reg (target);
31444 /* tmp = X B C D */
31445 ix86_expand_vector_set (false, tmp, val, 0);
31446 /* target = A B C X */
31447 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31448 const0_rtx, const1_rtx,
31449 GEN_INT (2+4), GEN_INT (0+4)));
31453 gcc_unreachable ();
31458 use_vec_merge = TARGET_SSE4_1;
31462 /* Element 0 handled by vec_merge below. */
31465 use_vec_merge = true;
31471 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31472 store into element 0, then shuffle them back. */
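/* For illustration (an added example): with elt == 2 the order below
   becomes { 2, 1, 0, 3 }, a self-inverse permutation swapping elements
   0 and 2, so the same pshufd first brings ELT to element 0 and, when
   applied again after the store, moves it back.  */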
31476 order[0] = GEN_INT (elt);
31477 order[1] = const1_rtx;
31478 order[2] = const2_rtx;
31479 order[3] = GEN_INT (3);
31480 order[elt] = const0_rtx;
31482 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31483 order[1], order[2], order[3]));
31485 ix86_expand_vector_set (false, target, val, 0);
31487 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31488 order[1], order[2], order[3]));
31492 /* For SSE1, we have to reuse the V4SF code. */
31493 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31494 gen_lowpart (SFmode, val), elt);
31499 use_vec_merge = TARGET_SSE2;
31502 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31506 use_vec_merge = TARGET_SSE4_1;
31513 half_mode = V16QImode;
31519 half_mode = V8HImode;
31525 half_mode = V4SImode;
31531 half_mode = V2DImode;
31537 half_mode = V4SFmode;
31543 half_mode = V2DFmode;
31549 /* Compute offset. */
31553 gcc_assert (i <= 1);
31555 /* Extract the half. */
31556 tmp = gen_reg_rtx (half_mode);
31557 emit_insn (gen_extract[j][i] (tmp, target));
31559 /* Put val in tmp at elt. */
31560 ix86_expand_vector_set (false, tmp, val, elt);
31563 emit_insn (gen_insert[j][i] (target, target, tmp));
31572 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31573 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31574 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31578 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31580 emit_move_insn (mem, target);
31582 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31583 emit_move_insn (tmp, val);
31585 emit_move_insn (target, mem);
31590 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31592 enum machine_mode mode = GET_MODE (vec);
31593 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31594 bool use_vec_extr = false;
31607 use_vec_extr = true;
31611 use_vec_extr = TARGET_SSE4_1;
31623 tmp = gen_reg_rtx (mode);
31624 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31625 GEN_INT (elt), GEN_INT (elt),
31626 GEN_INT (elt+4), GEN_INT (elt+4)));
31630 tmp = gen_reg_rtx (mode);
31631 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31635 gcc_unreachable ();
31638 use_vec_extr = true;
31643 use_vec_extr = TARGET_SSE4_1;
31657 tmp = gen_reg_rtx (mode);
31658 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31659 GEN_INT (elt), GEN_INT (elt),
31660 GEN_INT (elt), GEN_INT (elt)));
31664 tmp = gen_reg_rtx (mode);
31665 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31669 gcc_unreachable ();
31672 use_vec_extr = true;
31677 /* For SSE1, we have to reuse the V4SF code. */
31678 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31679 gen_lowpart (V4SFmode, vec), elt);
31685 use_vec_extr = TARGET_SSE2;
31688 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31692 use_vec_extr = TARGET_SSE4_1;
31696 /* ??? Could extract the appropriate HImode element and shift. */
31703 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31704 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31706 /* Let the rtl optimizers know about the zero extension performed. */
31707 if (inner_mode == QImode || inner_mode == HImode)
31709 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31710 target = gen_lowpart (SImode, target);
31713 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31717 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31719 emit_move_insn (mem, vec);
31721 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31722 emit_move_insn (target, tmp);
31726 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31727 pattern to reduce; DEST is the destination; IN is the input vector. */
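/* Roughly, for IN = { a, b, c, d } (an illustrative trace, not part of
   the original comments):
	tmp1 = movhlps (in, in)   = { c, d, c, d }
	tmp2 = fn (tmp1, in)      = { f(c,a), f(d,b), .., .. }
	tmp3 = broadcast of tmp2 element 1 via shufps = { f(d,b), ... }
	dest = fn (tmp2, tmp3), whose element 0 is f(f(c,a), f(d,b)).  */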
31730 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31732 rtx tmp1, tmp2, tmp3;
31734 tmp1 = gen_reg_rtx (V4SFmode);
31735 tmp2 = gen_reg_rtx (V4SFmode);
31736 tmp3 = gen_reg_rtx (V4SFmode);
31738 emit_insn (gen_sse_movhlps (tmp1, in, in));
31739 emit_insn (fn (tmp2, tmp1, in));
31741 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31742 const1_rtx, const1_rtx,
31743 GEN_INT (1+4), GEN_INT (1+4)));
31744 emit_insn (fn (dest, tmp2, tmp3));
31747 /* Target hook for scalar_mode_supported_p. */
31749 ix86_scalar_mode_supported_p (enum machine_mode mode)
31751 if (DECIMAL_FLOAT_MODE_P (mode))
31752 return default_decimal_float_supported_p ();
31753 else if (mode == TFmode)
31756 return default_scalar_mode_supported_p (mode);
31759 /* Implements target hook vector_mode_supported_p. */
31761 ix86_vector_mode_supported_p (enum machine_mode mode)
31763 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31765 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31767 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31769 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31771 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31776 /* Target hook for c_mode_for_suffix. */
31777 static enum machine_mode
31778 ix86_c_mode_for_suffix (char suffix)
31788 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31790 We do this in the new i386 backend to maintain source compatibility
31791 with the old cc0-based compiler. */
31794 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31795 tree inputs ATTRIBUTE_UNUSED,
31798 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31800 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31805 /* Implements the target hook targetm.asm.encode_section_info. This
31806 is not used by NetWare. */
31808 static void ATTRIBUTE_UNUSED
31809 ix86_encode_section_info (tree decl, rtx rtl, int first)
31811 default_encode_section_info (decl, rtl, first);
31813 if (TREE_CODE (decl) == VAR_DECL
31814 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31815 && ix86_in_large_data_p (decl))
31816 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31819 /* Worker function for REVERSE_CONDITION. */
31822 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31824 return (mode != CCFPmode && mode != CCFPUmode
31825 ? reverse_condition (code)
31826 : reverse_condition_maybe_unordered (code));
31829 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31833 output_387_reg_move (rtx insn, rtx *operands)
31835 if (REG_P (operands[0]))
31837 if (REG_P (operands[1])
31838 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31840 if (REGNO (operands[0]) == FIRST_STACK_REG)
31841 return output_387_ffreep (operands, 0);
31842 return "fstp\t%y0";
31844 if (STACK_TOP_P (operands[0]))
31845 return "fld%Z1\t%y1";
31848 else if (MEM_P (operands[0]))
31850 gcc_assert (REG_P (operands[1]));
31851 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31852 return "fstp%Z0\t%y0";
31855 /* There is no non-popping store to memory for XFmode.
31856 So if we need one, follow the store with a load. */
31857 if (GET_MODE (operands[0]) == XFmode)
31858 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31860 return "fst%Z0\t%y0";
31867 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31868 FP status register is set. */
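/* (Explanatory note.)  fnstsw stores the FP status word into %ax; C2 is
   bit 10 of the status word, i.e. bit 2 (mask 0x04) of %ah.  With sahf,
   %ah is copied into EFLAGS, where that bit becomes PF, which is why
   the sahf path below tests for "unordered".  */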
31871 ix86_emit_fp_unordered_jump (rtx label)
31873 rtx reg = gen_reg_rtx (HImode);
31876 emit_insn (gen_x86_fnstsw_1 (reg));
31878 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31880 emit_insn (gen_x86_sahf_1 (reg));
31882 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31883 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31887 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31889 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31890 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31893 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31894 gen_rtx_LABEL_REF (VOIDmode, label),
31896 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31898 emit_jump_insn (temp);
31899 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31902 /* Output code to perform a log1p XFmode calculation. */
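/* (Explanatory note.)  With y = ln (2), fyl2xp1 computes
   y * log2 (x + 1) = log (1 + x), but the instruction is only specified
   for |x| < 1 - sqrt (2) / 2; that bound is the magic constant tested
   below, and larger arguments fall back to fyl2x on 1.0 + op1.  */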
31904 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31906 rtx label1 = gen_label_rtx ();
31907 rtx label2 = gen_label_rtx ();
31909 rtx tmp = gen_reg_rtx (XFmode);
31910 rtx tmp2 = gen_reg_rtx (XFmode);
31913 emit_insn (gen_absxf2 (tmp, op1));
31914 test = gen_rtx_GE (VOIDmode, tmp,
31915 CONST_DOUBLE_FROM_REAL_VALUE (
31916 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31918 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31920 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31921 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31922 emit_jump (label2);
31924 emit_label (label1);
31925 emit_move_insn (tmp, CONST1_RTX (XFmode));
31926 emit_insn (gen_addxf3 (tmp, op1, tmp));
31927 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31928 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31930 emit_label (label2);
31933 /* Output code to perform a Newton-Raphson approximation of a single precision
31934 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31936 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31938 rtx x0, x1, e0, e1;
31940 x0 = gen_reg_rtx (mode);
31941 e0 = gen_reg_rtx (mode);
31942 e1 = gen_reg_rtx (mode);
31943 x1 = gen_reg_rtx (mode);
31945 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
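/* (Derivation sketch, not in the original comments.)  This is one
   Newton-Raphson step for f (x) = 1/x - b:
	x1 = x0 * (2 - b * x0) = (x0 + x0) - b * x0 * x0
   computed below as e1 - e0 with e1 = x0 + x0 and e0 = b * x0 * x0.
   One step roughly doubles the ~12 bits of precision of the rcpss
   estimate.  */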
31947 /* x0 = rcp(b) estimate */
31948 emit_insn (gen_rtx_SET (VOIDmode, x0,
31949 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31952 emit_insn (gen_rtx_SET (VOIDmode, e0,
31953 gen_rtx_MULT (mode, x0, b)));
31956 emit_insn (gen_rtx_SET (VOIDmode, e0,
31957 gen_rtx_MULT (mode, x0, e0)));
31960 emit_insn (gen_rtx_SET (VOIDmode, e1,
31961 gen_rtx_PLUS (mode, x0, x0)));
31964 emit_insn (gen_rtx_SET (VOIDmode, x1,
31965 gen_rtx_MINUS (mode, e1, e0)));
31968 emit_insn (gen_rtx_SET (VOIDmode, res,
31969 gen_rtx_MULT (mode, a, x1)));
31972 /* Output code to perform a Newton-Raphson approximation of a
31973 single precision floating point [reciprocal] square root. */
31975 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31978 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31981 x0 = gen_reg_rtx (mode);
31982 e0 = gen_reg_rtx (mode);
31983 e1 = gen_reg_rtx (mode);
31984 e2 = gen_reg_rtx (mode);
31985 e3 = gen_reg_rtx (mode);
31987 real_from_integer (&r, VOIDmode, -3, -1, 0);
31988 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31990 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31991 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31993 if (VECTOR_MODE_P (mode))
31995 mthree = ix86_build_const_vector (mode, true, mthree);
31996 mhalf = ix86_build_const_vector (mode, true, mhalf);
31999 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
32000 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
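/* (Derivation sketch, not in the original comments.)  One
   Newton-Raphson step for f (x) = 1/(x*x) - a gives
	x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3.0),
   and multiplying through by a yields the sqrt variant, since
   sqrt (a) = a * rsqrt (a); those are the two formulas above.  */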
32002 /* x0 = rsqrt(a) estimate */
32003 emit_insn (gen_rtx_SET (VOIDmode, x0,
32004 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
32007 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
32012 zero = gen_reg_rtx (mode);
32013 mask = gen_reg_rtx (mode);
32015 zero = force_reg (mode, CONST0_RTX(mode));
32016 emit_insn (gen_rtx_SET (VOIDmode, mask,
32017 gen_rtx_NE (mode, zero, a)));
32019 emit_insn (gen_rtx_SET (VOIDmode, x0,
32020 gen_rtx_AND (mode, x0, mask)));
32024 emit_insn (gen_rtx_SET (VOIDmode, e0,
32025 gen_rtx_MULT (mode, x0, a)));
32027 emit_insn (gen_rtx_SET (VOIDmode, e1,
32028 gen_rtx_MULT (mode, e0, x0)));
32031 mthree = force_reg (mode, mthree);
32032 emit_insn (gen_rtx_SET (VOIDmode, e2,
32033 gen_rtx_PLUS (mode, e1, mthree)));
32035 mhalf = force_reg (mode, mhalf);
32037 /* e3 = -.5 * x0 */
32038 emit_insn (gen_rtx_SET (VOIDmode, e3,
32039 gen_rtx_MULT (mode, x0, mhalf)));
32041 /* e3 = -.5 * e0 */
32042 emit_insn (gen_rtx_SET (VOIDmode, e3,
32043 gen_rtx_MULT (mode, e0, mhalf)));
32044 /* ret = e2 * e3 */
32045 emit_insn (gen_rtx_SET (VOIDmode, res,
32046 gen_rtx_MULT (mode, e2, e3)));
32049 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
32051 static void ATTRIBUTE_UNUSED
32052 i386_solaris_elf_named_section (const char *name, unsigned int flags,
32055 /* With Binutils 2.15, the "@unwind" marker must be specified on
32056 every occurrence of the ".eh_frame" section, not just the first
32059 && strcmp (name, ".eh_frame") == 0)
32061 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
32062 flags & SECTION_WRITE ? "aw" : "a");
32065 default_elf_asm_named_section (name, flags, decl);
32068 /* Return the mangling of TYPE if it is an extended fundamental type. */
32070 static const char *
32071 ix86_mangle_type (const_tree type)
32073 type = TYPE_MAIN_VARIANT (type);
32075 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32076 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32079 switch (TYPE_MODE (type))
32082 /* __float128 is "g". */
32085 /* "long double" or __float80 is "e". */
32092 /* For 32-bit code we can save PIC register setup by using
32093 the __stack_chk_fail_local hidden function instead of calling
32094 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
32095 register, so it is better to call __stack_chk_fail directly. */
32098 ix86_stack_protect_fail (void)
32100 return TARGET_64BIT
32101 ? default_external_stack_protect_fail ()
32102 : default_hidden_stack_protect_fail ();
32105 /* Select a format to encode pointers in exception handling data. CODE
32106 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
32107 true if the symbol may be affected by dynamic relocations.
32109 ??? All x86 object file formats are capable of representing this.
32110 After all, the relocation needed is the same as for the call insn.
32111 Whether or not a particular assembler allows us to enter such, I
32112 guess we'll have to see. */
32114 asm_preferred_eh_data_format (int code, int global)
32118 int type = DW_EH_PE_sdata8;
32120 || ix86_cmodel == CM_SMALL_PIC
32121 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
32122 type = DW_EH_PE_sdata4;
32123 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
32125 if (ix86_cmodel == CM_SMALL
32126 || (ix86_cmodel == CM_MEDIUM && code))
32127 return DW_EH_PE_udata4;
32128 return DW_EH_PE_absptr;
32131 /* Expand copysign from SIGN to the positive value ABS_VALUE
32132 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
32135 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
32137 enum machine_mode mode = GET_MODE (sign);
32138 rtx sgn = gen_reg_rtx (mode);
32139 if (mask == NULL_RTX)
32141 enum machine_mode vmode;
32143 if (mode == SFmode)
32145 else if (mode == DFmode)
32150 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
32151 if (!VECTOR_MODE_P (mode))
32153 /* We need to generate a scalar mode mask in this case. */
32154 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32155 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32156 mask = gen_reg_rtx (mode);
32157 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32161 mask = gen_rtx_NOT (mode, mask);
32162 emit_insn (gen_rtx_SET (VOIDmode, sgn,
32163 gen_rtx_AND (mode, mask, sign)));
32164 emit_insn (gen_rtx_SET (VOIDmode, result,
32165 gen_rtx_IOR (mode, abs_value, sgn)));
32168 /* Expand fabs (OP0) and return a new rtx that holds the result. The
32169 mask for masking out the sign-bit is stored in *SMASK, if that is
32172 ix86_expand_sse_fabs (rtx op0, rtx *smask)
32174 enum machine_mode vmode, mode = GET_MODE (op0);
32177 xa = gen_reg_rtx (mode);
32178 if (mode == SFmode)
32180 else if (mode == DFmode)
32184 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
32185 if (!VECTOR_MODE_P (mode))
32187 /* We need to generate a scalar mode mask in this case. */
32188 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
32189 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
32190 mask = gen_reg_rtx (mode);
32191 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
32193 emit_insn (gen_rtx_SET (VOIDmode, xa,
32194 gen_rtx_AND (mode, op0, mask)));
32202 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
32203 swapping the operands if SWAP_OPERANDS is true. The expanded
32204 code is a forward jump to a newly created label in case the
32205 comparison is true. The generated label rtx is returned. */
32207 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
32208 bool swap_operands)
32219 label = gen_label_rtx ();
32220 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
32221 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32222 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
32223 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
32224 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
32225 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
32226 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32227 JUMP_LABEL (tmp) = label;
32232 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
32233 using comparison code CODE. Operands are swapped for the comparison if
32234 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
32236 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
32237 bool swap_operands)
32239 enum machine_mode mode = GET_MODE (op0);
32240 rtx mask = gen_reg_rtx (mode);
32249 if (mode == DFmode)
32250 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
32251 gen_rtx_fmt_ee (code, mode, op0, op1)));
32253 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
32254 gen_rtx_fmt_ee (code, mode, op0, op1)));
32259 /* Generate and return an rtx of mode MODE for 2**n where n is the number
32260 of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
32262 ix86_gen_TWO52 (enum machine_mode mode)
32264 REAL_VALUE_TYPE TWO52r;
32267 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
32268 TWO52 = const_double_from_real_value (TWO52r, mode);
32269 TWO52 = force_reg (mode, TWO52);
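/* The constant built above enables the classic add/subtract rounding trick:
   once 2**52 is added, a double's ulp grows to 1.0 for |x| < 2**52, so the
   fractional bits are rounded away by the addition itself.  A plain-C sketch
   (assuming round-to-nearest-even and no compiler re-association, i.e. no
   -ffast-math; the helper name is illustrative):

     static double round_via_two52 (double x)
     {
       const double two52 = 4503599627370496.0;  // 2**52
       return (x + two52) - two52;               // integral for |x| < 2**52
     }

   The SFmode analogue uses 2**23 for the same reason.  */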
32274 /* Expand SSE sequence for computing lround from OP1 storing into OP0.  */
32277 ix86_expand_lround (rtx op0, rtx op1)
32279 /* C code for the stuff we're doing below:
32280 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
32283 enum machine_mode mode = GET_MODE (op1);
32284 const struct real_format *fmt;
32285 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32288 /* load nextafter (0.5, 0.0) */
32289 fmt = REAL_MODE_FORMAT (mode);
32290 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32291 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32293 /* adj = copysign (0.5, op1) */
32294 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
32295 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
32297 /* adj = op1 + adj */
32298 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
32300 /* op0 = (imode)adj */
32301 expand_fix (op0, adj, 0);
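/* Why nextafter (0.5, 0.0) rather than 0.5: with
   pred_half = 0.5 - 2**-54 = 0.49999999999999994, the input
   x = 0.49999999999999994 gives x + pred_half = 0.9999999999999999, which
   truncates to 0 as required, whereas x + 0.5 would round up to 1.0 and
   truncate to 1.  A scalar model of the sequence above, assuming IEEE
   doubles and a truncating conversion (the name is illustrative):

     #include <math.h>

     static long lround_ref (double x)
     {
       double adj = copysign (nextafter (0.5, 0.0), x);
       return (long) (x + adj);  // expand_fix above truncates
     }
*/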
32304 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0.  */
32307 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
32309 /* C code for the stuff we're doing below (for do_floor):
32311 xi -= (double)xi > op1 ? 1 : 0;
32314 enum machine_mode fmode = GET_MODE (op1);
32315 enum machine_mode imode = GET_MODE (op0);
32316 rtx ireg, freg, label, tmp;
32318 /* reg = (long)op1 */
32319 ireg = gen_reg_rtx (imode);
32320 expand_fix (ireg, op1, 0);
32322 /* freg = (double)reg */
32323 freg = gen_reg_rtx (fmode);
32324 expand_float (freg, ireg, 0);
32326 /* ireg = (freg > op1) ? ireg - 1 : ireg */
32327 label = ix86_expand_sse_compare_and_jump (UNLE,
32328 freg, op1, !do_floor);
32329 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
32330 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
32331 emit_move_insn (ireg, tmp);
32333 emit_label (label);
32334 LABEL_NUSES (label) = 1;
32336 emit_move_insn (op0, ireg);
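/* A scalar model of the sequence above, assuming a truncating int
   conversion and a value that fits the integer mode (names are
   illustrative):

     static long lfloor_ref (double x)
     {
       long i = (long) x;            // truncation, as expand_fix above
       return i - ((double) i > x);  // truncation went up for negative x: step back
     }

   For lceil the comparison and adjustment are mirrored: i + ((double) i < x).  */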
32339 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
32340 result in OPERAND0. */
32342 ix86_expand_rint (rtx operand0, rtx operand1)
32344 /* C code for the stuff we're doing below:
32345 xa = fabs (operand1);
32346 if (!isless (xa, 2**52))
32348 xa = xa + 2**52 - 2**52;
32349 return copysign (xa, operand1);
32351 enum machine_mode mode = GET_MODE (operand0);
32352 rtx res, xa, label, TWO52, mask;
32354 res = gen_reg_rtx (mode);
32355 emit_move_insn (res, operand1);
32357 /* xa = abs (operand1) */
32358 xa = ix86_expand_sse_fabs (res, &mask);
32360 /* if (!isless (xa, TWO52)) goto label; */
32361 TWO52 = ix86_gen_TWO52 (mode);
32362 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32364 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32365 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32367 ix86_sse_copysign_to_positive (res, xa, res, mask);
32369 emit_label (label);
32370 LABEL_NUSES (label) = 1;
32372 emit_move_insn (operand0, res);
32375 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.  */
32378 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32380 /* C code for the stuff we expand below.
32381 double xa = fabs (x), x2;
32382 if (!isless (xa, TWO52))
32384 xa = xa + TWO52 - TWO52;
32385 x2 = copysign (xa, x);
32394 enum machine_mode mode = GET_MODE (operand0);
32395 rtx xa, TWO52, tmp, label, one, res, mask;
32397 TWO52 = ix86_gen_TWO52 (mode);
32399 /* Temporary for holding the result, initialized to the input
32400 operand to ease control flow. */
32401 res = gen_reg_rtx (mode);
32402 emit_move_insn (res, operand1);
32404 /* xa = abs (operand1) */
32405 xa = ix86_expand_sse_fabs (res, &mask);
32407 /* if (!isless (xa, TWO52)) goto label; */
32408 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32410 /* xa = xa + TWO52 - TWO52; */
32411 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32412 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32414 /* xa = copysign (xa, operand1) */
32415 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32417 /* generate 1.0 or -1.0 */
32418 one = force_reg (mode,
32419 const_double_from_real_value (do_floor
32420 ? dconst1 : dconstm1, mode));
32422 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32423 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32424 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32425 gen_rtx_AND (mode, one, tmp)));
32426 /* We always need to subtract here to preserve signed zero. */
32427 tmp = expand_simple_binop (mode, MINUS,
32428 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32429 emit_move_insn (res, tmp);
32431 emit_label (label);
32432 LABEL_NUSES (label) = 1;
32434 emit_move_insn (operand0, res);
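/* The compensation above is branchless: the UNGT compare produces an
   all-ones/all-zeros lane mask, ANDing it with 1.0 (or -1.0 for ceil)
   yields the exact adjustment, and the unconditional subtraction keeps
   -0.0 intact since -0.0 - 0.0 == -0.0.  A scalar sketch of the floor
   path, assuming IEEE doubles (the name is illustrative):

     #include <math.h>

     static double floor_ref32 (double x)
     {
       const double two52 = 4503599627370496.0;
       double xa = fabs (x);
       if (!(xa < two52))
         return x;                        // already integral (or NaN)
       xa = (xa + two52) - two52;         // round to nearest integer
       xa = copysign (xa, x);
       return xa - (xa > x ? 1.0 : 0.0);  // the mask-and-AND in the real code
     }
*/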
32437 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.  */
32440 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32442 /* C code for the stuff we expand below.
32443 double xa = fabs (x), x2;
32444 if (!isless (xa, TWO52))
32446 x2 = (double)(long)x;
32453 if (HONOR_SIGNED_ZEROS (mode))
32454 return copysign (x2, x);
32457 enum machine_mode mode = GET_MODE (operand0);
32458 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32460 TWO52 = ix86_gen_TWO52 (mode);
32462 /* Temporary for holding the result, initialized to the input
32463 operand to ease control flow. */
32464 res = gen_reg_rtx (mode);
32465 emit_move_insn (res, operand1);
32467 /* xa = abs (operand1) */
32468 xa = ix86_expand_sse_fabs (res, &mask);
32470 /* if (!isless (xa, TWO52)) goto label; */
32471 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32473 /* xa = (double)(long)x */
32474 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32475 expand_fix (xi, res, 0);
32476 expand_float (xa, xi, 0);
32479 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32481 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32482 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32483 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32484 gen_rtx_AND (mode, one, tmp)));
32485 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32486 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32487 emit_move_insn (res, tmp);
32489 if (HONOR_SIGNED_ZEROS (mode))
32490 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32492 emit_label (label);
32493 LABEL_NUSES (label) = 1;
32495 emit_move_insn (operand0, res);
32498 /* Expand SSE sequence for computing round from OPERAND1 storing
32499 into OPERAND0.  This sequence works without relying on DImode truncation
32500 via cvttsd2siq, which is only available on 64-bit targets.  */
32502 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32504 /* C code for the stuff we expand below.
32505 double xa = fabs (x), xa2, x2;
32506 if (!isless (xa, TWO52))
32508 Using the absolute value and copying back sign makes
32509 -0.0 -> -0.0 correct.
32510 xa2 = xa + TWO52 - TWO52;
32515 else if (dxa > 0.5)
32517 x2 = copysign (xa2, x);
32520 enum machine_mode mode = GET_MODE (operand0);
32521 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32523 TWO52 = ix86_gen_TWO52 (mode);
32525 /* Temporary for holding the result, initialized to the input
32526 operand to ease control flow. */
32527 res = gen_reg_rtx (mode);
32528 emit_move_insn (res, operand1);
32530 /* xa = abs (operand1) */
32531 xa = ix86_expand_sse_fabs (res, &mask);
32533 /* if (!isless (xa, TWO52)) goto label; */
32534 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32536 /* xa2 = xa + TWO52 - TWO52; */
32537 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32538 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32540 /* dxa = xa2 - xa; */
32541 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32543 /* generate 0.5, 1.0 and -0.5 */
32544 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32545 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32546 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32550 tmp = gen_reg_rtx (mode);
32551 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32552 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32553 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32554 gen_rtx_AND (mode, one, tmp)));
32555 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32556 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32557 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32558 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32559 gen_rtx_AND (mode, one, tmp)));
32560 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32562 /* res = copysign (xa2, operand1) */
32563 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32565 emit_label (label);
32566 LABEL_NUSES (label) = 1;
32568 emit_move_insn (operand0, res);
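/* A scalar model of the sequence above, assuming IEEE doubles; dxa is the
   error of the nearest-integer step, so the two corrections turn
   round-to-nearest-even into round-half-away-from-zero (the name is
   illustrative):

     #include <math.h>

     static double round_ref32 (double x)
     {
       const double two52 = 4503599627370496.0;
       double xa = fabs (x);
       if (!(xa < two52))
         return x;
       double xa2 = (xa + two52) - two52;  // nearest integer
       double dxa = xa2 - xa;              // rounding error in [-0.5, 0.5]
       if (dxa > 0.5)                      // possible under directed rounding
         xa2 -= 1.0;
       if (dxa <= -0.5)                    // halfway case rounded to even: bump
         xa2 += 1.0;
       return copysign (xa2, x);
     }

   E.g. x = 2.5: xa2 = 2.0 (nearest-even), dxa = -0.5, so the result is 3.0.  */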
32571 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.  */
32574 ix86_expand_trunc (rtx operand0, rtx operand1)
32576 /* C code for SSE variant we expand below.
32577 double xa = fabs (x), x2;
32578 if (!isless (xa, TWO52))
32580 x2 = (double)(long)x;
32581 if (HONOR_SIGNED_ZEROS (mode))
32582 return copysign (x2, x);
32585 enum machine_mode mode = GET_MODE (operand0);
32586 rtx xa, xi, TWO52, label, res, mask;
32588 TWO52 = ix86_gen_TWO52 (mode);
32590 /* Temporary for holding the result, initialized to the input
32591 operand to ease control flow. */
32592 res = gen_reg_rtx (mode);
32593 emit_move_insn (res, operand1);
32595 /* xa = abs (operand1) */
32596 xa = ix86_expand_sse_fabs (res, &mask);
32598 /* if (!isless (xa, TWO52)) goto label; */
32599 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32601 /* x = (double)(long)x */
32602 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32603 expand_fix (xi, res, 0);
32604 expand_float (res, xi, 0);
32606 if (HONOR_SIGNED_ZEROS (mode))
32607 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32609 emit_label (label);
32610 LABEL_NUSES (label) = 1;
32612 emit_move_insn (operand0, res);
32615 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.  */
32618 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32620 enum machine_mode mode = GET_MODE (operand0);
32621 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32623 /* C code for SSE variant we expand below.
32624 double xa = fabs (x), x2;
32625 if (!isless (xa, TWO52))
32627 xa2 = xa + TWO52 - TWO52;
32631 x2 = copysign (xa2, x);
32635 TWO52 = ix86_gen_TWO52 (mode);
32637 /* Temporary for holding the result, initialized to the input
32638 operand to ease control flow. */
32639 res = gen_reg_rtx (mode);
32640 emit_move_insn (res, operand1);
32642 /* xa = abs (operand1) */
32643 xa = ix86_expand_sse_fabs (res, &smask);
32645 /* if (!isless (xa, TWO52)) goto label; */
32646 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32648 /* res = xa + TWO52 - TWO52; */
32649 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32650 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32651 emit_move_insn (res, tmp);
32654 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32656 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32657 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32658 emit_insn (gen_rtx_SET (VOIDmode, mask,
32659 gen_rtx_AND (mode, mask, one)));
32660 tmp = expand_simple_binop (mode, MINUS,
32661 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32662 emit_move_insn (res, tmp);
32664 /* res = copysign (res, operand1) */
32665 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32667 emit_label (label);
32668 LABEL_NUSES (label) = 1;
32670 emit_move_insn (operand0, res);
32673 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0.  */
32676 ix86_expand_round (rtx operand0, rtx operand1)
32678 /* C code for the stuff we're doing below:
32679 double xa = fabs (x);
32680 if (!isless (xa, TWO52))
32682 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32683 return copysign (xa, x);
32685 enum machine_mode mode = GET_MODE (operand0);
32686 rtx res, TWO52, xa, label, xi, half, mask;
32687 const struct real_format *fmt;
32688 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32690 /* Temporary for holding the result, initialized to the input
32691 operand to ease control flow. */
32692 res = gen_reg_rtx (mode);
32693 emit_move_insn (res, operand1);
32695 TWO52 = ix86_gen_TWO52 (mode);
32696 xa = ix86_expand_sse_fabs (res, &mask);
32697 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32699 /* load nextafter (0.5, 0.0) */
32700 fmt = REAL_MODE_FORMAT (mode);
32701 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32702 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32704 /* xa = xa + 0.5 */
32705 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32706 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32708 /* xa = (double)(int64_t)xa */
32709 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32710 expand_fix (xi, xa, 0);
32711 expand_float (xa, xi, 0);
32713 /* res = copysign (xa, operand1) */
32714 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32716 emit_label (label);
32717 LABEL_NUSES (label) = 1;
32719 emit_move_insn (operand0, res);
32723 /* Table of valid machine attributes. */
32724 static const struct attribute_spec ix86_attribute_table[] =
32726 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
32727 /* Stdcall attribute says callee is responsible for popping arguments
32728 if they are not variable. */
32729 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32730 /* Fastcall attribute says callee is responsible for popping arguments
32731 if they are not variable. */
32732 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32733 /* Thiscall attribute says callee is responsible for popping arguments
32734 if they are not variable. */
32735 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32736 /* Cdecl attribute says the callee is a normal C declaration.  */
32737 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32738 /* Regparm attribute specifies how many integer arguments are to be
32739 passed in registers. */
32740 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
32741 /* Sseregparm attribute says we are using x86_64 calling conventions
32742 for FP arguments. */
32743 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32744 /* force_align_arg_pointer says this function realigns the stack at entry. */
32745 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32746 false, true, true, ix86_handle_cconv_attribute },
32747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32748 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
32749 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
32750 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
32752 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32753 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32754 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32755 SUBTARGET_ATTRIBUTE_TABLE,
32757 /* ms_abi and sysv_abi calling convention function attributes. */
32758 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32759 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32760 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
32761 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32762 ix86_handle_callee_pop_aggregate_return },
32764 { NULL, 0, 0, false, false, false, NULL }
32767 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32769 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32770 tree vectype ATTRIBUTE_UNUSED,
32771 int misalign ATTRIBUTE_UNUSED)
32773 switch (type_of_cost)
32776 return ix86_cost->scalar_stmt_cost;
32779 return ix86_cost->scalar_load_cost;
32782 return ix86_cost->scalar_store_cost;
32785 return ix86_cost->vec_stmt_cost;
32788 return ix86_cost->vec_align_load_cost;
32791 return ix86_cost->vec_store_cost;
32793 case vec_to_scalar:
32794 return ix86_cost->vec_to_scalar_cost;
32796 case scalar_to_vec:
32797 return ix86_cost->scalar_to_vec_cost;
32799 case unaligned_load:
32800 case unaligned_store:
32801 return ix86_cost->vec_unalign_load_cost;
32803 case cond_branch_taken:
32804 return ix86_cost->cond_taken_branch_cost;
32806 case cond_branch_not_taken:
32807 return ix86_cost->cond_not_taken_branch_cost;
32813 gcc_unreachable ();
32818 /* Implement targetm.vectorize.builtin_vec_perm. */
32821 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32823 tree itype = TREE_TYPE (vec_type);
32824 bool u = TYPE_UNSIGNED (itype);
32825 enum machine_mode vmode = TYPE_MODE (vec_type);
32826 enum ix86_builtins fcode;
32827 bool ok = TARGET_SSE2;
32833 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32836 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32838 itype = ix86_get_builtin_type (IX86_BT_DI);
32843 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32847 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32849 itype = ix86_get_builtin_type (IX86_BT_SI);
32853 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32856 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32859 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32862 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32872 *mask_type = itype;
32873 return ix86_builtins[(int) fcode];
32876 /* Return a vector mode with twice as many elements as VMODE. */
32877 /* ??? Consider moving this to a table generated by genmodes.c. */
32879 static enum machine_mode
32880 doublesize_vector_mode (enum machine_mode vmode)
32884 case V2SFmode: return V4SFmode;
32885 case V1DImode: return V2DImode;
32886 case V2SImode: return V4SImode;
32887 case V4HImode: return V8HImode;
32888 case V8QImode: return V16QImode;
32890 case V2DFmode: return V4DFmode;
32891 case V4SFmode: return V8SFmode;
32892 case V2DImode: return V4DImode;
32893 case V4SImode: return V8SImode;
32894 case V8HImode: return V16HImode;
32895 case V16QImode: return V32QImode;
32897 case V4DFmode: return V8DFmode;
32898 case V8SFmode: return V16SFmode;
32899 case V4DImode: return V8DImode;
32900 case V8SImode: return V16SImode;
32901 case V16HImode: return V32HImode;
32902 case V32QImode: return V64QImode;
32905 gcc_unreachable ();
32909 /* Construct (set target (vec_select op0 (parallel perm))) and
32910 return true if that's a valid instruction in the active ISA. */
32913 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32915 rtx rperm[MAX_VECT_LEN], x;
32918 for (i = 0; i < nelt; ++i)
32919 rperm[i] = GEN_INT (perm[i]);
32921 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32922 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32923 x = gen_rtx_SET (VOIDmode, target, x);
32926 if (recog_memoized (x) < 0)
32934 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32937 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32938 const unsigned char *perm, unsigned nelt)
32940 enum machine_mode v2mode;
32943 v2mode = doublesize_vector_mode (GET_MODE (op0));
32944 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32945 return expand_vselect (target, x, perm, nelt);
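/* For example, interleaving the low halves of two V4SF registers
   (unpcklps) is expressed through these helpers as the permutation
   { 0, 4, 1, 5 } on the V8SF concatenation:

     (set (reg:V4SF t)
          (vec_select:V4SF
            (vec_concat:V8SF (reg:V4SF a) (reg:V4SF b))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   recog_memoized then accepts or rejects the insn depending on whether
   sse.md has a matching pattern in the active ISA.  */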
32948 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32949 in terms of blendp[sd] / pblendw / pblendvb. */
32952 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32954 enum machine_mode vmode = d->vmode;
32955 unsigned i, mask, nelt = d->nelt;
32956 rtx target, op0, op1, x;
32958 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32960 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32963 /* This is a blend, not a permute. Elements must stay in their
32964 respective lanes. */
32965 for (i = 0; i < nelt; ++i)
32967 unsigned e = d->perm[i];
32968 if (!(e == i || e == i + nelt))
32975 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32976 decision should be extracted elsewhere, so that we only try that
32977 sequence once all budget==3 options have been tried. */
32979 /* For bytes, see if bytes move in pairs so we can use pblendw with
32980 an immediate argument, rather than pblendvb with a vector argument. */
32981 if (vmode == V16QImode)
32983 bool pblendw_ok = true;
32984 for (i = 0; i < 16 && pblendw_ok; i += 2)
32985 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32989 rtx rperm[16], vperm;
32991 for (i = 0; i < nelt; ++i)
32992 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32994 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32995 vperm = force_reg (V16QImode, vperm);
32997 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
33002 target = d->target;
33014 for (i = 0; i < nelt; ++i)
33015 mask |= (d->perm[i] >= nelt) << i;
33019 for (i = 0; i < 2; ++i)
33020 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
33024 for (i = 0; i < 4; ++i)
33025 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
33029 for (i = 0; i < 8; ++i)
33030 mask |= (d->perm[i * 2] >= 16) << i;
33034 target = gen_lowpart (vmode, target);
33035 op0 = gen_lowpart (vmode, op0);
33036 op1 = gen_lowpart (vmode, op1);
33040 gcc_unreachable ();
33043 /* This matches five different patterns with the different modes. */
33044 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
33045 x = gen_rtx_SET (VOIDmode, target, x);
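/* Example of the mask computation above: on V4SF the permutation
   { 0, 5, 2, 7 } takes elements 1 and 3 from the second operand, so
   mask = 0b1010 and the vec_merge matches blendps $10.  For V8HI the
   per-element bits likewise map directly to the pblendw immediate.  */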
33051 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33052 in terms of the variable form of vpermilps.
33054 Note that we will have already failed the immediate-operand form of
33055 vpermilps, which requires that the high and low part shuffle be identical;
33056 the variable form doesn't require that.  */
33059 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
33061 rtx rperm[8], vperm;
33064 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
33067 /* We can only permute within the 128-bit lane. */
33068 for (i = 0; i < 8; ++i)
33070 unsigned e = d->perm[i];
33071 if (i < 4 ? e >= 4 : e < 4)
33078 for (i = 0; i < 8; ++i)
33080 unsigned e = d->perm[i];
33082 /* Within each 128-bit lane, the elements of op0 are numbered
33083 from 0 and the elements of op1 are numbered from 4. */
33089 rperm[i] = GEN_INT (e);
33092 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
33093 vperm = force_reg (V8SImode, vperm);
33094 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
33099 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33100 in terms of pshufb or vpperm. */
33103 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
33105 unsigned i, nelt, eltsz;
33106 rtx rperm[16], vperm, target, op0, op1;
33108 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
33110 if (GET_MODE_SIZE (d->vmode) != 16)
33117 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33119 for (i = 0; i < nelt; ++i)
33121 unsigned j, e = d->perm[i];
33122 for (j = 0; j < eltsz; ++j)
33123 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
33126 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
33127 vperm = force_reg (V16QImode, vperm);
33129 target = gen_lowpart (V16QImode, d->target);
33130 op0 = gen_lowpart (V16QImode, d->op0);
33131 if (d->op0 == d->op1)
33132 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
33135 op1 = gen_lowpart (V16QImode, d->op1);
33136 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
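/* Example of the control vector built above: for a V4SI permutation
   { 1, 0, 3, 2 } with eltsz == 4, each element index expands to its four
   byte indexes, giving the V16QI selector

     { 4,5,6,7,  0,1,2,3,  12,13,14,15,  8,9,10,11 }

   which pshufb (or vpperm, in the two-operand case) applies bytewise.  */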
33142 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
33143 in a single instruction. */
33146 expand_vec_perm_1 (struct expand_vec_perm_d *d)
33148 unsigned i, nelt = d->nelt;
33149 unsigned char perm2[MAX_VECT_LEN];
33151 /* Check plain VEC_SELECT first, because AVX has instructions that could
33152 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
33153 input where SEL+CONCAT may not. */
33154 if (d->op0 == d->op1)
33156 int mask = nelt - 1;
33158 for (i = 0; i < nelt; i++)
33159 perm2[i] = d->perm[i] & mask;
33161 if (expand_vselect (d->target, d->op0, perm2, nelt))
33164 /* There are plenty of patterns in sse.md that are written for
33165 SEL+CONCAT and are not replicated for a single op. Perhaps
33166 that should be changed, to avoid the nastiness here. */
33168 /* Recognize interleave style patterns, which means incrementing
33169 every other permutation operand. */
33170 for (i = 0; i < nelt; i += 2)
33172 perm2[i] = d->perm[i] & mask;
33173 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
33175 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33178 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
33181 for (i = 0; i < nelt; i += 4)
33183 perm2[i + 0] = d->perm[i + 0] & mask;
33184 perm2[i + 1] = d->perm[i + 1] & mask;
33185 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
33186 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
33189 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
33194 /* Finally, try the fully general two operand permute. */
33195 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
33198 /* Recognize interleave style patterns with reversed operands. */
33199 if (d->op0 != d->op1)
33201 for (i = 0; i < nelt; ++i)
33203 unsigned e = d->perm[i];
33211 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
33215 /* Try the SSE4.1 blend variable merge instructions. */
33216 if (expand_vec_perm_blend (d))
33219 /* Try one of the AVX vpermil variable permutations. */
33220 if (expand_vec_perm_vpermil (d))
33223 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
33224 if (expand_vec_perm_pshufb (d))
33230 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
33231 in terms of a pair of pshuflw + pshufhw instructions. */
33234 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
33236 unsigned char perm2[MAX_VECT_LEN];
33240 if (d->vmode != V8HImode || d->op0 != d->op1)
33243 /* The two permutations only operate in 64-bit lanes. */
33244 for (i = 0; i < 4; ++i)
33245 if (d->perm[i] >= 4)
33247 for (i = 4; i < 8; ++i)
33248 if (d->perm[i] < 4)
33254 /* Emit the pshuflw. */
33255 memcpy (perm2, d->perm, 4);
33256 for (i = 4; i < 8; ++i)
33258 ok = expand_vselect (d->target, d->op0, perm2, 8);
33261 /* Emit the pshufhw. */
33262 memcpy (perm2 + 4, d->perm + 4, 4);
33263 for (i = 0; i < 4; ++i)
33265 ok = expand_vselect (d->target, d->target, perm2, 8);
33271 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33272 the permutation using the SSSE3 palignr instruction. This succeeds
33273 when all of the elements in PERM fit within one vector and we merely
33274 need to shift them down so that a single vector permutation has a
33275 chance to succeed. */
33278 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
33280 unsigned i, nelt = d->nelt;
33285 /* Even with AVX, palignr only operates on 128-bit vectors. */
33286 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33289 min = nelt, max = 0;
33290 for (i = 0; i < nelt; ++i)
33292 unsigned e = d->perm[i];
33298 if (min == 0 || max - min >= nelt)
33301 /* Given that we have SSSE3, we know we'll be able to implement the
33302 single operand permutation after the palignr with pshufb. */
33306 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
33307 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
33308 gen_lowpart (TImode, d->op1),
33309 gen_lowpart (TImode, d->op0), shift));
33311 d->op0 = d->op1 = d->target;
33314 for (i = 0; i < nelt; ++i)
33316 unsigned e = d->perm[i] - min;
33322 /* Test for the degenerate case where the alignment by itself
33323 produces the desired permutation. */
33327 ok = expand_vec_perm_1 (d);
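/* Example: the V4SI permutation { 3, 4, 5, 6 } has min == 3 and
   max - min == 3 < nelt, so the palignr above shifts the op1:op0 pair
   right by min * 32 == 96 bits (12 bytes).  That leaves elements
   { 3, 4, 5, 6 } in lanes { 0, 1, 2, 3 } of the result, the adjusted
   permutation becomes the identity, and the degenerate-case check
   succeeds with no further shuffle.  */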
33333 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
33334 a two vector permutation into a single vector permutation by using
33335 an interleave operation to merge the vectors. */
33338 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
33340 struct expand_vec_perm_d dremap, dfinal;
33341 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
33342 unsigned contents, h1, h2, h3, h4;
33343 unsigned char remap[2 * MAX_VECT_LEN];
33347 if (d->op0 == d->op1)
33350 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33351 lanes. We can use similar techniques with the vperm2f128 instruction,
33352 but it requires slightly different logic. */
33353 if (GET_MODE_SIZE (d->vmode) != 16)
33356 /* Examine where the elements come from.  */
33358 for (i = 0; i < nelt; ++i)
33359 contents |= 1u << d->perm[i];
33361 /* Split the two input vectors into 4 halves. */
33362 h1 = (1u << nelt2) - 1;
33367 memset (remap, 0xff, sizeof (remap));
33370 /* If the elements all come from the low halves, use interleave low;
33371 similarly for interleave high.  If the elements come from mismatched
33372 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
33373 if ((contents & (h1 | h3)) == contents)
33375 for (i = 0; i < nelt2; ++i)
33378 remap[i + nelt] = i * 2 + 1;
33379 dremap.perm[i * 2] = i;
33380 dremap.perm[i * 2 + 1] = i + nelt;
33383 else if ((contents & (h2 | h4)) == contents)
33385 for (i = 0; i < nelt2; ++i)
33387 remap[i + nelt2] = i * 2;
33388 remap[i + nelt + nelt2] = i * 2 + 1;
33389 dremap.perm[i * 2] = i + nelt2;
33390 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33393 else if ((contents & (h1 | h4)) == contents)
33395 for (i = 0; i < nelt2; ++i)
33398 remap[i + nelt + nelt2] = i + nelt2;
33399 dremap.perm[i] = i;
33400 dremap.perm[i + nelt2] = i + nelt + nelt2;
33404 dremap.vmode = V2DImode;
33406 dremap.perm[0] = 0;
33407 dremap.perm[1] = 3;
33410 else if ((contents & (h2 | h3)) == contents)
33412 for (i = 0; i < nelt2; ++i)
33414 remap[i + nelt2] = i;
33415 remap[i + nelt] = i + nelt2;
33416 dremap.perm[i] = i + nelt2;
33417 dremap.perm[i + nelt2] = i + nelt;
33421 dremap.vmode = V2DImode;
33423 dremap.perm[0] = 1;
33424 dremap.perm[1] = 2;
33430 /* Use the remapping array set up above to move the elements from their
33431 swizzled locations into their final destinations. */
33433 for (i = 0; i < nelt; ++i)
33435 unsigned e = remap[d->perm[i]];
33436 gcc_assert (e < nelt);
33437 dfinal.perm[i] = e;
33439 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33440 dfinal.op1 = dfinal.op0;
33441 dremap.target = dfinal.op0;
33443 /* Test if the final remap can be done with a single insn. For V4SFmode or
33444 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33446 ok = expand_vec_perm_1 (&dfinal);
33447 seq = get_insns ();
33453 if (dremap.vmode != dfinal.vmode)
33455 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33456 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33457 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33460 ok = expand_vec_perm_1 (&dremap);
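/* Worked example of the remapping above: for the V4SI permutation
   { 0, 4, 1, 4 } the used elements { 0, 1, 4 } all sit in the two low
   halves (h1 | h3), so dremap becomes the interleave-low { 0, 4, 1, 5 }
   (punpckldq) with remap[0] = 0, remap[4] = 1, remap[1] = 2,
   remap[5] = 3.  The final single-operand permutation is then
   { 0, 1, 2, 1 }, i.e. one pshufd, for two insns overall.  */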
33467 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33468 permutation with two pshufb insns and an ior. We should have already
33469 failed all two instruction sequences. */
33472 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33474 rtx rperm[2][16], vperm, l, h, op, m128;
33475 unsigned int i, nelt, eltsz;
33477 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33479 gcc_assert (d->op0 != d->op1);
33482 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33484 /* Generate two permutation masks. If the required element is within
33485 the given vector it is shuffled into the proper lane. If the required
33486 element is in the other vector, force a zero into the lane by setting
33487 bit 7 in the permutation mask. */
33488 m128 = GEN_INT (-128);
33489 for (i = 0; i < nelt; ++i)
33491 unsigned j, e = d->perm[i];
33492 unsigned which = (e >= nelt);
33496 for (j = 0; j < eltsz; ++j)
33498 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33499 rperm[1-which][i*eltsz + j] = m128;
33503 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33504 vperm = force_reg (V16QImode, vperm);
33506 l = gen_reg_rtx (V16QImode);
33507 op = gen_lowpart (V16QImode, d->op0);
33508 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33510 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33511 vperm = force_reg (V16QImode, vperm);
33513 h = gen_reg_rtx (V16QImode);
33514 op = gen_lowpart (V16QImode, d->op1);
33515 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33517 op = gen_lowpart (V16QImode, d->target);
33518 emit_insn (gen_iorv16qi3 (op, l, h));
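/* Example of the zeroing trick above: to extract the even bytes of an
   op0/op1 pair, the first selector is { 0, 2, ..., 14 } followed by
   eight -128 bytes, and the second is eight -128 bytes followed by
   { 0, 2, ..., 14 }.  Bit 7 makes pshufb write zero to the lane, so the
   final por simply merges the two half-results.  */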
33523 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33524 and extract-odd permutations. */
33527 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33534 t1 = gen_reg_rtx (V4DFmode);
33535 t2 = gen_reg_rtx (V4DFmode);
33537 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33538 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33539 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33541 /* Now an unpck[lh]pd will produce the result required. */
33543 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33545 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33551 int mask = odd ? 0xdd : 0x88;
33553 t1 = gen_reg_rtx (V8SFmode);
33554 t2 = gen_reg_rtx (V8SFmode);
33555 t3 = gen_reg_rtx (V8SFmode);
33557 /* Shuffle within the 128-bit lanes to produce:
33558 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33559 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33562 /* Shuffle the lanes around to produce:
33563 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33564 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33567 /* Shuffle within the 128-bit lanes to produce:
33568 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33569 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33571 /* Shuffle within the 128-bit lanes to produce:
33572 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33573 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33575 /* Shuffle the lanes around to produce:
33576 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33577 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33586 /* These are always directly implementable by expand_vec_perm_1. */
33587 gcc_unreachable ();
33591 return expand_vec_perm_pshufb2 (d);
33594 /* We need 2*log2(N)-1 operations to achieve odd/even
33595 with interleave. */
33596 t1 = gen_reg_rtx (V8HImode);
33597 t2 = gen_reg_rtx (V8HImode);
33598 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33599 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33600 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33601 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33603 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33605 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33612 return expand_vec_perm_pshufb2 (d);
33615 t1 = gen_reg_rtx (V16QImode);
33616 t2 = gen_reg_rtx (V16QImode);
33617 t3 = gen_reg_rtx (V16QImode);
33618 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33619 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33620 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33621 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33622 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33623 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33625 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33627 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33633 gcc_unreachable ();
33639 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33640 extract-even and extract-odd permutations. */
33643 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33645 unsigned i, odd, nelt = d->nelt;
33648 if (odd != 0 && odd != 1)
33651 for (i = 1; i < nelt; ++i)
33652 if (d->perm[i] != 2 * i + odd)
33655 return expand_vec_perm_even_odd_1 (d, odd);
33658 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33659 permutations. We assume that expand_vec_perm_1 has already failed. */
33662 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33664 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33665 enum machine_mode vmode = d->vmode;
33666 unsigned char perm2[4];
33674 /* These are special-cased in sse.md so that we can optionally
33675 use the vbroadcast instruction. They expand to two insns
33676 if the input happens to be in a register. */
33677 gcc_unreachable ();
33683 /* These are always implementable using standard shuffle patterns. */
33684 gcc_unreachable ();
33688 /* These can be implemented via interleave.  We save one insn by
33689 stopping once we have promoted to V4SImode and then using pshufd.  */
33692 optab otab = vec_interleave_low_optab;
33696 otab = vec_interleave_high_optab;
33701 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33702 vmode = get_mode_wider_vector (vmode);
33703 op0 = gen_lowpart (vmode, op0);
33705 while (vmode != V4SImode);
33707 memset (perm2, elt, 4);
33708 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33713 gcc_unreachable ();
33717 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33718 broadcast permutations. */
33721 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33723 unsigned i, elt, nelt = d->nelt;
33725 if (d->op0 != d->op1)
33729 for (i = 1; i < nelt; ++i)
33730 if (d->perm[i] != elt)
33733 return expand_vec_perm_broadcast_1 (d);
33736 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33737 With all of the interface bits taken care of, perform the expansion
33738 in D and return true on success. */
33741 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33743 /* Try a single instruction expansion. */
33744 if (expand_vec_perm_1 (d))
33747 /* Try sequences of two instructions. */
33749 if (expand_vec_perm_pshuflw_pshufhw (d))
33752 if (expand_vec_perm_palignr (d))
33755 if (expand_vec_perm_interleave2 (d))
33758 if (expand_vec_perm_broadcast (d))
33761 /* Try sequences of three instructions. */
33763 if (expand_vec_perm_pshufb2 (d))
33766 /* ??? Look for narrow permutations whose element orderings would
33767 allow the promotion to a wider mode. */
33769 /* ??? Look for sequences of interleave or a wider permute that place
33770 the data into the correct lanes for a half-vector shuffle like
33771 pshuf[lh]w or vpermilps. */
33773 /* ??? Look for sequences of interleave that produce the desired results.
33774 The combinatorics of punpck[lh] get pretty ugly... */
33776 if (expand_vec_perm_even_odd (d))
33782 /* Extract the values from the vector CST into the permutation array in D.
33783 Return 0 on error, 1 if all values from the permutation come from the
33784 first vector, 2 if all values from the second vector, and 3 otherwise. */
33787 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33789 tree list = TREE_VECTOR_CST_ELTS (cst);
33790 unsigned i, nelt = d->nelt;
33793 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33795 unsigned HOST_WIDE_INT e;
33797 if (!host_integerp (TREE_VALUE (list), 1))
33799 e = tree_low_cst (TREE_VALUE (list), 1);
33803 ret |= (e < nelt ? 1 : 2);
33806 gcc_assert (list == NULL);
33808 /* If all elements came from the second vector, fold them to the first.  */
33810 for (i = 0; i < nelt; ++i)
33811 d->perm[i] -= nelt;
33817 ix86_expand_vec_perm_builtin (tree exp)
33819 struct expand_vec_perm_d d;
33820 tree arg0, arg1, arg2;
33822 arg0 = CALL_EXPR_ARG (exp, 0);
33823 arg1 = CALL_EXPR_ARG (exp, 1);
33824 arg2 = CALL_EXPR_ARG (exp, 2);
33826 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33827 d.nelt = GET_MODE_NUNITS (d.vmode);
33828 d.testing_p = false;
33829 gcc_assert (VECTOR_MODE_P (d.vmode));
33831 if (TREE_CODE (arg2) != VECTOR_CST)
33833 error_at (EXPR_LOCATION (exp),
33834 "vector permutation requires vector constant");
33838 switch (extract_vec_perm_cst (&d, arg2))
33844 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33848 if (!operand_equal_p (arg0, arg1, 0))
33850 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33851 d.op0 = force_reg (d.vmode, d.op0);
33852 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33853 d.op1 = force_reg (d.vmode, d.op1);
33857 /* The elements of PERM do not suggest that only the first operand
33858 is used, but both operands are identical. Allow easier matching
33859 of the permutation by folding the permutation into the single input vector.  */
33862 unsigned i, nelt = d.nelt;
33863 for (i = 0; i < nelt; ++i)
33864 if (d.perm[i] >= nelt)
33870 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33871 d.op0 = force_reg (d.vmode, d.op0);
33876 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33877 d.op0 = force_reg (d.vmode, d.op0);
33882 d.target = gen_reg_rtx (d.vmode);
33883 if (ix86_expand_vec_perm_builtin_1 (&d))
33886 /* For compiler generated permutations, we should never get here, because
33887 the compiler should also be checking the ok hook.  But since this is a
33888 builtin the user has access to, don't abort.  */
33892 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33895 sorry ("vector permutation (%d %d %d %d)",
33896 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33899 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33900 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33901 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33904 sorry ("vector permutation "
33905 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33906 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33907 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33908 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33909 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33912 gcc_unreachable ();
33915 return CONST0_RTX (d.vmode);
33918 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33921 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33923 struct expand_vec_perm_d d;
33927 d.vmode = TYPE_MODE (vec_type);
33928 d.nelt = GET_MODE_NUNITS (d.vmode);
33929 d.testing_p = true;
33931 /* Given sufficient ISA support we can just return true here
33932 for selected vector modes. */
33933 if (GET_MODE_SIZE (d.vmode) == 16)
33935 /* All implementable with a single vpperm insn. */
33938 /* All implementable with 2 pshufb + 1 ior. */
33941 /* All implementable with shufpd or unpck[lh]pd. */
33946 vec_mask = extract_vec_perm_cst (&d, mask);
33948 /* This hook cannot be called in response to something that the user
33949 does (unlike the builtin expander), so we shouldn't ever see an
33950 error generated from the extract.  */
33951 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33952 one_vec = (vec_mask != 3);
33954 /* Implementable with shufps or pshufd. */
33955 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33958 /* Otherwise we have to go through the motions and see if we can
33959 figure out how to generate the requested permutation. */
33960 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33961 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33963 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33966 ret = ix86_expand_vec_perm_builtin_1 (&d);
33973 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33975 struct expand_vec_perm_d d;
33981 d.vmode = GET_MODE (targ);
33982 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33983 d.testing_p = false;
33985 for (i = 0; i < nelt; ++i)
33986 d.perm[i] = i * 2 + odd;
33988 /* We'll either be able to implement the permutation directly... */
33989 if (expand_vec_perm_1 (&d))
33992 /* ... or we use the special-case patterns. */
33993 expand_vec_perm_even_odd_1 (&d, odd);
33996 /* This function returns the calling-ABI-specific va_list type node
33997 appropriate for FNDECL.  */
34000 ix86_fn_abi_va_list (tree fndecl)
34003 return va_list_type_node;
34004 gcc_assert (fndecl != NULL_TREE);
34006 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
34007 return ms_va_list_type_node;
34009 return sysv_va_list_type_node;
34012 /* Returns the canonical va_list type specified by TYPE.  If there
34013 is no valid TYPE provided, it returns NULL_TREE.  */
34016 ix86_canonical_va_list_type (tree type)
34020 /* Resolve references and pointers to va_list type. */
34021 if (TREE_CODE (type) == MEM_REF)
34022 type = TREE_TYPE (type);
34023 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
34024 type = TREE_TYPE (type);
34025 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
34026 type = TREE_TYPE (type);
34028 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
34030 wtype = va_list_type_node;
34031 gcc_assert (wtype != NULL_TREE);
34033 if (TREE_CODE (wtype) == ARRAY_TYPE)
34035 /* If va_list is an array type, the argument may have decayed
34036 to a pointer type, e.g. by being passed to another function.
34037 In that case, unwrap both types so that we can compare the
34038 underlying records. */
34039 if (TREE_CODE (htype) == ARRAY_TYPE
34040 || POINTER_TYPE_P (htype))
34042 wtype = TREE_TYPE (wtype);
34043 htype = TREE_TYPE (htype);
34046 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34047 return va_list_type_node;
34048 wtype = sysv_va_list_type_node;
34049 gcc_assert (wtype != NULL_TREE);
34051 if (TREE_CODE (wtype) == ARRAY_TYPE)
34053 /* If va_list is an array type, the argument may have decayed
34054 to a pointer type, e.g. by being passed to another function.
34055 In that case, unwrap both types so that we can compare the
34056 underlying records. */
34057 if (TREE_CODE (htype) == ARRAY_TYPE
34058 || POINTER_TYPE_P (htype))
34060 wtype = TREE_TYPE (wtype);
34061 htype = TREE_TYPE (htype);
34064 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34065 return sysv_va_list_type_node;
34066 wtype = ms_va_list_type_node;
34067 gcc_assert (wtype != NULL_TREE);
34069 if (TREE_CODE (wtype) == ARRAY_TYPE)
34071 /* If va_list is an array type, the argument may have decayed
34072 to a pointer type, e.g. by being passed to another function.
34073 In that case, unwrap both types so that we can compare the
34074 underlying records. */
34075 if (TREE_CODE (htype) == ARRAY_TYPE
34076 || POINTER_TYPE_P (htype))
34078 wtype = TREE_TYPE (wtype);
34079 htype = TREE_TYPE (htype);
34082 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
34083 return ms_va_list_type_node;
34086 return std_canonical_va_list_type (type);
34089 /* Iterate through the target-specific builtin types for va_list.
34090 IDX denotes the iterator, *PTREE is set to the result type of
34091 the va_list builtin, and *PNAME to its internal type.
34092 Returns zero if there is no element for this index, otherwise
34093 IDX should be incremented for the next call.
34094 Note that this does not iterate over a base builtin's name like __builtin_va_list.
34095 Used from c_common_nodes_and_builtins. */
34098 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
34108 *ptree = ms_va_list_type_node;
34109 *pname = "__builtin_ms_va_list";
34113 *ptree = sysv_va_list_type_node;
34114 *pname = "__builtin_sysv_va_list";
34122 #undef TARGET_SCHED_DISPATCH
34123 #define TARGET_SCHED_DISPATCH has_dispatch
34124 #undef TARGET_SCHED_DISPATCH_DO
34125 #define TARGET_SCHED_DISPATCH_DO do_dispatch
34127 /* The size of the dispatch window is the total number of bytes of
34128 object code allowed in a window. */
34129 #define DISPATCH_WINDOW_SIZE 16
34131 /* Number of dispatch windows considered for scheduling. */
34132 #define MAX_DISPATCH_WINDOWS 3
34134 /* Maximum number of instructions in a window. */
34137 /* Maximum number of immediate operands in a window. */
34140 /* Maximum number of immediate bits allowed in a window. */
34141 #define MAX_IMM_SIZE 128
34143 /* Maximum number of 32 bit immediates allowed in a window. */
34144 #define MAX_IMM_32 4
34146 /* Maximum number of 64 bit immediates allowed in a window. */
34147 #define MAX_IMM_64 2
34149 /* Maximum total of loads or prefetches allowed in a window. */
34152 /* Maximum total of stores allowed in a window. */
34153 #define MAX_STORE 1
34159 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
34160 enum dispatch_group {
34175 /* Number of allowable groups in a dispatch window.  It is an array
34176 indexed by the dispatch_group enum.  100 is used as a big number,
34177 because the number of these kinds of operations does not have any
34178 effect on the dispatch window, but we need them for other reasons in a window.  */
34180 static unsigned int num_allowable_groups[disp_last] = {
34181 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
34184 char group_name[disp_last + 1][16] = {
34185 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
34186 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
34187 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
34190 /* Instruction path. */
34193 path_single, /* Single micro op. */
34194 path_double, /* Double micro op. */
34195 path_multi, /* Instructions with more than 2 micro ops.  */
34199 /* sched_insn_info defines a window to the instructions scheduled in
34200 the basic block. It contains a pointer to the insn_info table and
34201 the instruction scheduled.
34203 Windows are allocated for each basic block and are linked with each other.  */
34205 typedef struct sched_insn_info_s {
34207 enum dispatch_group group;
34208 enum insn_path path;
34213 /* Linked list of dispatch windows. This is a two way list of
34214 dispatch windows of a basic block. It contains information about
34215 the number of uops in the window and the total number of
34216 instructions and of bytes in the object code for this dispatch window.  */
34218 typedef struct dispatch_windows_s {
34219 int num_insn; /* Number of insn in the window. */
34220 int num_uops; /* Number of uops in the window. */
34221 int window_size; /* Number of bytes in the window. */
34222 int window_num; /* Window number, 0 or 1.  */
34223 int num_imm; /* Number of immediates in an insn. */
34224 int num_imm_32; /* Number of 32 bit immediates in an insn. */
34225 int num_imm_64; /* Number of 64 bit immediates in an insn. */
34226 int imm_size; /* Total immediates in the window. */
34227 int num_loads; /* Total memory loads in the window. */
34228 int num_stores; /* Total memory stores in the window. */
34229 int violation; /* Violation exists in window. */
34230 sched_insn_info *window; /* Pointer to the window. */
34231 struct dispatch_windows_s *next;
34232 struct dispatch_windows_s *prev;
34233 } dispatch_windows;
34235 /* Immediate values used in an insn.  */
34236 typedef struct imm_info_s
34243 static dispatch_windows *dispatch_window_list;
34244 static dispatch_windows *dispatch_window_list1;
34246 /* Get dispatch group of insn. */
34248 static enum dispatch_group
34249 get_mem_group (rtx insn)
34251 enum attr_memory memory;
34253 if (INSN_CODE (insn) < 0)
34254 return disp_no_group;
34255 memory = get_attr_memory (insn);
34256 if (memory == MEMORY_STORE)
34259 if (memory == MEMORY_LOAD)
34262 if (memory == MEMORY_BOTH)
34263 return disp_load_store;
34265 return disp_no_group;
34268 /* Return true if insn is a compare instruction. */
34273 enum attr_type type;
34275 type = get_attr_type (insn);
34276 return (type == TYPE_TEST
34277 || type == TYPE_ICMP
34278 || type == TYPE_FCMP
34279 || GET_CODE (PATTERN (insn)) == COMPARE);
34282 /* Return true if a dispatch violation was encountered.  */
34285 dispatch_violation (void)
34287 if (dispatch_window_list->next)
34288 return dispatch_window_list->next->violation;
34289 return dispatch_window_list->violation;
34292 /* Return true if insn is a branch instruction. */
34295 is_branch (rtx insn)
34297 return (CALL_P (insn) || JUMP_P (insn));
34300 /* Return true if insn is a prefetch instruction. */
34303 is_prefetch (rtx insn)
34305 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
34308 /* This function initializes a dispatch window and the list container holding a
34309 pointer to the window. */
34312 init_window (int window_num)
34315 dispatch_windows *new_list;
34317 if (window_num == 0)
34318 new_list = dispatch_window_list;
34320 new_list = dispatch_window_list1;
34322 new_list->num_insn = 0;
34323 new_list->num_uops = 0;
34324 new_list->window_size = 0;
34325 new_list->next = NULL;
34326 new_list->prev = NULL;
34327 new_list->window_num = window_num;
34328 new_list->num_imm = 0;
34329 new_list->num_imm_32 = 0;
34330 new_list->num_imm_64 = 0;
34331 new_list->imm_size = 0;
34332 new_list->num_loads = 0;
34333 new_list->num_stores = 0;
34334 new_list->violation = false;
34336 for (i = 0; i < MAX_INSN; i++)
34338 new_list->window[i].insn = NULL;
34339 new_list->window[i].group = disp_no_group;
34340 new_list->window[i].path = no_path;
34341 new_list->window[i].byte_len = 0;
34342 new_list->window[i].imm_bytes = 0;
34347 /* This function allocates and initializes a dispatch window and the
34348 list container holding a pointer to the window. */
34350 static dispatch_windows *
34351 allocate_window (void)
34353 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34354 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34359 /* This routine initializes the dispatch scheduling information. It
34360 initiates building dispatch scheduler tables and constructs the
34361 first dispatch window. */
34364 init_dispatch_sched (void)
34366 /* Allocate a dispatch list and a window. */
34367 dispatch_window_list = allocate_window ();
34368 dispatch_window_list1 = allocate_window ();
34373 /* This function returns true if a branch is detected. End of a basic block
34374 does not have to be a branch, but here we assume only branches end a
34378 is_end_basic_block (enum dispatch_group group)
34380 return group == disp_branch;
34383 /* This function is called when the end of a window processing is reached. */
34386 process_end_window (void)
34388 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34389 if (dispatch_window_list->next)
34391 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34392 gcc_assert (dispatch_window_list->window_size
34393 + dispatch_window_list1->window_size <= 48);
34399 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34400 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34401 for 48 bytes of instructions.  Note that these windows are not dispatch
34402 windows whose sizes are DISPATCH_WINDOW_SIZE.  */
34404 static dispatch_windows *
34405 allocate_next_window (int window_num)
34407 if (window_num == 0)
34409 if (dispatch_window_list->next)
34412 return dispatch_window_list;
34415 dispatch_window_list->next = dispatch_window_list1;
34416 dispatch_window_list1->prev = dispatch_window_list;
34418 return dispatch_window_list1;
34421 /* Increment the number of immediate operands of an instruction. */
34424 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34429 switch ( GET_CODE (*in_rtx))
34434 (imm_values->imm)++;
34435 if (x86_64_immediate_operand (*in_rtx, SImode))
34436 (imm_values->imm32)++;
34438 (imm_values->imm64)++;
34442 (imm_values->imm)++;
34443 (imm_values->imm64)++;
34447 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34449 (imm_values->imm)++;
34450 (imm_values->imm32)++;
34461 /* Compute number of immediate operands of an instruction. */
34464 find_constant (rtx in_rtx, imm_info *imm_values)
34466 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34467 (rtx_function) find_constant_1, (void *) imm_values);
34470 /* Return the total size of the immediate operands of an instruction along
34471 with the number of corresponding immediate operands.  It initializes its
34472 parameters to zero before calling FIND_CONSTANT.
34473 INSN is the input instruction.  IMM is the total of immediates.
34474 IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64 bit immediates.  */
34478 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34480 imm_info imm_values = {0, 0, 0};
34482 find_constant (insn, &imm_values);
34483 *imm = imm_values.imm;
34484 *imm32 = imm_values.imm32;
34485 *imm64 = imm_values.imm64;
34486 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
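/* A worked example, not part of the build: for a hypothetical INSN
   carrying one 32-bit and one 64-bit immediate, the function reports
   two immediate operands totalling 1*4 + 1*8 = 12 bytes.  */
#if 0
int imm, imm32, imm64;
int imm_bytes = get_num_immediates (insn, &imm, &imm32, &imm64);
/* Here imm == 2, imm32 == 1, imm64 == 1 and imm_bytes == 12.  */
#endif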
34489 /* This function indicates if an operand of an instruction is an
34490 immediate. */
34493 has_immediate (rtx insn)
34495 int num_imm_operand;
34496 int num_imm32_operand;
34497 int num_imm64_operand;
34500 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34501 &num_imm64_operand);
34505 /* Return single or double path for instructions. */
34507 static enum insn_path
34508 get_insn_path (rtx insn)
34510 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34512 if ((int)path == 0)
34513 return path_single;
34515 if ((int)path == 1)
34516 return path_double;
34518 return path_multi;
34521 /* Return insn dispatch group. */
34523 static enum dispatch_group
34524 get_insn_group (rtx insn)
34526 enum dispatch_group group = get_mem_group (insn);
34527 if (group)
34528 return group;
34530 if (is_branch (insn))
34531 return disp_branch;
34533 if (is_cmp (insn))
34534 return disp_cmp;
34536 if (has_immediate (insn))
34537 return disp_imm;
34539 if (is_prefetch (insn))
34540 return disp_prefetch;
34542 return disp_no_group;
34545 /* Count number of GROUP restricted instructions in a dispatch
34546 window WINDOW_LIST. */
34549 count_num_restricted (rtx insn, dispatch_windows *window_list)
34551 enum dispatch_group group = get_insn_group (insn);
34553 int num_imm_operand;
34554 int num_imm32_operand;
34555 int num_imm64_operand;
34557 if (group == disp_no_group)
34558 return 0;
34560 if (group == disp_imm)
34562 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34563 &num_imm64_operand);
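/* The check below rejects INSN if adding its immediates would overflow
   any per-window budget: total immediate bytes (MAX_IMM_SIZE), total
   immediate count (MAX_IMM), or the 32-bit and 64-bit slot counts
   (MAX_IMM_32/MAX_IMM_64), where one 64-bit immediate consumes two
   32-bit slots.  */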
34564 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34565 || num_imm_operand + window_list->num_imm > MAX_IMM
34566 || (num_imm32_operand > 0
34567 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34568 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34569 || (num_imm64_operand > 0
34570 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34571 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34572 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34573 && num_imm64_operand > 0
34574 && ((window_list->num_imm_64 > 0
34575 && window_list->num_insn >= 2)
34576 || window_list->num_insn >= 3)))
34577 return BIG;
34579 return 1;
34582 if ((group == disp_load_store
34583 && (window_list->num_loads >= MAX_LOAD
34584 || window_list->num_stores >= MAX_STORE))
34585 || ((group == disp_load
34586 || group == disp_prefetch)
34587 && window_list->num_loads >= MAX_LOAD)
34588 || (group == disp_store
34589 && window_list->num_stores >= MAX_STORE))
34590 return BIG;
34592 return 1;
34595 /* This function returns true if insn satisfies dispatch rules on the
34596 last window scheduled. */
34599 fits_dispatch_window (rtx insn)
34601 dispatch_windows *window_list = dispatch_window_list;
34602 dispatch_windows *window_list_next = dispatch_window_list->next;
34603 unsigned int num_restrict;
34604 enum dispatch_group group = get_insn_group (insn);
34605 enum insn_path path = get_insn_path (insn);
34608 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34609 instructions should be given the lowest priority in the Haifa
34610 scheduler so that they are scheduled in the same dispatch window
34611 as the reference to them. */
34612 if (group == disp_jcc || group == disp_cmp)
34613 return false;
34615 /* Check nonrestricted. */
34616 if (group == disp_no_group || group == disp_branch)
34617 return true;
34619 /* Get last dispatch window. */
34620 if (window_list_next)
34621 window_list = window_list_next;
34623 if (window_list->window_num == 1)
34625 sum = window_list->prev->window_size + window_list->window_size;
34627 if (sum == 32
34628 || (min_insn_size (insn) + sum) >= 48)
34629 /* Window 1 is full. Go for next window. */
34630 return false;
34633 num_restrict = count_num_restricted (insn, window_list);
34635 if (num_restrict > num_allowable_groups[group])
34636 return false;
34638 /* See if it fits in the first window. */
34639 if (window_list->window_num == 0)
34641 /* The first window should have only single and double path
34642 uops. */
34643 if (path == path_double
34644 && (window_list->num_uops + 2) > MAX_INSN)
34645 return false;
34646 else if (path != path_single)
34647 return false;
34649 return true;
34652 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34653 dispatch window WINDOW_LIST. */
34656 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34658 int byte_len = min_insn_size (insn);
34659 int num_insn = window_list->num_insn;
34661 sched_insn_info *window = window_list->window;
34662 enum dispatch_group group = get_insn_group (insn);
34663 enum insn_path path = get_insn_path (insn);
34664 int num_imm_operand;
34665 int num_imm32_operand;
34666 int num_imm64_operand;
34668 if (!window_list->violation && group != disp_cmp
34669 && !fits_dispatch_window (insn))
34670 window_list->violation = true;
34672 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34673 &num_imm64_operand);
34675 /* Initialize window with new instruction. */
34676 window[num_insn].insn = insn;
34677 window[num_insn].byte_len = byte_len;
34678 window[num_insn].group = group;
34679 window[num_insn].path = path;
34680 window[num_insn].imm_bytes = imm_size;
34682 window_list->window_size += byte_len;
34683 window_list->num_insn = num_insn + 1;
34684 window_list->num_uops = window_list->num_uops + num_uops;
34685 window_list->imm_size += imm_size;
34686 window_list->num_imm += num_imm_operand;
34687 window_list->num_imm_32 += num_imm32_operand;
34688 window_list->num_imm_64 += num_imm64_operand;
34690 if (group == disp_store)
34691 window_list->num_stores += 1;
34692 else if (group == disp_load
34693 || group == disp_prefetch)
34694 window_list->num_loads += 1;
34695 else if (group == disp_load_store)
34697 window_list->num_stores += 1;
34698 window_list->num_loads += 1;
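/* Bookkeeping note, not part of the build: a read-modify-write insn
   classified as disp_load_store bumps both counters above, so it
   consumes one load slot and one store slot of the window.  */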
34702 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34703 If the total bytes of instructions or the number of instructions in
34704 the window exceeds the allowable limit, it allocates a new window. */
34707 add_to_dispatch_window (rtx insn)
34710 dispatch_windows *window_list;
34711 dispatch_windows *next_list;
34712 dispatch_windows *window0_list;
34713 enum insn_path path;
34714 enum dispatch_group insn_group;
34722 if (INSN_CODE (insn) < 0)
34723 return;
34725 byte_len = min_insn_size (insn);
34726 window_list = dispatch_window_list;
34727 next_list = window_list->next;
34728 path = get_insn_path (insn);
34729 insn_group = get_insn_group (insn);
34731 /* Get the last dispatch window. */
34732 if (next_list)
34733 window_list = dispatch_window_list->next;
34735 if (path == path_single)
34736 insn_num_uops = 1;
34737 else if (path == path_double)
34738 insn_num_uops = 2;
34739 else
34740 insn_num_uops = (int) path;
34742 /* If the current window is full, get a new window.
34743 Window number zero is full if MAX_INSN uops are scheduled in it.
34744 Window number one is full if window zero's bytes plus window
34745 one's bytes total 32, if adding the bytes of the new instruction
34746 makes the total exceed 48, or if it already holds MAX_INSN
34747 instructions. */
34748 num_insn = window_list->num_insn;
34749 num_uops = window_list->num_uops;
34750 window_num = window_list->window_num;
34751 insn_fits = fits_dispatch_window (insn);
34753 if (num_insn >= MAX_INSN
34754 || num_uops + insn_num_uops > MAX_INSN
34755 || !(insn_fits))
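/* Toggle WINDOW_NUM between 0 and 1: ~N & 1 maps 0 to 1 and 1 to 0.  */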
34757 window_num = ~window_num & 1;
34758 window_list = allocate_next_window (window_num);
34761 if (window_num == 0)
34763 add_insn_window (insn, window_list, insn_num_uops);
34764 if (window_list->num_insn >= MAX_INSN
34765 && insn_group == disp_branch)
34767 process_end_window ();
34771 else if (window_num == 1)
34773 window0_list = window_list->prev;
34774 sum = window0_list->window_size + window_list->window_size;
34775 if (sum == 32
34776 || (byte_len + sum) >= 48)
34778 process_end_window ();
34779 window_list = dispatch_window_list;
34782 add_insn_window (insn, window_list, insn_num_uops);
34784 else
34785 gcc_unreachable ();
34787 if (is_end_basic_block (insn_group))
34789 /* End of basic block is reached; do end-basic-block processing. */
34790 process_end_window ();
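/* A minimal trace, not part of the build: single-path insns fill
   window 0 until it holds MAX_INSN uops; the next insn then fails the
   capacity test above and is routed to window 1, and once the pair
   reaches its byte budget process_end_window starts a fresh pair.  */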
34795 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34797 DEBUG_FUNCTION static void
34798 debug_dispatch_window_file (FILE *file, int window_num)
34800 dispatch_windows *list;
34803 if (window_num == 0)
34804 list = dispatch_window_list;
34805 else
34806 list = dispatch_window_list1;
34808 fprintf (file, "Window #%d:\n", list->window_num);
34809 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34810 list->num_insn, list->num_uops, list->window_size);
34811 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34812 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34814 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34815 list->num_stores);
34816 fprintf (file, " insn info:\n");
34818 for (i = 0; i < MAX_INSN; i++)
34820 if (!list->window[i].insn)
34821 break;
34822 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34823 i, group_name[list->window[i].group],
34824 i, (void *)list->window[i].insn,
34825 i, list->window[i].path,
34826 i, list->window[i].byte_len,
34827 i, list->window[i].imm_bytes);
34831 /* Print to stdout a dispatch window. */
34833 DEBUG_FUNCTION void
34834 debug_dispatch_window (int window_num)
34836 debug_dispatch_window_file (stdout, window_num);
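/* Typical use, not part of the build: from a debugger session, e.g.
     (gdb) call debug_dispatch_window (0)
   prints the contents of dispatch window 0 to stdout.  */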
34839 /* Print INSN dispatch information to FILE. */
34841 DEBUG_FUNCTION static void
34842 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34845 enum insn_path path;
34846 enum dispatch_group group;
34848 int num_imm_operand;
34849 int num_imm32_operand;
34850 int num_imm64_operand;
34852 if (INSN_CODE (insn) < 0)
34853 return;
34855 byte_len = min_insn_size (insn);
34856 path = get_insn_path (insn);
34857 group = get_insn_group (insn);
34858 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34859 &num_imm64_operand);
34861 fprintf (file, " insn info:\n");
34862 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34863 group_name[group], path, byte_len);
34864 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34865 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34868 /* Print to STDOUT the status of the ready list with respect to
34869 dispatch windows. */
34871 DEBUG_FUNCTION void
34872 debug_ready_dispatch (void)
34875 int no_ready = number_in_ready ();
34877 fprintf (stdout, "Number of ready: %d\n", no_ready);
34879 for (i = 0; i < no_ready; i++)
34880 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34883 /* This routine is the driver of the dispatch scheduler. */
34886 do_dispatch (rtx insn, int mode)
34888 if (mode == DISPATCH_INIT)
34889 init_dispatch_sched ();
34890 else if (mode == ADD_TO_DISPATCH_WINDOW)
34891 add_to_dispatch_window (insn);
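/* A minimal sketch of the expected calling protocol, not part of the
   build; NEXT_READY_INSN is a hypothetical stand-in for however the
   caller obtains instructions.  */
#if 0
rtx insn;
do_dispatch (NULL_RTX, DISPATCH_INIT);
while ((insn = NEXT_READY_INSN ()) != NULL_RTX)
  do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);
#endif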
34894 /* Return TRUE if dispatch scheduling is supported and the query
34895 ACTION holds for INSN. */
34897 has_dispatch (rtx insn, int action)
34899 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34900 switch (action)
34902 default:
34903 return false;
34905 case IS_DISPATCH_ON:
34906 return true;
34909 case IS_CMP:
34910 return is_cmp (insn);
34912 case DISPATCH_VIOLATION:
34913 return dispatch_violation ();
34915 case FITS_DISPATCH_WINDOW:
34916 return fits_dispatch_window (insn);
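/* A minimal guard, not part of the build: for a candidate INSN,
   callers are expected to test IS_DISPATCH_ON before asking the
   finer-grained queries.  */
#if 0
if (has_dispatch (insn, IS_DISPATCH_ON)
    && has_dispatch (insn, FITS_DISPATCH_WINDOW))
  do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);
#endif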
34922 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34923 place emms and femms instructions. */
34925 static enum machine_mode
34926 ix86_preferred_simd_mode (enum machine_mode mode)
34928 /* Disable the double-precision vectorizer if needed. */
34929 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
34930 return word_mode;
34932 if (!TARGET_AVX && !TARGET_SSE)
34933 return word_mode;
34935 switch (mode)
34937 case SFmode:
34938 return (TARGET_AVX && !flag_prefer_avx128) ? V8SFmode : V4SFmode;
34939 case DFmode:
34940 return (TARGET_AVX && !flag_prefer_avx128) ? V4DFmode : V2DFmode;
34956 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34957 vectors. */
34959 static unsigned int
34960 ix86_autovectorize_vector_sizes (void)
34962 return TARGET_AVX ? 32 | 16 : 0;
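/* A minimal decoding sketch, not part of the build: the return value
   is a bit mask of vector sizes in bytes, so 32 | 16 lets the
   vectorizer try 256-bit vectors first and fall back to 128-bit.  */
#if 0
unsigned int sizes = ix86_autovectorize_vector_sizes ();
if (sizes & 32)
  {
    /* Try 32-byte (256-bit) vectors.  */
  }
if (sizes & 16)
  {
    /* Try 16-byte (128-bit) vectors.  */
  }
#endif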
34965 /* Initialize the GCC target structure. */
34966 #undef TARGET_RETURN_IN_MEMORY
34967 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34969 #undef TARGET_LEGITIMIZE_ADDRESS
34970 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34972 #undef TARGET_ATTRIBUTE_TABLE
34973 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34974 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34975 # undef TARGET_MERGE_DECL_ATTRIBUTES
34976 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34977 #endif
34979 #undef TARGET_COMP_TYPE_ATTRIBUTES
34980 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34982 #undef TARGET_INIT_BUILTINS
34983 #define TARGET_INIT_BUILTINS ix86_init_builtins
34984 #undef TARGET_BUILTIN_DECL
34985 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34986 #undef TARGET_EXPAND_BUILTIN
34987 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34989 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34990 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34991 ix86_builtin_vectorized_function
34993 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34994 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34996 #undef TARGET_BUILTIN_RECIPROCAL
34997 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34999 #undef TARGET_ASM_FUNCTION_EPILOGUE
35000 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
35002 #undef TARGET_ENCODE_SECTION_INFO
35003 #ifndef SUBTARGET_ENCODE_SECTION_INFO
35004 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
35005 #else
35006 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
35007 #endif
35009 #undef TARGET_ASM_OPEN_PAREN
35010 #define TARGET_ASM_OPEN_PAREN ""
35011 #undef TARGET_ASM_CLOSE_PAREN
35012 #define TARGET_ASM_CLOSE_PAREN ""
35014 #undef TARGET_ASM_BYTE_OP
35015 #define TARGET_ASM_BYTE_OP ASM_BYTE
35017 #undef TARGET_ASM_ALIGNED_HI_OP
35018 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
35019 #undef TARGET_ASM_ALIGNED_SI_OP
35020 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
35022 #undef TARGET_ASM_ALIGNED_DI_OP
35023 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
35026 #undef TARGET_PROFILE_BEFORE_PROLOGUE
35027 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
35029 #undef TARGET_ASM_UNALIGNED_HI_OP
35030 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
35031 #undef TARGET_ASM_UNALIGNED_SI_OP
35032 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
35033 #undef TARGET_ASM_UNALIGNED_DI_OP
35034 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
35036 #undef TARGET_PRINT_OPERAND
35037 #define TARGET_PRINT_OPERAND ix86_print_operand
35038 #undef TARGET_PRINT_OPERAND_ADDRESS
35039 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
35040 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
35041 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
35042 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
35043 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
35045 #undef TARGET_SCHED_INIT_GLOBAL
35046 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
35047 #undef TARGET_SCHED_ADJUST_COST
35048 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
35049 #undef TARGET_SCHED_ISSUE_RATE
35050 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
35051 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
35052 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
35053 ia32_multipass_dfa_lookahead
35055 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
35056 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
35059 #undef TARGET_HAVE_TLS
35060 #define TARGET_HAVE_TLS true
35062 #undef TARGET_CANNOT_FORCE_CONST_MEM
35063 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
35064 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
35065 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
35067 #undef TARGET_DELEGITIMIZE_ADDRESS
35068 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
35070 #undef TARGET_MS_BITFIELD_LAYOUT_P
35071 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
35073 #if TARGET_MACHO
35074 #undef TARGET_BINDS_LOCAL_P
35075 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
35076 #endif
35077 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
35078 #undef TARGET_BINDS_LOCAL_P
35079 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
35080 #endif
35082 #undef TARGET_ASM_OUTPUT_MI_THUNK
35083 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
35084 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
35085 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
35087 #undef TARGET_ASM_FILE_START
35088 #define TARGET_ASM_FILE_START x86_file_start
35090 #undef TARGET_DEFAULT_TARGET_FLAGS
35091 #define TARGET_DEFAULT_TARGET_FLAGS \
35092 (TARGET_DEFAULT \
35093 | TARGET_SUBTARGET_DEFAULT \
35094 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
35096 #undef TARGET_HANDLE_OPTION
35097 #define TARGET_HANDLE_OPTION ix86_handle_option
35099 #undef TARGET_OPTION_OVERRIDE
35100 #define TARGET_OPTION_OVERRIDE ix86_option_override
35101 #undef TARGET_OPTION_OPTIMIZATION_TABLE
35102 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
35103 #undef TARGET_OPTION_INIT_STRUCT
35104 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
35106 #undef TARGET_REGISTER_MOVE_COST
35107 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
35108 #undef TARGET_MEMORY_MOVE_COST
35109 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
35110 #undef TARGET_RTX_COSTS
35111 #define TARGET_RTX_COSTS ix86_rtx_costs
35112 #undef TARGET_ADDRESS_COST
35113 #define TARGET_ADDRESS_COST ix86_address_cost
35115 #undef TARGET_FIXED_CONDITION_CODE_REGS
35116 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
35117 #undef TARGET_CC_MODES_COMPATIBLE
35118 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
35120 #undef TARGET_MACHINE_DEPENDENT_REORG
35121 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
35123 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
35124 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
35126 #undef TARGET_BUILD_BUILTIN_VA_LIST
35127 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
35129 #undef TARGET_ENUM_VA_LIST_P
35130 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
35132 #undef TARGET_FN_ABI_VA_LIST
35133 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
35135 #undef TARGET_CANONICAL_VA_LIST_TYPE
35136 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
35138 #undef TARGET_EXPAND_BUILTIN_VA_START
35139 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
35141 #undef TARGET_MD_ASM_CLOBBERS
35142 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
35144 #undef TARGET_PROMOTE_PROTOTYPES
35145 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
35146 #undef TARGET_STRUCT_VALUE_RTX
35147 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
35148 #undef TARGET_SETUP_INCOMING_VARARGS
35149 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
35150 #undef TARGET_MUST_PASS_IN_STACK
35151 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
35152 #undef TARGET_FUNCTION_ARG_ADVANCE
35153 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
35154 #undef TARGET_FUNCTION_ARG
35155 #define TARGET_FUNCTION_ARG ix86_function_arg
35156 #undef TARGET_FUNCTION_ARG_BOUNDARY
35157 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
35158 #undef TARGET_PASS_BY_REFERENCE
35159 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
35160 #undef TARGET_INTERNAL_ARG_POINTER
35161 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
35162 #undef TARGET_UPDATE_STACK_BOUNDARY
35163 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
35164 #undef TARGET_GET_DRAP_RTX
35165 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
35166 #undef TARGET_STRICT_ARGUMENT_NAMING
35167 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
35168 #undef TARGET_STATIC_CHAIN
35169 #define TARGET_STATIC_CHAIN ix86_static_chain
35170 #undef TARGET_TRAMPOLINE_INIT
35171 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
35172 #undef TARGET_RETURN_POPS_ARGS
35173 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
35175 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
35176 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
35178 #undef TARGET_SCALAR_MODE_SUPPORTED_P
35179 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
35181 #undef TARGET_VECTOR_MODE_SUPPORTED_P
35182 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
35184 #undef TARGET_C_MODE_FOR_SUFFIX
35185 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
35187 #ifdef HAVE_AS_TLS
35188 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
35189 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
35190 #endif
35192 #ifdef SUBTARGET_INSERT_ATTRIBUTES
35193 #undef TARGET_INSERT_ATTRIBUTES
35194 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
35195 #endif
35197 #undef TARGET_MANGLE_TYPE
35198 #define TARGET_MANGLE_TYPE ix86_mangle_type
35200 #undef TARGET_STACK_PROTECT_FAIL
35201 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
35203 #undef TARGET_SUPPORTS_SPLIT_STACK
35204 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
35206 #undef TARGET_FUNCTION_VALUE
35207 #define TARGET_FUNCTION_VALUE ix86_function_value
35209 #undef TARGET_FUNCTION_VALUE_REGNO_P
35210 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
35212 #undef TARGET_SECONDARY_RELOAD
35213 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
35215 #undef TARGET_PREFERRED_RELOAD_CLASS
35216 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
35217 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
35218 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
35219 #undef TARGET_CLASS_LIKELY_SPILLED_P
35220 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
35222 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
35223 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
35224 ix86_builtin_vectorization_cost
35225 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
35226 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
35227 ix86_vectorize_builtin_vec_perm
35228 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
35229 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
35230 ix86_vectorize_builtin_vec_perm_ok
35231 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
35232 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
35233 ix86_preferred_simd_mode
35234 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
35235 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
35236 ix86_autovectorize_vector_sizes
35238 #undef TARGET_SET_CURRENT_FUNCTION
35239 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
35241 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
35242 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
35244 #undef TARGET_OPTION_SAVE
35245 #define TARGET_OPTION_SAVE ix86_function_specific_save
35247 #undef TARGET_OPTION_RESTORE
35248 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
35250 #undef TARGET_OPTION_PRINT
35251 #define TARGET_OPTION_PRINT ix86_function_specific_print
35253 #undef TARGET_CAN_INLINE_P
35254 #define TARGET_CAN_INLINE_P ix86_can_inline_p
35256 #undef TARGET_EXPAND_TO_RTL_HOOK
35257 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
35259 #undef TARGET_LEGITIMATE_ADDRESS_P
35260 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
35262 #undef TARGET_IRA_COVER_CLASSES
35263 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
35265 #undef TARGET_FRAME_POINTER_REQUIRED
35266 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
35268 #undef TARGET_CAN_ELIMINATE
35269 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
35271 #undef TARGET_EXTRA_LIVE_ON_ENTRY
35272 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
35274 #undef TARGET_ASM_CODE_END
35275 #define TARGET_ASM_CODE_END ix86_code_end
35277 #undef TARGET_CONDITIONAL_REGISTER_USAGE
35278 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
35280 #if TARGET_MACHO
35281 #undef TARGET_INIT_LIBFUNCS
35282 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
35283 #endif
35285 struct gcc_target targetm = TARGET_INITIALIZER;
35287 #include "gt-i386.h"