/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
enum upper_128bits_state
{
  unknown = 0,			/* Unknown.  */
  unused,			/* Not used or not referenced.  */
  used				/* Used or referenced.  */
};

typedef struct block_info_def
{
  /* State of the upper 128bits of any AVX registers at exit.  */
  enum upper_128bits_state state;
  /* If the upper 128bits of any AVX registers are referenced.  */
  enum upper_128bits_state referenced;
  /* Number of vzerouppers in this block.  */
  int count;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been rescanned.  */
  bool rescanned;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass a 256bit AVX register, or there is
     no 256bit AVX register in the function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
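
/* check_avx256_stores is used as a note_stores callback: note_stores
   walks every store in a pattern and invokes the callback with the
   store destination, the containing SET or CLOBBER, and an opaque
   pointer.  A typical use, mirroring the call further below, is
   roughly:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   after which STATE is USED if either side of some store referenced a
   256bit AVX register.  */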
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is the state of the upper 128bits of any AVX registers at
   entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
  int count = BLOCK_INFO (bb)->count;

  if (dump_file)
    fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
	     bb->index, state);

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  count--;
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used && referenced != unused)
	    {
	      /* No need to call note_stores if the upper 128bits of
		 AVX registers are never referenced.  */
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		referenced = used;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    state = used;

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  count--;
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      count--;
	      delete_insn (insn);
	    }
	  else
	    vzeroupper_insn = insn;
	}
    }

  BLOCK_INFO (bb)->state = state;

  if (BLOCK_INFO (bb)->referenced == unknown)
    {
      /* The upper 128bits of AVX registers are never referenced if
	 REFERENCED isn't updated.  */
      if (referenced == unknown)
	referenced = unused;
      BLOCK_INFO (bb)->referenced = referenced;
      BLOCK_INFO (bb)->count = count;
    }

  if (dump_file)
    fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n",
	     bb->index, state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and its predecessor blocks recursively.  */

static void
move_or_delete_vzeroupper_1 (basic_block block)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state;

  if (dump_file)
    fprintf (dump_file, " Process BB [%i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return;

  BLOCK_INFO (block)->processed = true;

  state = unused;

  /* Process all predecessor edges of this block.  */
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      move_or_delete_vzeroupper_1 (e->src);
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (state == unused)
	    state = unknown;
	  break;
	case used:
	  state = used;
	  break;
	case unused:
	  break;
	}
    }

  /* If state of any predecessor edges is unknown, we need to rescan.  */
  if (state == unknown)
    cfun->machine->rescan_vzeroupper_p = 1;

  /* Process this block.  */
  move_or_delete_vzeroupper_2 (block, state);
}
/* Helper function for move_or_delete_vzeroupper.  Rescan vzeroupper
   in BLOCK and its predecessor blocks recursively.  */

static void
rescan_move_or_delete_vzeroupper (basic_block block)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state;

  if (dump_file)
    fprintf (dump_file, " Rescan BB [%i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->rescanned);

  if (BLOCK_INFO (block)->rescanned)
    return;

  BLOCK_INFO (block)->rescanned = true;

  state = unused;

  /* Rescan all predecessor edges of this block.  */
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      rescan_move_or_delete_vzeroupper (e->src);
      /* For rescan, UNKNOWN state is treated as UNUSED.  */
      if (BLOCK_INFO (e->src)->state == used)
	state = used;
    }

  /* Rescan this block only if there are vzerouppers or the upper
     128bits of AVX registers are referenced.  */
  if (BLOCK_INFO (block)->count == 0
      && (state == used || BLOCK_INFO (block)->referenced != used))
    {
      if (state == used)
	BLOCK_INFO (block)->state = state;
      return;
    }

  move_or_delete_vzeroupper_2 (block, state);
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  unsigned int count = 0;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process successor blocks of all entry points.  */
  if (dump_file)
    fprintf (dump_file, "Process all entry points\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
      BLOCK_INFO (e->dest)->rescanned = true;
    }

  /* Process all basic blocks.  */
  if (dump_file)
    fprintf (dump_file, "Process all basic blocks\n");

  FOR_EACH_BB (bb)
    {
      move_or_delete_vzeroupper_1 (bb);
      count += BLOCK_INFO (bb)->count;
    }

  /* Rescan all basic blocks if needed.  */
  if (count && cfun->machine->rescan_vzeroupper_p)
    {
      if (dump_file)
	fprintf (dump_file, "Rescan all basic blocks\n");

      FOR_EACH_BB (bb)
	rescan_move_or_delete_vzeroupper (bb);
    }

  free_aux_for_blocks ();
}
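
/* Summary of the pass above: successor blocks of the function entry
   are seeded with USED or UNUSED depending on whether the caller
   passes 256bit AVX registers; move_or_delete_vzeroupper_1 then
   propagates the upper-128bit state across predecessor edges, and the
   optional rescan pass resolves blocks left in the UNKNOWN state
   (e.g. because of loops) by treating UNKNOWN as UNUSED.  */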

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
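
/* For example, MODE_INDEX (SImode) is 2, so the SImode entry of a cost
   array is selected with something like
     ix86_cost->mult_init[MODE_INDEX (mode)]
   (illustrative only; the real lookups are in ix86_rtx_costs).  */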

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
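
/* Worked example: COSTS_N_INSNS (1) is 4 under the assumption above,
   and COSTS_N_BYTES (2) is also 4, so a 2-byte addition is weighted
   exactly like one generic instruction when tuning for size.  */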

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
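
/* Reading the stringop tables below: each {alg, {{max, alg}, ...}}
   entry is a struct stringop_algs initializer; the leading algorithm
   is used when the block size is unknown at compile time, and each
   {max, alg} pair handles known sizes up to MAX bytes, with the
   {-1, alg} pair covering all larger sizes.  Of the two initializers
   per table, the first is used for 32bit code and the second for
   64bit code; DUMMY_STRINGOP_ALGS fills the variant a tuning never
   uses.  (See struct stringop_algs in i386.h for the authoritative
   layout.)  */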

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*				 HI */
   COSTS_N_BYTES (3),			/*				 SI */
   COSTS_N_BYTES (3),			/*				 DI */
   COSTS_N_BYTES (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*			    HI */
   COSTS_N_BYTES (3),			/*			    SI */
   COSTS_N_BYTES (3),			/*			    DI */
   COSTS_N_BYTES (5)},			/*			 other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,				     /* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*				 HI */
   COSTS_N_INSNS (6),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (23),			/*			    SI */
   COSTS_N_INSNS (23),			/*			    DI */
   COSTS_N_INSNS (23)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*				 HI */
   COSTS_N_INSNS (12),			/*				 SI */
   COSTS_N_INSNS (12),			/*				 DI */
   COSTS_N_INSNS (12)},			/*			      other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*			    HI */
   COSTS_N_INSNS (40),			/*			    SI */
   COSTS_N_INSNS (40),			/*			    DI */
   COSTS_N_INSNS (40)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  The 486 has an
					   8kB cache shared between code and
					   data, so 4kB is not really
					   precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*				 HI */
   COSTS_N_INSNS (11),			/*				 SI */
   COSTS_N_INSNS (11),			/*				 DI */
   COSTS_N_INSNS (11)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*			    HI */
   COSTS_N_INSNS (25),			/*			    SI */
   COSTS_N_INSNS (25),			/*			    DI */
   COSTS_N_INSNS (25)},			/*			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (4)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*			    HI */
   COSTS_N_INSNS (17),			/*			    SI */
   COSTS_N_INSNS (17),			/*			    DI */
   COSTS_N_INSNS (17)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,				     /* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (7),			/*				 SI */
   COSTS_N_INSNS (7),			/*				 DI */
   COSTS_N_INSNS (7)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (39),			/*			    SI */
   COSTS_N_INSNS (39),			/*			    DI */
   COSTS_N_INSNS (39)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,				     /* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (3),			/*				 DI */
   COSTS_N_INSNS (3)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*			    HI */
   COSTS_N_INSNS (18),			/*			    SI */
   COSTS_N_INSNS (18),			/*			    DI */
   COSTS_N_INSNS (18)},			/*			 other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,				     /* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*				 HI */
   COSTS_N_INSNS (5),			/*				 SI */
   COSTS_N_INSNS (5),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8.  Alignment becomes important after 8 bytes for memcpy
     and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*				 HI */
   COSTS_N_INSNS (15),			/*				 SI */
   COSTS_N_INSNS (15),			/*				 DI */
   COSTS_N_INSNS (15)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*			    HI */
   COSTS_N_INSNS (56),			/*			    SI */
   COSTS_N_INSNS (56),			/*			    DI */
   COSTS_N_INSNS (56)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,				     /* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/*				 HI */
   COSTS_N_INSNS (10),			/*				 SI */
   COSTS_N_INSNS (10),			/*				 DI */
   COSTS_N_INSNS (10)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/*			    HI */
   COSTS_N_INSNS (66),			/*			    SI */
   COSTS_N_INSNS (66),			/*			    DI */
   COSTS_N_INSNS (66)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  2,				     /* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (2)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
1565 struct processor_costs generic32_cost = {
1566 COSTS_N_INSNS (1), /* cost of an add instruction */
1567 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1568 COSTS_N_INSNS (1), /* variable shift costs */
1569 COSTS_N_INSNS (1), /* constant shift costs */
1570 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1571 COSTS_N_INSNS (4), /* HI */
1572 COSTS_N_INSNS (3), /* SI */
1573 COSTS_N_INSNS (4), /* DI */
1574 COSTS_N_INSNS (2)}, /* other */
1575 0, /* cost of multiply per each bit set */
1576 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1577 COSTS_N_INSNS (26), /* HI */
1578 COSTS_N_INSNS (42), /* SI */
1579 COSTS_N_INSNS (74), /* DI */
1580 COSTS_N_INSNS (74)}, /* other */
1581 COSTS_N_INSNS (1), /* cost of movsx */
1582 COSTS_N_INSNS (1), /* cost of movzx */
1583 8, /* "large" insn */
1584 17, /* MOVE_RATIO */
1585 4, /* cost for loading QImode using movzbl */
1586 {4, 4, 4}, /* cost of loading integer registers
1587 in QImode, HImode and SImode.
1588 Relative to reg-reg move (2). */
1589 {4, 4, 4}, /* cost of storing integer registers */
1590 4, /* cost of reg,reg fld/fst */
1591 {12, 12, 12}, /* cost of loading fp registers
1592 in SFmode, DFmode and XFmode */
1593 {6, 6, 8}, /* cost of storing fp registers
1594 in SFmode, DFmode and XFmode */
1595 2, /* cost of moving MMX register */
1596 {8, 8}, /* cost of loading MMX registers
1597 in SImode and DImode */
1598 {8, 8}, /* cost of storing MMX registers
1599 in SImode and DImode */
1600 2, /* cost of moving SSE register */
1601 {8, 8, 8}, /* cost of loading SSE registers
1602 in SImode, DImode and TImode */
1603 {8, 8, 8}, /* cost of storing SSE registers
1604 in SImode, DImode and TImode */
1605 5, /* MMX or SSE register to integer */
1606 32, /* size of l1 cache. */
1607 256, /* size of l2 cache. */
1608 64, /* size of prefetch block */
1609 6, /* number of parallel prefetches */
1610 3, /* Branch cost */
1611 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1612 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1613 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1614 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1615 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1616 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1617 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1618 DUMMY_STRINGOP_ALGS},
1619 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1620 DUMMY_STRINGOP_ALGS},
1621 1, /* scalar_stmt_cost. */
1622 1, /* scalar_load_cost. */
1623 1, /* scalar_store_cost. */
1624 1, /* vec_stmt_cost. */
1625 1, /* vec_to_scalar_cost. */
1626 1, /* scalar_to_vec_cost. */
1627 1, /* vec_align_load_cost. */
1628 2, /* vec_unalign_load_cost. */
1629 1, /* vec_store_cost. */
1630 3, /* cond_taken_branch_cost. */
1631 1, /* cond_not_taken_branch_cost. */
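/* A minimal sketch of the units used in the cost tables above, assuming the
   usual rtl.h definition COSTS_N_INSNS (N) == (N) * 4: costs are expressed
   in units of one quarter of a simple insn.  */
#if 0
static int
example_fdiv_vs_add_ratio (void)
{
  /* For generic32_cost, FDIV at COSTS_N_INSNS (20) is twenty times
     the cost of an add at COSTS_N_INSNS (1).  */
  return COSTS_N_INSNS (20) / COSTS_N_INSNS (1);	/* == 20 */
}
#endif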
1634 const struct processor_costs *ix86_cost = &pentium_cost;
1636 /* Processor feature/optimization bitmasks. */
1637 #define m_386 (1<<PROCESSOR_I386)
1638 #define m_486 (1<<PROCESSOR_I486)
1639 #define m_PENT (1<<PROCESSOR_PENTIUM)
1640 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1641 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1642 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1643 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1644 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1645 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1646 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1647 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1648 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1649 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1650 #define m_ATOM (1<<PROCESSOR_ATOM)
1652 #define m_GEODE (1<<PROCESSOR_GEODE)
1653 #define m_K6 (1<<PROCESSOR_K6)
1654 #define m_K6_GEODE (m_K6 | m_GEODE)
1655 #define m_K8 (1<<PROCESSOR_K8)
1656 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1657 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1658 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1659 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1660 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1662 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1663 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1665 /* Generic instruction choice should be common subset of supported CPUs
1666 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1667 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1669 /* Feature tests against the various tunings. */
1670 unsigned char ix86_tune_features[X86_TUNE_LAST];
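/* A sketch of how this array is consumed, assuming the i386.h convention
   of wrapping each entry in a TARGET_* macro (the EXAMPLE_ name below is
   illustrative, not the real wrapper).  */
#if 0
#define EXAMPLE_TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]

static bool
example_epilogue_uses_leave (void)
{
  /* Once the array is initialized from the table below, every tuning
     decision is a cheap array lookup.  */
  return EXAMPLE_TARGET_USE_LEAVE != 0;
}
#endif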
1672 /* Feature tests against the various tunings used to create ix86_tune_features
1673 based on the processor mask. */
1674 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1675 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1676 negatively, so enabling for Generic64 seems like good code size
1677 tradeoff. We can't enable it for 32bit generic because it does not
1678 work well with PPro based chips. */
1679 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1681 /* X86_TUNE_PUSH_MEMORY */
1682 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1683 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1685 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1688 /* X86_TUNE_UNROLL_STRLEN */
1689 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1690 | m_CORE2I7 | m_GENERIC,
1692 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1693 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1694 | m_CORE2I7 | m_GENERIC,
1696 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1697 on simulation results. But after P4 was made, no performance benefit
1698 was observed with branch hints; they also increase code size.
1699 As a result, icc never generates branch hints. */
1702 /* X86_TUNE_DOUBLE_WITH_ADD */
1705 /* X86_TUNE_USE_SAHF */
1706 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1707 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1709 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1710 partial dependencies. */
1711 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1712 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1714 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1715 register stalls on the Generic32 compilation setting as well. However,
1716 in the current implementation the partial register stalls are not eliminated
1717 very well - they can be introduced via subregs synthesized by combine
1718 and can happen in caller/callee saving sequences. Because this option
1719 pays back little on PPro based chips and is in conflict with the partial reg
1720 dependencies used by Athlon/P4 based chips, it is better to leave it off
1721 for generic32 for now. */
1724 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1725 m_CORE2I7 | m_GENERIC,
1727 /* X86_TUNE_USE_HIMODE_FIOP */
1728 m_386 | m_486 | m_K6_GEODE,
1730 /* X86_TUNE_USE_SIMODE_FIOP */
1731 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1733 /* X86_TUNE_USE_MOV0 */
1736 /* X86_TUNE_USE_CLTD */
1737 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1739 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1742 /* X86_TUNE_SPLIT_LONG_MOVES */
1745 /* X86_TUNE_READ_MODIFY_WRITE */
1748 /* X86_TUNE_READ_MODIFY */
1751 /* X86_TUNE_PROMOTE_QIMODE */
1752 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1753 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1755 /* X86_TUNE_FAST_PREFIX */
1756 ~(m_PENT | m_486 | m_386),
1758 /* X86_TUNE_SINGLE_STRINGOP */
1759 m_386 | m_PENT4 | m_NOCONA,
1761 /* X86_TUNE_QIMODE_MATH */
1764 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1765 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1766 might be considered for Generic32 if our scheme for avoiding partial
1767 stalls were more effective. */
1770 /* X86_TUNE_PROMOTE_QI_REGS */
1773 /* X86_TUNE_PROMOTE_HI_REGS */
1776 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1777 over esp addition. */
1778 m_386 | m_486 | m_PENT | m_PPRO,
1780 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1781 over esp addition. */
1784 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1785 over esp subtraction. */
1786 m_386 | m_486 | m_PENT | m_K6_GEODE,
1788 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1789 over esp subtraction. */
1790 m_PENT | m_K6_GEODE,
1792 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1793 for DFmode copies */
1794 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1795 | m_GENERIC | m_GEODE),
1797 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1798 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1800 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1801 conflict here between PPro/Pentium4 based chips that treat 128bit
1802 SSE registers as single units and K8 based chips that divide SSE
1803 registers into two 64bit halves. This knob promotes all store destinations
1804 to be 128bit to allow register renaming on 128bit SSE units, but usually
1805 results in one extra microop on 64bit SSE units. Experimental results
1806 show that disabling this option on P4 brings over 20% SPECfp regression,
1807 while enabling it on K8 brings roughly 2.4% regression that can be partly
1808 masked by careful scheduling of moves. */
1809 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1810 | m_AMDFAM10 | m_BDVER1,
1812 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1813 m_AMDFAM10 | m_BDVER1,
1815 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1818 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1821 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1822 are resolved on SSE register parts instead of whole registers, so we may
1823 maintain just the lower part of scalar values in proper format, leaving the
1824 upper part undefined. */
1827 /* X86_TUNE_SSE_TYPELESS_STORES */
1830 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1831 m_PPRO | m_PENT4 | m_NOCONA,
1833 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1834 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1836 /* X86_TUNE_PROLOGUE_USING_MOVE */
1837 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1839 /* X86_TUNE_EPILOGUE_USING_MOVE */
1840 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1842 /* X86_TUNE_SHIFT1 */
1845 /* X86_TUNE_USE_FFREEP */
1848 /* X86_TUNE_INTER_UNIT_MOVES */
1849 ~(m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC),
1851 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1852 ~(m_AMDFAM10 | m_BDVER1),
1854 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1855 than 4 branch instructions in the 16 byte window. */
1856 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
1859 /* X86_TUNE_SCHEDULE */
1860 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
1863 /* X86_TUNE_USE_BT */
1864 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
1866 /* X86_TUNE_USE_INCDEC */
1867 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
1869 /* X86_TUNE_PAD_RETURNS */
1870 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
1872 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1875 /* X86_TUNE_EXT_80387_CONSTANTS */
1876 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1877 | m_CORE2I7 | m_GENERIC,
1879 /* X86_TUNE_SHORTEN_X87_SSE */
1882 /* X86_TUNE_AVOID_VECTOR_DECODE */
1883 m_K8 | m_CORE2I7_64 | m_GENERIC64,
1885 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1886 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1889 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1890 vector path on AMD machines. */
1891 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1893 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1895 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1897 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1898 than a MOV. */
1901 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1902 but one byte longer. */
1905 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1906 operand that cannot be represented using a modRM byte. The XOR
1907 replacement is long decoded, so this split helps here as well. */
1910 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1911 from FP to FP. */
1912 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
1914 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1915 from integer to FP. */
1918 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1919 with a subsequent conditional jump instruction into a single
1920 compare-and-branch uop. */
1923 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1924 will impact LEA instruction selection. */
1927 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1928 instructions. */
1932 /* Feature tests against the various architecture variations. */
1933 unsigned char ix86_arch_features[X86_ARCH_LAST];
1935 /* Feature tests against the various architecture variations, used to create
1936 ix86_arch_features based on the processor mask. */
1937 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1938 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1939 ~(m_386 | m_486 | m_PENT | m_K6),
1941 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1944 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1947 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1950 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1954 static const unsigned int x86_accumulate_outgoing_args
1955 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1958 static const unsigned int x86_arch_always_fancy_math_387
1959 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1960 | m_NOCONA | m_CORE2I7 | m_GENERIC;
1962 static enum stringop_alg stringop_alg = no_stringop;
1964 /* In case the average insn count for a single function invocation is
1965 lower than this constant, emit fast (but longer) prologue and
1966 epilogue. */
1967 #define FAST_PROLOGUE_INSN_COUNT 20
1969 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1970 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1971 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1972 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1974 /* Array of the smallest class containing reg number REGNO, indexed by
1975 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1977 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1979 /* ax, dx, cx, bx */
1980 AREG, DREG, CREG, BREG,
1981 /* si, di, bp, sp */
1982 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1984 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1985 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1988 /* flags, fpsr, fpcr, frame */
1989 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1991 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1994 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1997 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1998 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1999 /* SSE REX registers */
2000 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2004 /* The "default" register map used in 32bit mode. */
2006 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2008 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2009 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2010 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2011 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2012 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2013 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2014 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2017 /* The "default" register map used in 64bit mode. */
2019 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2021 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2022 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2023 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2024 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2025 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2026 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2027 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2030 /* Define the register numbers to be used in Dwarf debugging information.
2031 The SVR4 reference port C compiler uses the following register numbers
2032 in its Dwarf output code:
2033 0 for %eax (gcc regno = 0)
2034 1 for %ecx (gcc regno = 2)
2035 2 for %edx (gcc regno = 1)
2036 3 for %ebx (gcc regno = 3)
2037 4 for %esp (gcc regno = 7)
2038 5 for %ebp (gcc regno = 6)
2039 6 for %esi (gcc regno = 4)
2040 7 for %edi (gcc regno = 5)
2041 The following three DWARF register numbers are never generated by
2042 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2043 believes these numbers have these meanings.
2044 8 for %eip (no gcc equivalent)
2045 9 for %eflags (gcc regno = 17)
2046 10 for %trapno (no gcc equivalent)
2047 It is not at all clear how we should number the FP stack registers
2048 for the x86 architecture. If the version of SDB on x86/svr4 were
2049 a bit less brain dead with respect to floating-point then we would
2050 have a precedent to follow with respect to DWARF register numbers
2051 for x86 FP registers, but the SDB on x86/svr4 is so completely
2052 broken with respect to FP registers that it is hardly worth thinking
2053 of it as something to strive for compatibility with.
2054 The version of x86/svr4 SDB I have at the moment does (partially)
2055 seem to believe that DWARF register number 11 is associated with
2056 the x86 register %st(0), but that's about all. Higher DWARF
2057 register numbers don't seem to be associated with anything in
2058 particular, and even for DWARF regno 11, SDB only seems to under-
2059 stand that it should say that a variable lives in %st(0) (when
2060 asked via an `=' command) if we said it was in DWARF regno 11,
2061 but SDB still prints garbage when asked for the value of the
2062 variable in question (via a `/' command).
2063 (Also note that the labels SDB prints for various FP stack regs
2064 when doing an `x' command are all wrong.)
2065 Note that these problems generally don't affect the native SVR4
2066 C compiler because it doesn't allow the use of -O with -g and
2067 because when it is *not* optimizing, it allocates a memory
2068 location for each floating-point variable, and the memory
2069 location is what gets described in the DWARF AT_location
2070 attribute for the variable in question.
2071 Regardless of the severe mental illness of the x86/svr4 SDB, we
2072 do something sensible here and we use the following DWARF
2073 register numbers. Note that these are all stack-top-relative
2074 numbers:
2075 11 for %st(0) (gcc regno = 8)
2076 12 for %st(1) (gcc regno = 9)
2077 13 for %st(2) (gcc regno = 10)
2078 14 for %st(3) (gcc regno = 11)
2079 15 for %st(4) (gcc regno = 12)
2080 16 for %st(5) (gcc regno = 13)
2081 17 for %st(6) (gcc regno = 14)
2082 18 for %st(7) (gcc regno = 15)
2084 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2086 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2087 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2088 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2089 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2090 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2091 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2092 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
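/* A worked example of the map above (illustrative only): gcc regno 2
   is %ecx and gets SVR4 DWARF regno 1, while gcc regno 1 is %edx and
   gets DWARF regno 2, exactly as the numbering comment documents.  */
#if 0
static void
example_svr4_dwarf_lookup (void)
{
  gcc_assert (svr4_dbx_register_map[2] == 1);	/* %ecx */
  gcc_assert (svr4_dbx_register_map[1] == 2);	/* %edx */
}
#endif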
2095 /* Define parameter passing and return registers. */
2097 static int const x86_64_int_parameter_registers[6] =
2099 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2102 static int const x86_64_ms_abi_int_parameter_registers[4] =
2104 CX_REG, DX_REG, R8_REG, R9_REG
2107 static int const x86_64_int_return_registers[4] =
2109 AX_REG, DX_REG, DI_REG, SI_REG
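/* A sketch of what the tables above imply, assuming the SysV 64-bit
   calling convention: for "long f (long a, long b)", A arrives in
   x86_64_int_parameter_registers[0] == DI_REG (%rdi), B in SI_REG
   (%rsi), and the result is returned in
   x86_64_int_return_registers[0] == AX_REG (%rax).  The MS ABI table
   instead starts with %rcx/%rdx.  */
#if 0
static int
example_first_sysv_arg_regno (void)
{
  return x86_64_int_parameter_registers[0];	/* DI_REG */
}
#endif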
2112 /* Define the structure for the machine field in struct function. */
2114 struct GTY(()) stack_local_entry {
2115 unsigned short mode;
2118 struct stack_local_entry *next;
2121 /* Structure describing stack frame layout.
2122 Stack grows downward:
2128 saved static chain if ix86_static_chain_on_stack
2130 saved frame pointer if frame_pointer_needed
2131 <- HARD_FRAME_POINTER
2137 <- sse_regs_save_offset
2140 [va_arg registers] |
2144 [padding2] | = to_allocate
2153 int outgoing_arguments_size;
2154 HOST_WIDE_INT frame;
2156 /* The offsets relative to ARG_POINTER. */
2157 HOST_WIDE_INT frame_pointer_offset;
2158 HOST_WIDE_INT hard_frame_pointer_offset;
2159 HOST_WIDE_INT stack_pointer_offset;
2160 HOST_WIDE_INT hfp_save_offset;
2161 HOST_WIDE_INT reg_save_offset;
2162 HOST_WIDE_INT sse_reg_save_offset;
2164 /* When save_regs_using_mov is set, emit prologue using
2165 move instead of push instructions. */
2166 bool save_regs_using_mov;
2169 /* Code model option. */
2170 enum cmodel ix86_cmodel;
2172 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2174 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2176 /* Which unit we are generating floating point math for. */
2177 enum fpmath_unit ix86_fpmath;
2179 /* Which cpu we are scheduling for. */
2180 enum attr_cpu ix86_schedule;
2182 /* Which cpu we are optimizing for. */
2183 enum processor_type ix86_tune;
2185 /* Which instruction set architecture to use. */
2186 enum processor_type ix86_arch;
2188 /* True if the sse prefetch instruction is not a NOOP. */
2189 int x86_prefetch_sse;
2191 /* ix86_regparm_string as a number */
2192 static int ix86_regparm;
2194 /* -mstackrealign option */
2195 static const char ix86_force_align_arg_pointer_string[]
2196 = "force_align_arg_pointer";
2198 static rtx (*ix86_gen_leave) (void);
2199 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2200 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2201 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2202 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2203 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2204 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2205 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2206 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2207 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2209 /* Preferred alignment for stack boundary in bits. */
2210 unsigned int ix86_preferred_stack_boundary;
2212 /* Alignment for incoming stack boundary in bits specified at
2214 static unsigned int ix86_user_incoming_stack_boundary;
2216 /* Default alignment for incoming stack boundary in bits. */
2217 static unsigned int ix86_default_incoming_stack_boundary;
2219 /* Alignment for incoming stack boundary in bits. */
2220 unsigned int ix86_incoming_stack_boundary;
2222 /* The ABI used by the target. */
2223 enum calling_abi ix86_abi;
2225 /* Values 1-5: see jump.c */
2226 int ix86_branch_cost;
2228 /* Calling abi specific va_list type nodes. */
2229 static GTY(()) tree sysv_va_list_type_node;
2230 static GTY(()) tree ms_va_list_type_node;
2232 /* Variables which are this size or smaller are put in the data/bss
2233 or ldata/lbss sections. */
2235 int ix86_section_threshold = 65536;
2237 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2238 char internal_label_prefix[16];
2239 int internal_label_prefix_len;
2241 /* Fence to use after loop using movnt. */
2244 /* Register class used for passing a given 64bit part of the argument.
2245 These represent classes as documented by the psABI, with the exception
2246 of the SSESF and SSEDF classes, which are basically SSE class; gcc just
2247 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2249 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2250 whenever possible (the upper half is then just padding). */
2251 enum x86_64_reg_class
2254 X86_64_INTEGER_CLASS,
2255 X86_64_INTEGERSI_CLASS,
2262 X86_64_COMPLEX_X87_CLASS,
2266 #define MAX_CLASSES 4
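/* A sketch of how the classes above get used, per the psABI
   classification rules (the types below are illustrative): an 8-byte
   struct of two ints classifies as X86_64_INTEGER_CLASS and travels
   in one GPR; a lone float classifies as the SSE class and, per the
   SSESF refinement described above, is moved in SFmode; a 4-byte
   struct gets X86_64_INTEGERSI_CLASS so the cheaper SImode move can
   be used.  */
#if 0
struct example_pair { int a, b; };	/* one eightbyte: INTEGER */
struct example_small { int a; };	/* half an eightbyte: INTEGERSI */
struct example_float { float f; };	/* one eightbyte: SSESF */
#endif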
2268 /* Table of constants used by fldpi, fldln2, etc.... */
2269 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2270 static bool ext_80387_constants_init = 0;
2273 static struct machine_function * ix86_init_machine_status (void);
2274 static rtx ix86_function_value (const_tree, const_tree, bool);
2275 static bool ix86_function_value_regno_p (const unsigned int);
2276 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2278 static rtx ix86_static_chain (const_tree, bool);
2279 static int ix86_function_regparm (const_tree, const_tree);
2280 static void ix86_compute_frame_layout (struct ix86_frame *);
2281 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2283 static void ix86_add_new_builtins (int);
2284 static rtx ix86_expand_vec_perm_builtin (tree);
2285 static tree ix86_canonical_va_list_type (tree);
2286 static void predict_jump (int);
2287 static unsigned int split_stack_prologue_scratch_regno (void);
2288 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2290 enum ix86_function_specific_strings
2292 IX86_FUNCTION_SPECIFIC_ARCH,
2293 IX86_FUNCTION_SPECIFIC_TUNE,
2294 IX86_FUNCTION_SPECIFIC_FPMATH,
2295 IX86_FUNCTION_SPECIFIC_MAX
2298 static char *ix86_target_string (int, int, const char *, const char *,
2299 const char *, bool);
2300 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2301 static void ix86_function_specific_save (struct cl_target_option *);
2302 static void ix86_function_specific_restore (struct cl_target_option *);
2303 static void ix86_function_specific_print (FILE *, int,
2304 struct cl_target_option *);
2305 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2306 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2307 static bool ix86_can_inline_p (tree, tree);
2308 static void ix86_set_current_function (tree);
2309 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2311 static enum calling_abi ix86_function_abi (const_tree);
2314 #ifndef SUBTARGET32_DEFAULT_CPU
2315 #define SUBTARGET32_DEFAULT_CPU "i386"
2318 /* The svr4 ABI for the i386 says that records and unions are returned
2320 #ifndef DEFAULT_PCC_STRUCT_RETURN
2321 #define DEFAULT_PCC_STRUCT_RETURN 1
2324 /* Whether -mtune= or -march= were specified */
2325 static int ix86_tune_defaulted;
2326 static int ix86_arch_specified;
2328 /* A mask of ix86_isa_flags that includes bit X if X
2329 was set or cleared on the command line. */
2330 static int ix86_isa_flags_explicit;
2332 /* Define a set of ISAs which are available when a given ISA is
2333 enabled. MMX and SSE ISAs are handled separately. */
2335 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2336 #define OPTION_MASK_ISA_3DNOW_SET \
2337 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2339 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2340 #define OPTION_MASK_ISA_SSE2_SET \
2341 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2342 #define OPTION_MASK_ISA_SSE3_SET \
2343 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2344 #define OPTION_MASK_ISA_SSSE3_SET \
2345 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2346 #define OPTION_MASK_ISA_SSE4_1_SET \
2347 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2348 #define OPTION_MASK_ISA_SSE4_2_SET \
2349 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2350 #define OPTION_MASK_ISA_AVX_SET \
2351 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2352 #define OPTION_MASK_ISA_FMA_SET \
2353 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2355 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2356 as -msse4.2. */
2357 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2359 #define OPTION_MASK_ISA_SSE4A_SET \
2360 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2361 #define OPTION_MASK_ISA_FMA4_SET \
2362 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2363 | OPTION_MASK_ISA_AVX_SET)
2364 #define OPTION_MASK_ISA_XOP_SET \
2365 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2366 #define OPTION_MASK_ISA_LWP_SET \
2369 /* AES and PCLMUL need SSE2 because they use xmm registers */
2370 #define OPTION_MASK_ISA_AES_SET \
2371 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2372 #define OPTION_MASK_ISA_PCLMUL_SET \
2373 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2375 #define OPTION_MASK_ISA_ABM_SET \
2376 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2378 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2379 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2380 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2381 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2382 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2383 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2384 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2386 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2387 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2388 #define OPTION_MASK_ISA_F16C_SET \
2389 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
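/* A sketch of the closure the *_SET macros compute: expanding the
   definitions above,
   OPTION_MASK_ISA_SSE3_SET == (OPTION_MASK_ISA_SSE3
				| OPTION_MASK_ISA_SSE2
				| OPTION_MASK_ISA_SSE),
   so enabling -msse3 behaves as if -msse2 and -msse had been given
   too, mirroring the pattern used by the option handler below.  */
#if 0
static void
example_enable_sse3 (void)
{
  ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
}
#endif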
2391 /* Define a set of ISAs which aren't available when a given ISA is
2392 disabled. MMX and SSE ISAs are handled separately. */
2394 #define OPTION_MASK_ISA_MMX_UNSET \
2395 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2396 #define OPTION_MASK_ISA_3DNOW_UNSET \
2397 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2398 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2400 #define OPTION_MASK_ISA_SSE_UNSET \
2401 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2402 #define OPTION_MASK_ISA_SSE2_UNSET \
2403 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2404 #define OPTION_MASK_ISA_SSE3_UNSET \
2405 (OPTION_MASK_ISA_SSE3 \
2406 | OPTION_MASK_ISA_SSSE3_UNSET \
2407 | OPTION_MASK_ISA_SSE4A_UNSET )
2408 #define OPTION_MASK_ISA_SSSE3_UNSET \
2409 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2410 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2411 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2412 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2413 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2414 #define OPTION_MASK_ISA_AVX_UNSET \
2415 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2416 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2417 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2419 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2420 as -mno-sse4.1. */
2421 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2423 #define OPTION_MASK_ISA_SSE4A_UNSET \
2424 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2426 #define OPTION_MASK_ISA_FMA4_UNSET \
2427 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2428 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2429 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2431 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2432 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2433 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2434 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2435 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2436 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2437 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2438 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2439 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2440 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2442 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2443 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2444 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
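/* The *_UNSET macros chain in the opposite direction: disabling an
   ISA also disables everything built on top of it.  Expanding the
   macros above, OPTION_MASK_ISA_SSE2_UNSET transitively covers SSE3,
   SSSE3, SSE4.1, SSE4.2, SSE4A, AVX, FMA, FMA4, XOP and F16C, so
   -mno-sse2 turns all of them off at once.  */
#if 0
static void
example_disable_sse2 (void)
{
  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
}
#endif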
2446 /* Vectorization library interface and handlers. */
2447 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2449 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2450 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2452 /* Processor target table, indexed by processor number */
2455 const struct processor_costs *cost; /* Processor costs */
2456 const int align_loop; /* Default alignments. */
2457 const int align_loop_max_skip;
2458 const int align_jump;
2459 const int align_jump_max_skip;
2460 const int align_func;
2463 static const struct ptt processor_target_table[PROCESSOR_max] =
2465 {&i386_cost, 4, 3, 4, 3, 4},
2466 {&i486_cost, 16, 15, 16, 15, 16},
2467 {&pentium_cost, 16, 7, 16, 7, 16},
2468 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2469 {&geode_cost, 0, 0, 0, 0, 0},
2470 {&k6_cost, 32, 7, 32, 7, 32},
2471 {&athlon_cost, 16, 7, 16, 7, 16},
2472 {&pentium4_cost, 0, 0, 0, 0, 0},
2473 {&k8_cost, 16, 7, 16, 7, 16},
2474 {&nocona_cost, 0, 0, 0, 0, 0},
2475 /* Core 2 32-bit. */
2476 {&generic32_cost, 16, 10, 16, 10, 16},
2477 /* Core 2 64-bit. */
2478 {&generic64_cost, 16, 10, 16, 10, 16},
2479 /* Core i7 32-bit. */
2480 {&generic32_cost, 16, 10, 16, 10, 16},
2481 /* Core i7 64-bit. */
2482 {&generic64_cost, 16, 10, 16, 10, 16},
2483 {&generic32_cost, 16, 7, 16, 7, 16},
2484 {&generic64_cost, 16, 10, 16, 10, 16},
2485 {&amdfam10_cost, 32, 24, 32, 7, 32},
2486 {&bdver1_cost, 32, 24, 32, 7, 32},
2487 {&atom_cost, 16, 7, 16, 7, 16}
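/* How to read an entry above, e.g. the k8 row
   {&k8_cost, 16, 7, 16, 7, 16}: rtx costs come from k8_cost, loops
   and jump targets are aligned to 16 bytes with at most 7 padding
   bytes skipped, and functions are aligned to 16 bytes.  Zero
   entries (as in the pentium4 row) request no particular alignment.
   A lookup is then just an index by processor, as sketched below.  */
#if 0
static const struct processor_costs *
example_costs_for (enum processor_type cpu)
{
  return processor_target_table[cpu].cost;
}
#endif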
2490 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2518 /* Return true if a red-zone is in use. */
2521 ix86_using_red_zone (void)
2523 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
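/* A sketch of why the MS ABI is excluded: the SysV x86-64 psABI
   guarantees a 128-byte red zone below %rsp that leaf functions may
   use without adjusting the stack pointer, e.g.

	movl	%edi, -4(%rsp)	# spill below %rsp, no sub/add needed

   whereas the Windows x64 ABI provides no such guarantee.  */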
2526 /* Implement TARGET_HANDLE_OPTION. */
2529 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2536 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2537 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2541 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2542 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2549 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2550 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2554 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2555 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2565 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2566 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2570 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2571 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2578 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2579 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2583 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2584 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2591 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2592 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2596 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2604 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2605 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2609 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2617 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2622 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2623 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2630 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2635 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2643 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2644 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2648 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2656 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2657 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2661 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2662 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2667 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2668 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2672 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2673 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2679 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2680 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2684 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2685 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2692 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2693 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2697 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2698 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2705 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2706 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2710 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2711 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2718 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2719 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2723 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2724 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2731 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2732 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2736 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2737 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2744 ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2745 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2749 ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2750 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2757 ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2758 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2762 ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2763 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2770 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2771 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2775 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2776 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2783 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2784 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2788 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2789 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2796 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2797 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2801 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2802 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2809 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2810 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2814 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2815 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2822 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2823 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2827 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2828 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2835 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2836 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2840 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2841 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2848 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2853 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2861 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2866 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2874 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2879 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2880 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2887 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2892 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2902 /* Return a string that documents the current -m options. The caller is
2903 responsible for freeing the string. */
2906 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2907 const char *fpmath, bool add_nl_p)
2909 struct ix86_target_opts
2911 const char *option; /* option string */
2912 int mask; /* isa mask options */
2915 /* This table is ordered so that options like -msse4.2, which imply
2916 preceding options, are matched first. */
2917 static struct ix86_target_opts isa_opts[] =
2919 { "-m64", OPTION_MASK_ISA_64BIT },
2920 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2921 { "-mfma", OPTION_MASK_ISA_FMA },
2922 { "-mxop", OPTION_MASK_ISA_XOP },
2923 { "-mlwp", OPTION_MASK_ISA_LWP },
2924 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2925 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2926 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2927 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2928 { "-msse3", OPTION_MASK_ISA_SSE3 },
2929 { "-msse2", OPTION_MASK_ISA_SSE2 },
2930 { "-msse", OPTION_MASK_ISA_SSE },
2931 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2932 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2933 { "-mmmx", OPTION_MASK_ISA_MMX },
2934 { "-mabm", OPTION_MASK_ISA_ABM },
2935 { "-mbmi", OPTION_MASK_ISA_BMI },
2936 { "-mtbm", OPTION_MASK_ISA_TBM },
2937 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2938 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2939 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2940 { "-maes", OPTION_MASK_ISA_AES },
2941 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2942 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2943 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2944 { "-mf16c", OPTION_MASK_ISA_F16C },
2948 static struct ix86_target_opts flag_opts[] =
2950 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2951 { "-m80387", MASK_80387 },
2952 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2953 { "-malign-double", MASK_ALIGN_DOUBLE },
2954 { "-mcld", MASK_CLD },
2955 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2956 { "-mieee-fp", MASK_IEEE_FP },
2957 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2958 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2959 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2960 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2961 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2962 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2963 { "-mno-red-zone", MASK_NO_RED_ZONE },
2964 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2965 { "-mrecip", MASK_RECIP },
2966 { "-mrtd", MASK_RTD },
2967 { "-msseregparm", MASK_SSEREGPARM },
2968 { "-mstack-arg-probe", MASK_STACK_PROBE },
2969 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2970 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2971 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2972 { "-mvzeroupper", MASK_VZEROUPPER },
2975 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2978 char target_other[40];
2987 memset (opts, '\0', sizeof (opts));
2989 /* Add -march= option. */
2992 opts[num][0] = "-march=";
2993 opts[num++][1] = arch;
2996 /* Add -mtune= option. */
2999 opts[num][0] = "-mtune=";
3000 opts[num++][1] = tune;
3003 /* Pick out the options in isa options. */
3004 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3006 if ((isa & isa_opts[i].mask) != 0)
3008 opts[num++][0] = isa_opts[i].option;
3009 isa &= ~ isa_opts[i].mask;
3013 if (isa && add_nl_p)
3015 opts[num++][0] = isa_other;
3016 sprintf (isa_other, "(other isa: %#x)", isa);
3019 /* Add flag options. */
3020 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3022 if ((flags & flag_opts[i].mask) != 0)
3024 opts[num++][0] = flag_opts[i].option;
3025 flags &= ~ flag_opts[i].mask;
3029 if (flags && add_nl_p)
3031 opts[num++][0] = target_other;
3032 sprintf (target_other, "(other flags: %#x)", flags);
3035 /* Add -fpmath= option. */
3038 opts[num][0] = "-mfpmath=";
3039 opts[num++][1] = fpmath;
3046 gcc_assert (num < ARRAY_SIZE (opts));
3048 /* Size the string. */
3050 sep_len = (add_nl_p) ? 3 : 1;
3051 for (i = 0; i < num; i++)
3054 for (j = 0; j < 2; j++)
3056 len += strlen (opts[i][j]);
3059 /* Build the string. */
3060 ret = ptr = (char *) xmalloc (len);
3063 for (i = 0; i < num; i++)
3067 for (j = 0; j < 2; j++)
3068 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3075 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3083 for (j = 0; j < 2; j++)
3086 memcpy (ptr, opts[i][j], len2[j]);
3088 line_len += len2[j];
3093 gcc_assert (ret + len >= ptr);
3098 /* Return TRUE if software prefetching is beneficial for the
3099 target. */
3102 software_prefetching_beneficial_p (void)
3106 case PROCESSOR_GEODE:
3108 case PROCESSOR_ATHLON:
3110 case PROCESSOR_AMDFAM10:
3118 /* Return true if profiling code should be emitted before the
3119 prologue, and false otherwise.
3120 Note: on x86 this is the -mfentry ("hotfix") case; unsupported -mfentry combinations are rejected elsewhere via sorry(). */
3122 ix86_profile_before_prologue (void)
3124 return flag_fentry != 0;
3127 /* Function that is callable from the debugger to print the current
3130 ix86_debug_options (void)
3132 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3133 ix86_arch_string, ix86_tune_string,
3134 ix86_fpmath_string, true);
3138 fprintf (stderr, "%s\n\n", opts);
3142 fputs ("<no options>\n\n", stderr);
3147 /* Override various settings based on options. If MAIN_ARGS_P, the
3148 options are from the command line, otherwise they are from
3149 attributes. */
3152 ix86_option_override_internal (bool main_args_p)
3155 unsigned int ix86_arch_mask, ix86_tune_mask;
3156 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3161 /* Comes from final.c -- no real reason to change it. */
3162 #define MAX_CODE_ALIGN 16
3170 PTA_PREFETCH_SSE = 1 << 4,
3172 PTA_3DNOW_A = 1 << 6,
3176 PTA_POPCNT = 1 << 10,
3178 PTA_SSE4A = 1 << 12,
3179 PTA_NO_SAHF = 1 << 13,
3180 PTA_SSE4_1 = 1 << 14,
3181 PTA_SSE4_2 = 1 << 15,
3183 PTA_PCLMUL = 1 << 17,
3186 PTA_MOVBE = 1 << 20,
3190 PTA_FSGSBASE = 1 << 24,
3191 PTA_RDRND = 1 << 25,
3195 /* if this reaches 32, need to widen struct pta flags below */
3200 const char *const name; /* processor name or nickname. */
3201 const enum processor_type processor;
3202 const enum attr_cpu schedule;
3203 const unsigned /*enum pta_flags*/ flags;
3205 const processor_alias_table[] =
3207 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3208 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3209 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3210 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3211 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3212 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3213 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3214 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3215 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3216 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3217 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3218 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3219 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3221 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3223 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3224 PTA_MMX | PTA_SSE | PTA_SSE2},
3225 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3226 PTA_MMX |PTA_SSE | PTA_SSE2},
3227 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3228 PTA_MMX | PTA_SSE | PTA_SSE2},
3229 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3230 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3231 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3232 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3233 | PTA_CX16 | PTA_NO_SAHF},
3234 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3235 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3236 | PTA_SSSE3 | PTA_CX16},
3237 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3238 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3239 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3240 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3241 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3242 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3243 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3244 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
3245 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3246 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3247 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3248 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3249 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3250 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3252 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3253 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3254 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3255 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3256 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3257 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3258 {"x86-64", PROCESSOR_K8, CPU_K8,
3259 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3260 {"k8", PROCESSOR_K8, CPU_K8,
3261 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3262 | PTA_SSE2 | PTA_NO_SAHF},
3263 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3264 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3265 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3266 {"opteron", PROCESSOR_K8, CPU_K8,
3267 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3268 | PTA_SSE2 | PTA_NO_SAHF},
3269 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3270 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3271 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3272 {"athlon64", PROCESSOR_K8, CPU_K8,
3273 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3274 | PTA_SSE2 | PTA_NO_SAHF},
3275 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3276 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3277 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3278 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3279 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3280 | PTA_SSE2 | PTA_NO_SAHF},
3281 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3282 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3283 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3284 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3285 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3286 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3287 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3288 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3289 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
3290 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
3291 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
3292 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3293 0 /* flags are only used for -march switch. */ },
3294 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3295 PTA_64BIT /* flags are only used for -march switch. */ },
3298 int const pta_size = ARRAY_SIZE (processor_alias_table);
3300 /* Set up prefix/suffix so the error messages refer to either the command
3301 line argument, or the attribute(target). */
3310 prefix = "option(\"";
3315 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3316 SUBTARGET_OVERRIDE_OPTIONS;
3319 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3320 SUBSUBTARGET_OVERRIDE_OPTIONS;
3323 /* -fPIC is the default for x86_64. */
3324 if (TARGET_MACHO && TARGET_64BIT)
3327 /* Need to check -mtune=generic first. */
3328 if (ix86_tune_string)
3330 if (!strcmp (ix86_tune_string, "generic")
3331 || !strcmp (ix86_tune_string, "i686")
3332 /* As special support for cross compilers we read -mtune=native
3333 as -mtune=generic. With native compilers we won't see the
3334 -mtune=native, as it was changed by the driver. */
3335 || !strcmp (ix86_tune_string, "native"))
3338 ix86_tune_string = "generic64";
3340 ix86_tune_string = "generic32";
3342 /* If this call is for setting the option attribute, allow the
3343 generic32/generic64 that was previously set. */
3344 else if (!main_args_p
3345 && (!strcmp (ix86_tune_string, "generic32")
3346 || !strcmp (ix86_tune_string, "generic64")))
3348 else if (!strncmp (ix86_tune_string, "generic", 7))
3349 error ("bad value (%s) for %stune=%s %s",
3350 ix86_tune_string, prefix, suffix, sw);
3351 else if (!strcmp (ix86_tune_string, "x86-64"))
3352 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3353 "%stune=k8%s or %stune=generic%s instead as appropriate",
3354 prefix, suffix, prefix, suffix, prefix, suffix);
3358 if (ix86_arch_string)
3359 ix86_tune_string = ix86_arch_string;
3360 if (!ix86_tune_string)
3362 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3363 ix86_tune_defaulted = 1;
3366 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3367 need to use a sensible tune option. */
3368 if (!strcmp (ix86_tune_string, "generic")
3369 || !strcmp (ix86_tune_string, "x86-64")
3370 || !strcmp (ix86_tune_string, "i686"))
3373 ix86_tune_string = "generic64";
3375 ix86_tune_string = "generic32";
3379 if (ix86_stringop_string)
3381 if (!strcmp (ix86_stringop_string, "rep_byte"))
3382 stringop_alg = rep_prefix_1_byte;
3383 else if (!strcmp (ix86_stringop_string, "libcall"))
3384 stringop_alg = libcall;
3385 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3386 stringop_alg = rep_prefix_4_byte;
3387 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3389 /* rep; movq isn't available in 32-bit code. */
3390 stringop_alg = rep_prefix_8_byte;
3391 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3392 stringop_alg = loop_1_byte;
3393 else if (!strcmp (ix86_stringop_string, "loop"))
3394 stringop_alg = loop;
3395 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3396 stringop_alg = unrolled_loop;
3398 error ("bad value (%s) for %sstringop-strategy=%s %s",
3399 ix86_stringop_string, prefix, suffix, sw);
3402 if (!ix86_arch_string)
3403 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3405 ix86_arch_specified = 1;
3407 /* Validate -mabi= value. */
3408 if (ix86_abi_string)
3410 if (strcmp (ix86_abi_string, "sysv") == 0)
3411 ix86_abi = SYSV_ABI;
3412 else if (strcmp (ix86_abi_string, "ms") == 0)
3415 error ("unknown ABI (%s) for %sabi=%s %s",
3416 ix86_abi_string, prefix, suffix, sw);
3419 ix86_abi = DEFAULT_ABI;
3421 if (ix86_cmodel_string != 0)
3423 if (!strcmp (ix86_cmodel_string, "small"))
3424 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3425 else if (!strcmp (ix86_cmodel_string, "medium"))
3426 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3427 else if (!strcmp (ix86_cmodel_string, "large"))
3428 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3430 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3431 else if (!strcmp (ix86_cmodel_string, "32"))
3432 ix86_cmodel = CM_32;
3433 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3434 ix86_cmodel = CM_KERNEL;
3436 error ("bad value (%s) for %scmodel=%s %s",
3437 ix86_cmodel_string, prefix, suffix, sw);
3441 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3442 use of rip-relative addressing. This eliminates fixups that
3443 would otherwise be needed if this object is to be placed in a
3444 DLL, and is essentially just as efficient as direct addressing. */
3445 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3446 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3447 else if (TARGET_64BIT)
3448 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3450 ix86_cmodel = CM_32;
3452 if (ix86_asm_string != 0)
3455 && !strcmp (ix86_asm_string, "intel"))
3456 ix86_asm_dialect = ASM_INTEL;
3457 else if (!strcmp (ix86_asm_string, "att"))
3458 ix86_asm_dialect = ASM_ATT;
3460 error ("bad value (%s) for %sasm=%s %s",
3461 ix86_asm_string, prefix, suffix, sw);
3463 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3464 error ("code model %qs not supported in the %s bit mode",
3465 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3466 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3467 sorry ("%i-bit mode not compiled in",
3468 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3470 for (i = 0; i < pta_size; i++)
3471 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3473 ix86_schedule = processor_alias_table[i].schedule;
3474 ix86_arch = processor_alias_table[i].processor;
3475 /* Default cpu tuning to the architecture. */
3476 ix86_tune = ix86_arch;
3478 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3479 error ("CPU you selected does not support x86-64 "
3480 "instruction set");
3482 if (processor_alias_table[i].flags & PTA_MMX
3483 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3484 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3485 if (processor_alias_table[i].flags & PTA_3DNOW
3486 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3487 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3488 if (processor_alias_table[i].flags & PTA_3DNOW_A
3489 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3490 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3491 if (processor_alias_table[i].flags & PTA_SSE
3492 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3493 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3494 if (processor_alias_table[i].flags & PTA_SSE2
3495 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3496 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3497 if (processor_alias_table[i].flags & PTA_SSE3
3498 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3499 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3500 if (processor_alias_table[i].flags & PTA_SSSE3
3501 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3502 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3503 if (processor_alias_table[i].flags & PTA_SSE4_1
3504 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3505 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3506 if (processor_alias_table[i].flags & PTA_SSE4_2
3507 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3508 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3509 if (processor_alias_table[i].flags & PTA_AVX
3510 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3511 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3512 if (processor_alias_table[i].flags & PTA_FMA
3513 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3514 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3515 if (processor_alias_table[i].flags & PTA_SSE4A
3516 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3517 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3518 if (processor_alias_table[i].flags & PTA_FMA4
3519 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3520 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3521 if (processor_alias_table[i].flags & PTA_XOP
3522 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3523 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3524 if (processor_alias_table[i].flags & PTA_LWP
3525 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3526 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3527 if (processor_alias_table[i].flags & PTA_ABM
3528 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3529 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3530 if (processor_alias_table[i].flags & PTA_BMI
3531 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3532 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3533 if (processor_alias_table[i].flags & PTA_TBM
3534 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3535 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3536 if (processor_alias_table[i].flags & PTA_CX16
3537 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3538 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3539 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3540 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3541 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3542 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3543 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3544 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3545 if (processor_alias_table[i].flags & PTA_MOVBE
3546 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3547 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3548 if (processor_alias_table[i].flags & PTA_AES
3549 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3550 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3551 if (processor_alias_table[i].flags & PTA_PCLMUL
3552 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3553 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3554 if (processor_alias_table[i].flags & PTA_FSGSBASE
3555 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3556 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3557 if (processor_alias_table[i].flags & PTA_RDRND
3558 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3559 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3560 if (processor_alias_table[i].flags & PTA_F16C
3561 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3562 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3563 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3564 x86_prefetch_sse = true;
3569 if (!strcmp (ix86_arch_string, "generic"))
3570 error ("generic CPU can be used only for %stune=%s %s",
3571 prefix, suffix, sw);
3572 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3573 error ("bad value (%s) for %sarch=%s %s",
3574 ix86_arch_string, prefix, suffix, sw);
3576 ix86_arch_mask = 1u << ix86_arch;
3577 for (i = 0; i < X86_ARCH_LAST; ++i)
3578 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3580 for (i = 0; i < pta_size; i++)
3581 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3583 ix86_schedule = processor_alias_table[i].schedule;
3584 ix86_tune = processor_alias_table[i].processor;
3587 if (!(processor_alias_table[i].flags & PTA_64BIT))
3589 if (ix86_tune_defaulted)
3591 ix86_tune_string = "x86-64";
3592 for (i = 0; i < pta_size; i++)
3593 if (! strcmp (ix86_tune_string,
3594 processor_alias_table[i].name))
3596 ix86_schedule = processor_alias_table[i].schedule;
3597 ix86_tune = processor_alias_table[i].processor;
3600 error ("CPU you selected does not support x86-64 "
3606 /* Adjust tuning when compiling for 32-bit ABI. */
3609 case PROCESSOR_GENERIC64:
3610 ix86_tune = PROCESSOR_GENERIC32;
3611 ix86_schedule = CPU_PENTIUMPRO;
3614 case PROCESSOR_CORE2_64:
3615 ix86_tune = PROCESSOR_CORE2_32;
3618 case PROCESSOR_COREI7_64:
3619 ix86_tune = PROCESSOR_COREI7_32;
3626 /* Intel CPUs have always interpreted SSE prefetch instructions as
3627 NOPs; so, we can enable SSE prefetch instructions even when
3628 -mtune (rather than -march) points us to a processor that has them.
3629 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3630 higher processors. */
3632 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3633 x86_prefetch_sse = true;
3637 if (ix86_tune_specified && i == pta_size)
3638 error ("bad value (%s) for %stune=%s %s",
3639 ix86_tune_string, prefix, suffix, sw);
3641 ix86_tune_mask = 1u << ix86_tune;
3642 for (i = 0; i < X86_TUNE_LAST; ++i)
3643 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3645 #ifndef USE_IX86_FRAME_POINTER
3646 #define USE_IX86_FRAME_POINTER 0
3649 #ifndef USE_X86_64_FRAME_POINTER
3650 #define USE_X86_64_FRAME_POINTER 0
3653 /* Set the default values for switches whose default depends on TARGET_64BIT
3654 in case they weren't overwritten by command line options. */
3657 if (optimize > 1 && !global_options_set.x_flag_zee)
3659 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3660 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3661 if (flag_asynchronous_unwind_tables == 2)
3662 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3663 if (flag_pcc_struct_return == 2)
3664 flag_pcc_struct_return = 0;
3668 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3669 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3670 if (flag_asynchronous_unwind_tables == 2)
3671 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3672 if (flag_pcc_struct_return == 2)
3673 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3677 ix86_cost = &ix86_size_cost;
3679 ix86_cost = processor_target_table[ix86_tune].cost;
3681 /* Arrange to set up i386_stack_locals for all functions. */
3682 init_machine_status = ix86_init_machine_status;
3684 /* Validate -mregparm= value. */
3685 if (ix86_regparm_string)
3688 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3689 i = atoi (ix86_regparm_string);
3690 if (i < 0 || i > REGPARM_MAX)
3691 error ("%sregparm=%d%s is not between 0 and %d",
3692 prefix, i, suffix, REGPARM_MAX);
3697 ix86_regparm = REGPARM_MAX;
3699 /* If the user has provided any of the -malign-* options,
3700 warn and use that value only if -falign-* is not set.
3701 Remove this code in GCC 3.2 or later. */
3702 if (ix86_align_loops_string)
3704 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3705 prefix, suffix, suffix);
3706 if (align_loops == 0)
3708 i = atoi (ix86_align_loops_string);
3709 if (i < 0 || i > MAX_CODE_ALIGN)
3710 error ("%salign-loops=%d%s is not between 0 and %d",
3711 prefix, i, suffix, MAX_CODE_ALIGN);
3713 align_loops = 1 << i;
3717 if (ix86_align_jumps_string)
3719 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3720 prefix, suffix, suffix);
3721 if (align_jumps == 0)
3723 i = atoi (ix86_align_jumps_string);
3724 if (i < 0 || i > MAX_CODE_ALIGN)
3725 error ("%salign-jumps=%d%s is not between 0 and %d",
3726 prefix, i, suffix, MAX_CODE_ALIGN);
3728 align_jumps = 1 << i;
3732 if (ix86_align_funcs_string)
3734 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3735 prefix, suffix, suffix);
3736 if (align_functions == 0)
3738 i = atoi (ix86_align_funcs_string);
3739 if (i < 0 || i > MAX_CODE_ALIGN)
3740 error ("%salign-functions=%d%s is not between 0 and %d",
3741 prefix, i, suffix, MAX_CODE_ALIGN);
3743 align_functions = 1 << i;
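/* Note (summary of the handling above, values illustrative): these legacy
   -malign-* options take a log2, so the byte alignment actually used is
   1 << i; e.g. -malign-functions=4 yields align_functions == 16, i.e.
   16-byte-aligned function starts. The -falign-* replacements take a
   byte count rather than a log2. */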
3747 /* Default align_* from the processor table. */
3748 if (align_loops == 0)
3750 align_loops = processor_target_table[ix86_tune].align_loop;
3751 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3753 if (align_jumps == 0)
3755 align_jumps = processor_target_table[ix86_tune].align_jump;
3756 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3758 if (align_functions == 0)
3760 align_functions = processor_target_table[ix86_tune].align_func;
3763 /* Validate -mbranch-cost= value, or provide default. */
3764 ix86_branch_cost = ix86_cost->branch_cost;
3765 if (ix86_branch_cost_string)
3767 i = atoi (ix86_branch_cost_string);
3769 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3771 ix86_branch_cost = i;
3773 if (ix86_section_threshold_string)
3775 i = atoi (ix86_section_threshold_string);
3777 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3779 ix86_section_threshold = i;
3782 if (ix86_tls_dialect_string)
3784 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3785 ix86_tls_dialect = TLS_DIALECT_GNU;
3786 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3787 ix86_tls_dialect = TLS_DIALECT_GNU2;
3789 error ("bad value (%s) for %stls-dialect=%s %s",
3790 ix86_tls_dialect_string, prefix, suffix, sw);
3793 if (ix87_precision_string)
3795 i = atoi (ix87_precision_string);
3796 if (i != 32 && i != 64 && i != 80)
3797 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3802 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3804 /* Enable by default the SSE and MMX builtins. Do allow the user to
3805 explicitly disable any of these. In particular, disabling SSE and
3806 MMX for kernel code is extremely useful. */
3807 if (!ix86_arch_specified)
3809 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3810 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3813 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3817 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3819 if (!ix86_arch_specified)
3821 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3823 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3824 when the programmer takes care to keep the stack from being destroyed. */
3825 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3826 target_flags |= MASK_NO_RED_ZONE;
3829 /* Keep nonleaf frame pointers. */
3830 if (flag_omit_frame_pointer)
3831 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3832 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3833 flag_omit_frame_pointer = 1;
3835 /* If we're doing fast math, we don't care about comparison order
3836 wrt NaNs. This lets us use a shorter comparison sequence. */
3837 if (flag_finite_math_only)
3838 target_flags &= ~MASK_IEEE_FP;
3840 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3841 since the insns won't need emulation. */
3842 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3843 target_flags &= ~MASK_NO_FANCY_MATH_387;
3845 /* Likewise, if the target doesn't have a 387, or we've specified
3846 software floating point, don't use 387 inline intrinsics. */
3848 target_flags |= MASK_NO_FANCY_MATH_387;
3850 /* Turn on MMX builtins for -msse. */
3853 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3854 x86_prefetch_sse = true;
3857 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3858 if (TARGET_SSE4_2 || TARGET_ABM)
3859 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3861 /* Validate -mpreferred-stack-boundary= value or default it to
3862 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3863 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3864 if (ix86_preferred_stack_boundary_string)
3866 int min = (TARGET_64BIT ? 4 : 2);
3867 int max = (TARGET_SEH ? 4 : 12);
3869 i = atoi (ix86_preferred_stack_boundary_string);
3870 if (i < min || i > max)
3873 error ("%spreferred-stack-boundary%s is not supported "
3874 "for this target", prefix, suffix);
3876 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
3877 prefix, i, suffix, min, max);
3880 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
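  /* The option value is again a log2 of bytes; e.g. (illustrative case,
     not taken from the code above) -mpreferred-stack-boundary=4 gives
     (1 << 4) * BITS_PER_UNIT == 128, i.e. the 16-byte stack alignment
     that the x86-64 psABI requires by default. */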
3883 /* Set the default value for -mstackrealign. */
3884 if (ix86_force_align_arg_pointer == -1)
3885 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3887 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3889 /* Validate -mincoming-stack-boundary= value or default it to
3890 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3891 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3892 if (ix86_incoming_stack_boundary_string)
3894 i = atoi (ix86_incoming_stack_boundary_string);
3895 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3896 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3897 i, TARGET_64BIT ? 4 : 2);
3900 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3901 ix86_incoming_stack_boundary
3902 = ix86_user_incoming_stack_boundary;
3906 /* Accept -msseregparm only if at least SSE support is enabled. */
3907 if (TARGET_SSEREGPARM
3909 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3911 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3912 if (ix86_fpmath_string != 0)
3914 if (! strcmp (ix86_fpmath_string, "387"))
3915 ix86_fpmath = FPMATH_387;
3916 else if (! strcmp (ix86_fpmath_string, "sse"))
3920 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3921 ix86_fpmath = FPMATH_387;
3924 ix86_fpmath = FPMATH_SSE;
3926 else if (! strcmp (ix86_fpmath_string, "387,sse")
3927 || ! strcmp (ix86_fpmath_string, "387+sse")
3928 || ! strcmp (ix86_fpmath_string, "sse,387")
3929 || ! strcmp (ix86_fpmath_string, "sse+387")
3930 || ! strcmp (ix86_fpmath_string, "both"))
3934 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3935 ix86_fpmath = FPMATH_387;
3937 else if (!TARGET_80387)
3939 warning (0, "387 instruction set disabled, using SSE arithmetic");
3940 ix86_fpmath = FPMATH_SSE;
3943 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3946 error ("bad value (%s) for %sfpmath=%s %s",
3947 ix86_fpmath_string, prefix, suffix, sw);
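/* Accepted spellings, as parsed above (usage examples):

     -mfpmath=387        80387 arithmetic only
     -mfpmath=sse        SSE scalar arithmetic (requires SSE/SSE2)
     -mfpmath=sse,387    both units; also spelled 387,sse, sse+387,
                         387+sse or both. */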
3950 /* If the i387 is disabled, then do not return values in it. */
3952 target_flags &= ~MASK_FLOAT_RETURNS;
3954 /* Use external vectorized library in vectorizing intrinsics. */
3955 if (ix86_veclibabi_string)
3957 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3958 ix86_veclib_handler = ix86_veclibabi_svml;
3959 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3960 ix86_veclib_handler = ix86_veclibabi_acml;
3962 error ("unknown vectorization library ABI type (%s) for "
3963 "%sveclibabi=%s %s", ix86_veclibabi_string,
3964 prefix, suffix, sw);
3967 if ((!USE_IX86_FRAME_POINTER
3968 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3969 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3971 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3973 /* ??? Unwind info is not correct around the CFG unless either a frame
3974 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3975 unwind info generation to be aware of the CFG and propagating states around edges. */
3977 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3978 || flag_exceptions || flag_non_call_exceptions)
3979 && flag_omit_frame_pointer
3980 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3982 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3983 warning (0, "unwind tables currently require either a frame pointer "
3984 "or %saccumulate-outgoing-args%s for correctness",
3986 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3989 /* If stack probes are required, the space used for large function
3990 arguments on the stack must also be probed, so enable
3991 -maccumulate-outgoing-args so this happens in the prologue. */
3992 if (TARGET_STACK_PROBE
3993 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3995 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3996 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3997 "for correctness", prefix, suffix);
3998 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4001 /* For sane SSE instruction set generation we need the fcomi instruction.
4002 It is safe to enable all CMOV instructions. */
4006 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4009 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4010 p = strchr (internal_label_prefix, 'X');
4011 internal_label_prefix_len = p - internal_label_prefix;
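/* Illustrative example (target-dependent, so treat this as an assumption):
   on a typical ELF target ASM_GENERATE_INTERNAL_LABEL produces a string
   such as "*.LX0"; the strchr for 'X' then yields the prefix "*.L" with
   internal_label_prefix_len == 3, which is later used to recognize
   compiler-generated local labels. */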
4015 /* When the scheduling description is not available, disable the scheduler
4016 passes so they won't slow down compilation and make x87 code slower. */
4017 if (!TARGET_SCHEDULE)
4018 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4020 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4021 ix86_cost->simultaneous_prefetches,
4022 global_options.x_param_values,
4023 global_options_set.x_param_values);
4024 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4025 global_options.x_param_values,
4026 global_options_set.x_param_values);
4027 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4028 global_options.x_param_values,
4029 global_options_set.x_param_values);
4030 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4031 global_options.x_param_values,
4032 global_options_set.x_param_values);
4034 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4035 if (flag_prefetch_loop_arrays < 0
4038 && software_prefetching_beneficial_p ())
4039 flag_prefetch_loop_arrays = 1;
4041 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4042 can be optimized to ap = __builtin_next_arg (0). */
4043 if (!TARGET_64BIT && !flag_split_stack)
4044 targetm.expand_builtin_va_start = NULL;
4048 ix86_gen_leave = gen_leave_rex64;
4049 ix86_gen_add3 = gen_adddi3;
4050 ix86_gen_sub3 = gen_subdi3;
4051 ix86_gen_sub3_carry = gen_subdi3_carry;
4052 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4053 ix86_gen_monitor = gen_sse3_monitor64;
4054 ix86_gen_andsp = gen_anddi3;
4055 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4056 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4057 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4061 ix86_gen_leave = gen_leave;
4062 ix86_gen_add3 = gen_addsi3;
4063 ix86_gen_sub3 = gen_subsi3;
4064 ix86_gen_sub3_carry = gen_subsi3_carry;
4065 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4066 ix86_gen_monitor = gen_sse3_monitor;
4067 ix86_gen_andsp = gen_andsi3;
4068 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4069 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4070 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4074 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4076 target_flags |= MASK_CLD & ~target_flags_explicit;
4079 if (!TARGET_64BIT && flag_pic)
4081 if (flag_fentry > 0)
4082 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4086 else if (TARGET_SEH)
4088 if (flag_fentry == 0)
4089 sorry ("-mno-fentry isn%'t compatible with SEH");
4092 else if (flag_fentry < 0)
4094 #if defined(PROFILE_BEFORE_PROLOGUE)
4101 /* Save the initial options in case the user does function-specific options. */
4103 target_option_default_node = target_option_current_node
4104 = build_target_option_node ();
4108 /* When not optimizing for size, enable the vzeroupper optimization for
4109 TARGET_AVX with -fexpensive-optimizations. */
4111 && flag_expensive_optimizations
4112 && !(target_flags_explicit & MASK_VZEROUPPER))
4113 target_flags |= MASK_VZEROUPPER;
4117 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4118 target_flags &= ~MASK_VZEROUPPER;
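/* In other words (a summary of the logic above, not new behavior): when not
   optimizing for size, -mavx -fexpensive-optimizations enables the
   vzeroupper pass automatically, so a vzeroupper is emitted on paths
   leaving AVX code to avoid AVX<->SSE transition penalties; an explicit
   -mno-vzeroupper, or compiling without AVX, keeps the pass off. */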
4122 /* Return TRUE if VAL is passed in a register with a 256-bit AVX mode. */
4125 function_pass_avx256_p (const_rtx val)
4130 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4133 if (GET_CODE (val) == PARALLEL)
4138 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4140 r = XVECEXP (val, 0, i);
4141 if (GET_CODE (r) == EXPR_LIST
4143 && REG_P (XEXP (r, 0))
4144 && (GET_MODE (XEXP (r, 0)) == OImode
4145 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4153 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4156 ix86_option_override (void)
4158 ix86_option_override_internal (true);
4161 /* Update register usage after having seen the compiler flags. */
4164 ix86_conditional_register_usage (void)
4169 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4171 if (fixed_regs[i] > 1)
4172 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4173 if (call_used_regs[i] > 1)
4174 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4177 /* The PIC register, if it exists, is fixed. */
4178 j = PIC_OFFSET_TABLE_REGNUM;
4179 if (j != INVALID_REGNUM)
4180 fixed_regs[j] = call_used_regs[j] = 1;
4182 /* The MS_ABI changes the set of call-used registers. */
4183 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4185 call_used_regs[SI_REG] = 0;
4186 call_used_regs[DI_REG] = 0;
4187 call_used_regs[XMM6_REG] = 0;
4188 call_used_regs[XMM7_REG] = 0;
4189 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4190 call_used_regs[i] = 0;
4193 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4194 other call-clobbered regs for 64-bit. */
4197 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4199 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4200 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4201 && call_used_regs[i])
4202 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4205 /* If MMX is disabled, squash the registers. */
4207 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4208 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4209 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4211 /* If SSE is disabled, squash the registers. */
4213 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4214 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4215 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4217 /* If the FPU is disabled, squash the registers. */
4218 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4219 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4220 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4221 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4223 /* If 32-bit, squash the 64-bit registers. */
4226 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4228 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4234 /* Save the current options */
4237 ix86_function_specific_save (struct cl_target_option *ptr)
4239 ptr->arch = ix86_arch;
4240 ptr->schedule = ix86_schedule;
4241 ptr->tune = ix86_tune;
4242 ptr->fpmath = ix86_fpmath;
4243 ptr->branch_cost = ix86_branch_cost;
4244 ptr->tune_defaulted = ix86_tune_defaulted;
4245 ptr->arch_specified = ix86_arch_specified;
4246 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4247 ptr->ix86_target_flags_explicit = target_flags_explicit;
4249 /* The fields are char but the variables are not; make sure the
4250 values fit in the fields. */
4251 gcc_assert (ptr->arch == ix86_arch);
4252 gcc_assert (ptr->schedule == ix86_schedule);
4253 gcc_assert (ptr->tune == ix86_tune);
4254 gcc_assert (ptr->fpmath == ix86_fpmath);
4255 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4258 /* Restore the current options */
4261 ix86_function_specific_restore (struct cl_target_option *ptr)
4263 enum processor_type old_tune = ix86_tune;
4264 enum processor_type old_arch = ix86_arch;
4265 unsigned int ix86_arch_mask, ix86_tune_mask;
4268 ix86_arch = (enum processor_type) ptr->arch;
4269 ix86_schedule = (enum attr_cpu) ptr->schedule;
4270 ix86_tune = (enum processor_type) ptr->tune;
4271 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4272 ix86_branch_cost = ptr->branch_cost;
4273 ix86_tune_defaulted = ptr->tune_defaulted;
4274 ix86_arch_specified = ptr->arch_specified;
4275 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4276 target_flags_explicit = ptr->ix86_target_flags_explicit;
4278 /* Recreate the arch feature tests if the arch changed */
4279 if (old_arch != ix86_arch)
4281 ix86_arch_mask = 1u << ix86_arch;
4282 for (i = 0; i < X86_ARCH_LAST; ++i)
4283 ix86_arch_features[i]
4284 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4287 /* Recreate the tune optimization tests */
4288 if (old_tune != ix86_tune)
4290 ix86_tune_mask = 1u << ix86_tune;
4291 for (i = 0; i < X86_TUNE_LAST; ++i)
4292 ix86_tune_features[i]
4293 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4297 /* Print the current options */
4300 ix86_function_specific_print (FILE *file, int indent,
4301 struct cl_target_option *ptr)
4304 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4305 NULL, NULL, NULL, false);
4307 fprintf (file, "%*sarch = %d (%s)\n",
4310 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4311 ? cpu_names[ptr->arch]
4314 fprintf (file, "%*stune = %d (%s)\n",
4317 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4318 ? cpu_names[ptr->tune]
4321 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4322 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4323 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4324 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4328 fprintf (file, "%*s%s\n", indent, "", target_string);
4329 free (target_string);
4334 /* Inner function to process the attribute((target(...))); take an argument
4335 and set the current options from the argument. If we have a list, recursively go over the list. */
4339 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4344 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4345 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4346 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4347 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
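/* Hand-expanded illustration (not generated code): the table entry

     IX86_ATTR_ISA ("avx", OPT_mavx)

   becomes the initializer

     { "avx", sizeof ("avx") - 1, ix86_opt_isa, OPT_mavx, 0 }

   i.e. { "avx", 3, ix86_opt_isa, OPT_mavx, 0 }. */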
4362 enum ix86_opt_type type;
4367 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4368 IX86_ATTR_ISA ("abm", OPT_mabm),
4369 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4370 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4371 IX86_ATTR_ISA ("aes", OPT_maes),
4372 IX86_ATTR_ISA ("avx", OPT_mavx),
4373 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4374 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4375 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4376 IX86_ATTR_ISA ("sse", OPT_msse),
4377 IX86_ATTR_ISA ("sse2", OPT_msse2),
4378 IX86_ATTR_ISA ("sse3", OPT_msse3),
4379 IX86_ATTR_ISA ("sse4", OPT_msse4),
4380 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4381 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4382 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4383 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4384 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4385 IX86_ATTR_ISA ("xop", OPT_mxop),
4386 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4387 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4388 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4389 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4391 /* string options */
4392 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4393 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4394 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4397 IX86_ATTR_YES ("cld",
4401 IX86_ATTR_NO ("fancy-math-387",
4402 OPT_mfancy_math_387,
4403 MASK_NO_FANCY_MATH_387),
4405 IX86_ATTR_YES ("ieee-fp",
4409 IX86_ATTR_YES ("inline-all-stringops",
4410 OPT_minline_all_stringops,
4411 MASK_INLINE_ALL_STRINGOPS),
4413 IX86_ATTR_YES ("inline-stringops-dynamically",
4414 OPT_minline_stringops_dynamically,
4415 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4417 IX86_ATTR_NO ("align-stringops",
4418 OPT_mno_align_stringops,
4419 MASK_NO_ALIGN_STRINGOPS),
4421 IX86_ATTR_YES ("recip",
4427 /* If this is a list, recurse to get the options. */
4428 if (TREE_CODE (args) == TREE_LIST)
4432 for (; args; args = TREE_CHAIN (args))
4433 if (TREE_VALUE (args)
4434 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4440 else if (TREE_CODE (args) != STRING_CST)
4443 /* Handle multiple arguments separated by commas. */
4444 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4446 while (next_optstr && *next_optstr != '\0')
4448 char *p = next_optstr;
4450 char *comma = strchr (next_optstr, ',');
4451 const char *opt_string;
4452 size_t len, opt_len;
4457 enum ix86_opt_type type = ix86_opt_unknown;
4463 len = comma - next_optstr;
4464 next_optstr = comma + 1;
4472 /* Recognize no-xxx. */
4473 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4482 /* Find the option. */
4485 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4487 type = attrs[i].type;
4488 opt_len = attrs[i].len;
4489 if (ch == attrs[i].string[0]
4490 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4491 && memcmp (p, attrs[i].string, opt_len) == 0)
4494 mask = attrs[i].mask;
4495 opt_string = attrs[i].string;
4500 /* Process the option. */
4503 error ("attribute(target(\"%s\")) is unknown", orig_p);
4507 else if (type == ix86_opt_isa)
4508 ix86_handle_option (opt, p, opt_set_p);
4510 else if (type == ix86_opt_yes || type == ix86_opt_no)
4512 if (type == ix86_opt_no)
4513 opt_set_p = !opt_set_p;
4516 target_flags |= mask;
4518 target_flags &= ~mask;
4521 else if (type == ix86_opt_str)
4525 error ("option(\"%s\") was already specified", opt_string);
4529 p_strings[opt] = xstrdup (p + opt_len);
4539 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4542 ix86_valid_target_attribute_tree (tree args)
4544 const char *orig_arch_string = ix86_arch_string;
4545 const char *orig_tune_string = ix86_tune_string;
4546 const char *orig_fpmath_string = ix86_fpmath_string;
4547 int orig_tune_defaulted = ix86_tune_defaulted;
4548 int orig_arch_specified = ix86_arch_specified;
4549 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4552 struct cl_target_option *def
4553 = TREE_TARGET_OPTION (target_option_default_node);
4555 /* Process each of the options on the chain. */
4556 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4559 /* If the changed options are different from the default, rerun
4560 ix86_option_override_internal, and then save the options away.
4561 The string options are attribute options, and will be undone
4562 when we copy the save structure. */
4563 if (ix86_isa_flags != def->x_ix86_isa_flags
4564 || target_flags != def->x_target_flags
4565 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4566 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4567 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4569 /* If we are using the default tune= or arch=, undo the string assigned,
4570 and use the default. */
4571 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4572 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4573 else if (!orig_arch_specified)
4574 ix86_arch_string = NULL;
4576 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4577 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4578 else if (orig_tune_defaulted)
4579 ix86_tune_string = NULL;
4581 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4582 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4583 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4584 else if (!TARGET_64BIT && TARGET_SSE)
4585 ix86_fpmath_string = "sse,387";
4587 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4588 ix86_option_override_internal (false);
4590 /* Add any builtin functions with the new isa if any. */
4591 ix86_add_new_builtins (ix86_isa_flags);
4593 /* Save the current options unless we are validating options for
4595 t = build_target_option_node ();
4597 ix86_arch_string = orig_arch_string;
4598 ix86_tune_string = orig_tune_string;
4599 ix86_fpmath_string = orig_fpmath_string;
4601 /* Free up memory allocated to hold the strings */
4602 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4603 if (option_strings[i])
4604 free (option_strings[i]);
4610 /* Hook to validate attribute((target("string"))). */
4613 ix86_valid_target_attribute_p (tree fndecl,
4614 tree ARG_UNUSED (name),
4616 int ARG_UNUSED (flags))
4618 struct cl_target_option cur_target;
4620 tree old_optimize = build_optimization_node ();
4621 tree new_target, new_optimize;
4622 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4624 /* If the function changed the optimization levels as well as setting target
4625 options, start with the optimizations specified. */
4626 if (func_optimize && func_optimize != old_optimize)
4627 cl_optimization_restore (&global_options,
4628 TREE_OPTIMIZATION (func_optimize));
4630 /* The target attributes may also change some optimization flags, so update
4631 the optimization options if necessary. */
4632 cl_target_option_save (&cur_target, &global_options);
4633 new_target = ix86_valid_target_attribute_tree (args);
4634 new_optimize = build_optimization_node ();
4641 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4643 if (old_optimize != new_optimize)
4644 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4647 cl_target_option_restore (&global_options, &cur_target);
4649 if (old_optimize != new_optimize)
4650 cl_optimization_restore (&global_options,
4651 TREE_OPTIMIZATION (old_optimize));
4657 /* Hook to determine if one function can safely inline another. */
4660 ix86_can_inline_p (tree caller, tree callee)
4663 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4664 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4666 /* If callee has no option attributes, then it is ok to inline. */
4670 /* If the caller has no option attributes but the callee does, then it is not ok to inline. */
4672 else if (!caller_tree)
4677 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4678 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4680 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4681 function can inline an SSE2 function, but an SSE2 function can't inline an SSE4 function. */
4683 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4684 != callee_opts->x_ix86_isa_flags)
4687 /* See if we have the same non-isa options. */
4688 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4691 /* See if arch, tune, etc. are the same. */
4692 else if (caller_opts->arch != callee_opts->arch)
4695 else if (caller_opts->tune != callee_opts->tune)
4698 else if (caller_opts->fpmath != callee_opts->fpmath)
4701 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4712 /* Remember the last target of ix86_set_current_function. */
4713 static GTY(()) tree ix86_previous_fndecl;
4715 /* Establish appropriate back-end context for processing the function
4716 FNDECL. The argument might be NULL to indicate processing at top
4717 level, outside of any function scope. */
4719 ix86_set_current_function (tree fndecl)
4721 /* Only change the context if the function changes. This hook is called
4722 several times in the course of compiling a function, and we don't want to
4723 slow things down too much or call target_reinit when it isn't safe. */
4724 if (fndecl && fndecl != ix86_previous_fndecl)
4726 tree old_tree = (ix86_previous_fndecl
4727 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4730 tree new_tree = (fndecl
4731 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4734 ix86_previous_fndecl = fndecl;
4735 if (old_tree == new_tree)
4740 cl_target_option_restore (&global_options,
4741 TREE_TARGET_OPTION (new_tree));
4747 struct cl_target_option *def
4748 = TREE_TARGET_OPTION (target_option_current_node);
4750 cl_target_option_restore (&global_options, def);
4757 /* Return true if this goes in large data/bss. */
4760 ix86_in_large_data_p (tree exp)
4762 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4765 /* Functions are never large data. */
4766 if (TREE_CODE (exp) == FUNCTION_DECL)
4769 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4771 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4772 if (strcmp (section, ".ldata") == 0
4773 || strcmp (section, ".lbss") == 0)
4779 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4781 /* If this is an incomplete type with size 0, then we can't put it
4782 in data because it might be too big when completed. */
4783 if (!size || size > ix86_section_threshold)
4790 /* Switch to the appropriate section for output of DECL.
4791 DECL is either a `VAR_DECL' node or a constant of some sort.
4792 RELOC indicates whether forming the initial value of DECL requires
4793 link-time relocations. */
4795 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4799 x86_64_elf_select_section (tree decl, int reloc,
4800 unsigned HOST_WIDE_INT align)
4802 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4803 && ix86_in_large_data_p (decl))
4805 const char *sname = NULL;
4806 unsigned int flags = SECTION_WRITE;
4807 switch (categorize_decl_for_section (decl, reloc))
4812 case SECCAT_DATA_REL:
4813 sname = ".ldata.rel";
4815 case SECCAT_DATA_REL_LOCAL:
4816 sname = ".ldata.rel.local";
4818 case SECCAT_DATA_REL_RO:
4819 sname = ".ldata.rel.ro";
4821 case SECCAT_DATA_REL_RO_LOCAL:
4822 sname = ".ldata.rel.ro.local";
4826 flags |= SECTION_BSS;
4829 case SECCAT_RODATA_MERGE_STR:
4830 case SECCAT_RODATA_MERGE_STR_INIT:
4831 case SECCAT_RODATA_MERGE_CONST:
4835 case SECCAT_SRODATA:
4842 /* We don't split these for the medium model. Place them into
4843 default sections and hope for the best. */
4848 /* We might get called with string constants, but get_named_section
4849 doesn't like them as they are not DECLs. Also, we need to set
4850 flags in that case. */
4852 return get_section (sname, flags, NULL);
4853 return get_named_section (decl, sname, reloc);
4856 return default_elf_select_section (decl, reloc, align);
4859 /* Build up a unique section name, expressed as a
4860 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4861 RELOC indicates whether the initial value of EXP requires
4862 link-time relocations. */
4864 static void ATTRIBUTE_UNUSED
4865 x86_64_elf_unique_section (tree decl, int reloc)
4867 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4868 && ix86_in_large_data_p (decl))
4870 const char *prefix = NULL;
4871 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4872 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4874 switch (categorize_decl_for_section (decl, reloc))
4877 case SECCAT_DATA_REL:
4878 case SECCAT_DATA_REL_LOCAL:
4879 case SECCAT_DATA_REL_RO:
4880 case SECCAT_DATA_REL_RO_LOCAL:
4881 prefix = one_only ? ".ld" : ".ldata";
4884 prefix = one_only ? ".lb" : ".lbss";
4887 case SECCAT_RODATA_MERGE_STR:
4888 case SECCAT_RODATA_MERGE_STR_INIT:
4889 case SECCAT_RODATA_MERGE_CONST:
4890 prefix = one_only ? ".lr" : ".lrodata";
4892 case SECCAT_SRODATA:
4899 /* We don't split these for the medium model. Place them into
4900 default sections and hope for the best. */
4905 const char *name, *linkonce;
4908 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4909 name = targetm.strip_name_encoding (name);
4911 /* If we're using one_only, then there needs to be a .gnu.linkonce
4912 prefix to the section name. */
4913 linkonce = one_only ? ".gnu.linkonce" : "";
4915 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4917 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4921 default_unique_section (decl, reloc);
4924 #ifdef COMMON_ASM_OP
4925 /* This says how to output assembler code to declare an
4926 uninitialized external linkage data object.
4928 For medium model x86-64 we need to use the .largecomm directive for large objects. */
4931 x86_elf_aligned_common (FILE *file,
4932 const char *name, unsigned HOST_WIDE_INT size,
4935 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4936 && size > (unsigned int)ix86_section_threshold)
4937 fputs (".largecomm\t", file);
4939 fputs (COMMON_ASM_OP, file);
4940 assemble_name (file, name);
4941 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4942 size, align / BITS_PER_UNIT);
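  /* E.g. (illustrative output; "big_buf" is a hypothetical object): a 1 MiB
     object under -mcmodel=medium with the default -mlarge-data-threshold
     would be emitted as

         .largecomm	big_buf,1048576,32

     while smaller objects keep the ordinary COMMON_ASM_OP form. */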
4946 /* Utility function for targets to use in implementing
4947 ASM_OUTPUT_ALIGNED_BSS. */
4950 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4951 const char *name, unsigned HOST_WIDE_INT size,
4954 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4955 && size > (unsigned int)ix86_section_threshold)
4956 switch_to_section (get_named_section (decl, ".lbss", 0));
4958 switch_to_section (bss_section);
4959 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4960 #ifdef ASM_DECLARE_OBJECT_NAME
4961 last_assemble_variable_decl = decl;
4962 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4964 /* The standard thing is just to output a label for the object. */
4965 ASM_OUTPUT_LABEL (file, name);
4966 #endif /* ASM_DECLARE_OBJECT_NAME */
4967 ASM_OUTPUT_SKIP (file, size ? size : 1);
4970 static const struct default_options ix86_option_optimization_table[] =
4972 /* Turn off -fschedule-insns by default. It tends to make the
4973 problem with not enough registers even worse. */
4974 #ifdef INSN_SCHEDULING
4975 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
4978 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4979 SUBTARGET_OPTIMIZATION_OPTIONS,
4981 { OPT_LEVELS_NONE, 0, NULL, 0 }
4984 /* Implement TARGET_OPTION_INIT_STRUCT. */
4987 ix86_option_init_struct (struct gcc_options *opts)
4990 /* The Darwin libraries never set errno, so we might as well
4991 avoid calling them when that's the only reason we would. */
4992 opts->x_flag_errno_math = 0;
4994 opts->x_flag_pcc_struct_return = 2;
4995 opts->x_flag_asynchronous_unwind_tables = 2;
4996 opts->x_flag_vect_cost_model = 1;
4999 /* Decide whether we must probe the stack before any space allocation
5000 on this target. It's essentially TARGET_STACK_PROBE except when
5001 -fstack-check causes the stack to be already probed differently. */
5004 ix86_target_stack_probe (void)
5006 /* Do not probe the stack twice if static stack checking is enabled. */
5007 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5010 return TARGET_STACK_PROBE;
5013 /* Decide whether we can make a sibling call to a function. DECL is the
5014 declaration of the function being targeted by the call and EXP is the
5015 CALL_EXPR representing the call. */
5018 ix86_function_ok_for_sibcall (tree decl, tree exp)
5020 tree type, decl_or_type;
5023 /* If we are generating position-independent code, we cannot sibcall
5024 optimize any indirect call, or a direct call to a global function,
5025 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5029 && (!decl || !targetm.binds_local_p (decl)))
5032 /* If we need to align the outgoing stack, then sibcalling would
5033 unalign the stack, which may break the called function. */
5034 if (ix86_minimum_incoming_stack_boundary (true)
5035 < PREFERRED_STACK_BOUNDARY)
5040 decl_or_type = decl;
5041 type = TREE_TYPE (decl);
5045 /* We're looking at the CALL_EXPR, we need the type of the function. */
5046 type = CALL_EXPR_FN (exp); /* pointer expression */
5047 type = TREE_TYPE (type); /* pointer type */
5048 type = TREE_TYPE (type); /* function type */
5049 decl_or_type = type;
5052 /* Check that the return value locations are the same. Like
5053 if we are returning floats on the 80387 register stack, we cannot
5054 make a sibcall from a function that doesn't return a float to a
5055 function that does or, conversely, from a function that does return
5056 a float to a function that doesn't; the necessary stack adjustment
5057 would not be executed. This is also the place we notice
5058 differences in the return value ABI. Note that it is ok for one
5059 of the functions to have void return type as long as the return
5060 value of the other is passed in a register. */
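  /* Concretely (an illustrative case of the rule above): a function
     returning void cannot sibcall a callee returning float in %st(0),
     since the value left on the FP register stack would never be
     popped. */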
5061 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5062 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5064 if (STACK_REG_P (a) || STACK_REG_P (b))
5066 if (!rtx_equal_p (a, b))
5069 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5071 /* Disable sibcall if we need to generate vzeroupper after returning from the callee. */
5073 if (TARGET_VZEROUPPER
5074 && cfun->machine->callee_return_avx256_p
5075 && !cfun->machine->caller_return_avx256_p)
5078 else if (!rtx_equal_p (a, b))
5083 /* The SYSV ABI has more call-clobbered registers;
5084 disallow sibcalls from MS to SYSV. */
5085 if (cfun->machine->call_abi == MS_ABI
5086 && ix86_function_type_abi (type) == SYSV_ABI)
5091 /* If this call is indirect, we'll need to be able to use a
5092 call-clobbered register for the address of the target function.
5093 Make sure that all such registers are not used for passing
5094 parameters. Note that DLLIMPORT functions are indirect. */
5096 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5098 if (ix86_function_regparm (type, NULL) >= 3)
5100 /* ??? Need to count the actual number of registers to be used,
5101 not the possible number of registers. Fix later. */
5107 /* Otherwise okay. That also includes certain types of indirect calls. */
5111 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5112 and "sseregparm" calling convention attributes;
5113 arguments as in struct attribute_spec.handler. */
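/* Usage illustration (hypothetical user code, not part of this file):

     int  __attribute__ ((regparm (3))) f (int a, int b, int c);
     void __attribute__ ((fastcall))    g (int x, int y);

   Mutually exclusive combinations, e.g. fastcall together with regparm
   on one declaration, are diagnosed below. */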
5116 ix86_handle_cconv_attribute (tree *node, tree name,
5118 int flags ATTRIBUTE_UNUSED,
5121 if (TREE_CODE (*node) != FUNCTION_TYPE
5122 && TREE_CODE (*node) != METHOD_TYPE
5123 && TREE_CODE (*node) != FIELD_DECL
5124 && TREE_CODE (*node) != TYPE_DECL)
5126 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5128 *no_add_attrs = true;
5132 /* Can combine regparm with all attributes but fastcall. */
5133 if (is_attribute_p ("regparm", name))
5137 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5139 error ("fastcall and regparm attributes are not compatible");
5142 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5144 error ("regparm and thiscall attributes are not compatible");
5147 cst = TREE_VALUE (args);
5148 if (TREE_CODE (cst) != INTEGER_CST)
5150 warning (OPT_Wattributes,
5151 "%qE attribute requires an integer constant argument",
5153 *no_add_attrs = true;
5155 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5157 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5159 *no_add_attrs = true;
5167 /* Do not warn when emulating the MS ABI. */
5168 if ((TREE_CODE (*node) != FUNCTION_TYPE
5169 && TREE_CODE (*node) != METHOD_TYPE)
5170 || ix86_function_type_abi (*node) != MS_ABI)
5171 warning (OPT_Wattributes, "%qE attribute ignored",
5173 *no_add_attrs = true;
5177 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5178 if (is_attribute_p ("fastcall", name))
5180 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5182 error ("fastcall and cdecl attributes are not compatible");
5184 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5186 error ("fastcall and stdcall attributes are not compatible");
5188 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5190 error ("fastcall and regparm attributes are not compatible");
5192 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5194 error ("fastcall and thiscall attributes are not compatible");
5198 /* Can combine stdcall with fastcall (redundant), regparm and
5200 else if (is_attribute_p ("stdcall", name))
5202 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5204 error ("stdcall and cdecl attributes are not compatible");
5206 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5208 error ("stdcall and fastcall attributes are not compatible");
5210 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5212 error ("stdcall and thiscall attributes are not compatible");
5216 /* Can combine cdecl with regparm and sseregparm. */
5217 else if (is_attribute_p ("cdecl", name))
5219 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5221 error ("stdcall and cdecl attributes are not compatible");
5223 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5225 error ("fastcall and cdecl attributes are not compatible");
5227 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5229 error ("cdecl and thiscall attributes are not compatible");
5232 else if (is_attribute_p ("thiscall", name))
5234 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5235 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5237 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5239 error ("stdcall and thiscall attributes are not compatible");
5241 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5243 error ("fastcall and thiscall attributes are not compatible");
5245 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5247 error ("cdecl and thiscall attributes are not compatible");
5251 /* Can combine sseregparm with all attributes. */
5256 /* Return 0 if the attributes for two types are incompatible, 1 if they
5257 are compatible, and 2 if they are nearly compatible (which causes a
5258 warning to be generated). */
5261 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5263 /* Check for mismatch of non-default calling convention. */
5264 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5266 if (TREE_CODE (type1) != FUNCTION_TYPE
5267 && TREE_CODE (type1) != METHOD_TYPE)
5270 /* Check for mismatched fastcall/regparm types. */
5271 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5272 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5273 || (ix86_function_regparm (type1, NULL)
5274 != ix86_function_regparm (type2, NULL)))
5277 /* Check for mismatched sseregparm types. */
5278 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5279 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5282 /* Check for mismatched thiscall types. */
5283 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5284 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5287 /* Check for mismatched return types (cdecl vs stdcall). */
5288 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5289 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
5295 /* Return the regparm value for a function with the indicated TYPE and DECL.
5296 DECL may be NULL when calling function indirectly
5297 or considering a libcall. */
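/* E.g. (illustrative; "f" is a hypothetical declaration) for

     void __attribute__ ((regparm (3))) f (int a, int b, int c);

   this returns 3: the first three integer arguments are passed in
   EAX, EDX and ECX instead of on the stack. */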
5300 ix86_function_regparm (const_tree type, const_tree decl)
5306 return (ix86_function_type_abi (type) == SYSV_ABI
5307 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5309 regparm = ix86_regparm;
5310 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5313 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5317 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5320 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5323 /* Use register calling convention for local functions when possible. */
5325 && TREE_CODE (decl) == FUNCTION_DECL
5327 && !(profile_flag && !flag_fentry))
5329 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5330 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5333 int local_regparm, globals = 0, regno;
5335 /* Make sure no regparm register is taken by a
5336 fixed register variable. */
5337 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5338 if (fixed_regs[local_regparm])
5341 /* We don't want to use regparm(3) for nested functions as
5342 these use a static chain pointer in the third argument. */
5343 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5346 /* In 32-bit mode save a register for the split stack. */
5347 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5350 /* Each fixed register usage increases register pressure,
5351 so fewer registers should be used for argument passing.
5352 This functionality can be overridden by an explicit regparm value. */
5354 for (regno = 0; regno <= DI_REG; regno++)
5355 if (fixed_regs[regno])
5359 = globals < local_regparm ? local_regparm - globals : 0;
5361 if (local_regparm > regparm)
5362 regparm = local_regparm;
5369 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5370 DFmode (2) arguments in SSE registers for a function with the
5371 indicated TYPE and DECL. DECL may be NULL when calling function
5372 indirectly or considering a libcall. Otherwise return 0. */
5375 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5377 gcc_assert (!TARGET_64BIT);
5379 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5380 by the sseregparm attribute. */
5381 if (TARGET_SSEREGPARM
5382 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5389 error ("calling %qD with attribute sseregparm without "
5390 "SSE/SSE2 enabled", decl);
5392 error ("calling %qT with attribute sseregparm without "
5393 "SSE/SSE2 enabled", type);
5401 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5402 (and DFmode for SSE2) arguments in SSE registers. */
5403 if (decl && TARGET_SSE_MATH && optimize
5404 && !(profile_flag && !flag_fentry))
5406 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5407 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5409 return TARGET_SSE2 ? 2 : 1;
5415 /* Return true if EAX is live at the start of the function. Used by
5416 ix86_expand_prologue to determine if we need special help before
5417 calling allocate_stack_worker. */
5420 ix86_eax_live_at_start_p (void)
5422 /* Cheat. Don't bother working forward from ix86_function_regparm
5423 to the function type to whether an actual argument is located in
5424 eax. Instead just look at cfg info, which is still close enough
5425 to correct at this point. This gives false positives for broken
5426 functions that might use uninitialized data that happens to be
5427 allocated in eax, but who cares? */
5428 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5431 /* Value is the number of bytes of arguments automatically
5432 popped when returning from a subroutine call.
5433 FUNDECL is the declaration node of the function (as a tree),
5434 FUNTYPE is the data type of the function (as a tree),
5435 or for a library call it is an identifier node for the subroutine name.
5436 SIZE is the number of bytes of arguments passed on the stack.
5438 On the 80386, the RTD insn may be used to pop them if the number
5439 of args is fixed, but if the number is variable then the caller
5440 must pop them all. RTD can't be used for library calls now
5441 because the library is compiled with the Unix compiler.
5442 Use of RTD is a selectable option, since it is incompatible with
5443 standard Unix calling sequences. If the option is not selected,
5444 the caller must always pop the args.
5446 The attribute stdcall is equivalent to RTD on a per module basis. */
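/* For example (illustrative; "f" is a hypothetical function): a stdcall
   function

     void __attribute__ ((stdcall)) f (int a, int b);

   returns with "ret $8", popping its two 4-byte arguments itself, so
   this hook returns 8; for a plain cdecl function it returns 0 and the
   caller adjusts the stack. */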
5449 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5453 /* None of the 64-bit ABIs pop arguments. */
5457 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5459 /* Cdecl functions override -mrtd, and never pop the stack. */
5460 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5462 /* Stdcall and fastcall functions will pop the stack if not variadic. */
5464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5465 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5466 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5469 if (rtd && ! stdarg_p (funtype))
5473 /* Lose any fake structure return argument if it is passed on the stack. */
5474 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5475 && !KEEP_AGGREGATE_RETURN_POINTER)
5477 int nregs = ix86_function_regparm (funtype, fundecl);
5479 return GET_MODE_SIZE (Pmode);
5485 /* Argument support functions. */
5487 /* Return true when register REGNO may be used to pass function parameters. */
5489 ix86_function_arg_regno_p (int regno)
5492 const int *parm_regs;
5497 return (regno < REGPARM_MAX
5498 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5500 return (regno < REGPARM_MAX
5501 || (TARGET_MMX && MMX_REGNO_P (regno)
5502 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5503 || (TARGET_SSE && SSE_REGNO_P (regno)
5504 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5509 if (SSE_REGNO_P (regno) && TARGET_SSE)
5514 if (TARGET_SSE && SSE_REGNO_P (regno)
5515 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5519 /* TODO: The function should depend on the current function's ABI, but
5520 builtins.c would need updating then. Therefore we use the default ABI. */
5523 /* RAX is used as hidden argument to va_arg functions. */
5524 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5527 if (ix86_abi == MS_ABI)
5528 parm_regs = x86_64_ms_abi_int_parameter_registers;
5530 parm_regs = x86_64_int_parameter_registers;
5531 for (i = 0; i < (ix86_abi == MS_ABI
5532 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5533 if (regno == parm_regs[i])
5538 /* Return true if we do not know how to pass TYPE solely in registers. */
5541 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5543 if (must_pass_in_stack_var_size_or_pad (mode, type))
5546 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5547 The layout_type routine is crafty and tries to trick us into passing
5548 currently unsupported vector types on the stack by using TImode. */
5549 return (!TARGET_64BIT && mode == TImode
5550 && type && TREE_CODE (type) != VECTOR_TYPE);
5553 /* Return the size, in bytes, of the area reserved for arguments passed
5554 in registers for the function represented by FNDECL, depending on the ABI used. */
5557 ix86_reg_parm_stack_space (const_tree fndecl)
5559 enum calling_abi call_abi = SYSV_ABI;
5560 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5561 call_abi = ix86_function_abi (fndecl);
5563 call_abi = ix86_function_type_abi (fndecl);
5564 if (call_abi == MS_ABI)
5569 /* Returns SYSV_ABI or MS_ABI, dependent on FNTYPE, specifying the call ABI used. */
5572 ix86_function_type_abi (const_tree fntype)
5574 if (TARGET_64BIT && fntype != NULL)
5576 enum calling_abi abi = ix86_abi;
5577 if (abi == SYSV_ABI)
5579 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5582 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5590 ix86_function_ms_hook_prologue (const_tree fn)
5592 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5594 if (decl_function_context (fn) != NULL_TREE)
5595 error_at (DECL_SOURCE_LOCATION (fn),
5596 "ms_hook_prologue is not compatible with nested function");
5603 static enum calling_abi
5604 ix86_function_abi (const_tree fndecl)
5608 return ix86_function_type_abi (TREE_TYPE (fndecl));
5611 /* Returns SYSV_ABI or MS_ABI, dependent on cfun, specifying the call ABI used. */
5614 ix86_cfun_abi (void)
5616 if (! cfun || ! TARGET_64BIT)
5618 return cfun->machine->call_abi;
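/* Illustration of the attribute-driven ABI switch resolved above
   (hypothetical user code, not from this file): on x86-64 GNU/Linux

     int __attribute__ ((ms_abi)) wincall (int a, int b);

   makes calls to wincall follow the Microsoft x64 convention (first
   arguments in RCX, RDX, R8, R9) while the rest of the translation
   unit stays SYSV_ABI. */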
5621 /* Write the extra assembler code needed to declare a function properly. */
5624 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5627 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5631 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5632 unsigned int filler_cc = 0xcccccccc;
5634 for (i = 0; i < filler_count; i += 4)
5635 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5638 #ifdef SUBTARGET_ASM_UNWIND_INIT
5639 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5642 ASM_OUTPUT_LABEL (asm_out_file, fname);
5644 /* Output magic byte marker, if hot-patch attribute is set. */
5649 /* leaq [%rsp + 0], %rsp */
5650 asm_fprintf (asm_out_file, ASM_BYTE
5651 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5655 /* movl.s %edi, %edi
5657 movl.s %esp, %ebp */
5658 asm_fprintf (asm_out_file, ASM_BYTE
5659 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5665 extern void init_regs (void);
5667 /* Implementation of the call ABI switching target hook. The call
5668 register sets specific to FNDECL are set up here. See also
5669 ix86_conditional_register_usage for more details. */
5671 ix86_call_abi_override (const_tree fndecl)
5673 if (fndecl == NULL_TREE)
5674 cfun->machine->call_abi = ix86_abi;
5676 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5679 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5680 re-initialization of init_regs each time we switch function context since
5681 this is needed only during RTL expansion. */
5683 ix86_maybe_switch_abi (void)
5686 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5690 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5691 for a call to a function whose data type is FNTYPE.
5692 For a library call, FNTYPE is 0. */
5695 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5696 tree fntype, /* tree ptr for function decl */
5697 rtx libname, /* SYMBOL_REF of library name or 0 */
5701 struct cgraph_local_info *i;
5704 memset (cum, 0, sizeof (*cum));
5706 /* Initialize for the current callee. */
5709 cfun->machine->callee_pass_avx256_p = false;
5710 cfun->machine->callee_return_avx256_p = false;
5715 i = cgraph_local_info (fndecl);
5716 cum->call_abi = ix86_function_abi (fndecl);
5717 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5722 cum->call_abi = ix86_function_type_abi (fntype);
5724 fnret_type = TREE_TYPE (fntype);
5729 if (TARGET_VZEROUPPER && fnret_type)
5731 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5733 if (function_pass_avx256_p (fnret_value))
5735 /* The return value of this function uses 256bit AVX modes. */
5737 cfun->machine->callee_return_avx256_p = true;
5739 cfun->machine->caller_return_avx256_p = true;
5743 cum->caller = caller;
5745 /* Set up the number of registers to use for passing arguments. */
5747 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5748 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5749 "or subtarget optimization implying it");
5750 cum->nregs = ix86_regparm;
5753 cum->nregs = (cum->call_abi == SYSV_ABI
5754 ? X86_64_REGPARM_MAX
5755 : X86_64_MS_REGPARM_MAX);
5759 cum->sse_nregs = SSE_REGPARM_MAX;
5762 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5763 ? X86_64_SSE_REGPARM_MAX
5764 : X86_64_MS_SSE_REGPARM_MAX);
5768 cum->mmx_nregs = MMX_REGPARM_MAX;
5769 cum->warn_avx = true;
5770 cum->warn_sse = true;
5771 cum->warn_mmx = true;
5773 /* Because the type might mismatch between caller and callee, we need to
5774 use the actual type of the function for local calls.
5775 FIXME: cgraph_analyze can be told to actually record if a function uses
5776 va_start, so for local functions maybe_vaarg can be made more aggressive.
5778 FIXME: once the type system is fixed, we won't need this code anymore. */
5780 fntype = TREE_TYPE (fndecl);
5781 cum->maybe_vaarg = (fntype
5782 ? (!prototype_p (fntype) || stdarg_p (fntype))
5787 /* If there are variable arguments, then we won't pass anything
5788 in registers in 32-bit mode. */
5789 if (stdarg_p (fntype))
5800 /* Use ecx and edx registers if function has fastcall attribute,
5801 else look for regparm information. */
5804 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5807 cum->fastcall = 1; /* Same first register as in fastcall. */
5809 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5815 cum->nregs = ix86_function_regparm (fntype, fndecl);
5818 /* Set up the number of SSE registers used for passing SFmode
5819 and DFmode arguments. Warn for mismatching ABI. */
5820 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5824 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5825 But in the case of vector types, it is some vector mode.
5827 When we have only some of our vector isa extensions enabled, then there
5828 are some modes for which vector_mode_supported_p is false. For these
5829 modes, the generic vector support in gcc will choose some non-vector mode
5830 in order to implement the type. By computing the natural mode, we'll
5831 select the proper ABI location for the operand and not depend on whatever
5832 the middle-end decides to do with these vector types.
5834 The middle-end can't deal with vector types > 16 bytes. In this
5835 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5838 static enum machine_mode
5839 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5841 enum machine_mode mode = TYPE_MODE (type);
5843 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5845 HOST_WIDE_INT size = int_size_in_bytes (type);
5846 if ((size == 8 || size == 16 || size == 32)
5847 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5848 && TYPE_VECTOR_SUBPARTS (type) > 1)
5850 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5852 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5853 mode = MIN_MODE_VECTOR_FLOAT;
5855 mode = MIN_MODE_VECTOR_INT;
5857 /* Get the mode which has this inner mode and number of units. */
5858 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5859 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5860 && GET_MODE_INNER (mode) == innermode)
5862 if (size == 32 && !TARGET_AVX)
5864 static bool warnedavx;
5871 warning (0, "AVX vector argument without AVX "
5872 "enabled changes the ABI");
5874 return TYPE_MODE (type);
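/* Editorial illustration (not from the original sources): for a type
   declared as

     typedef float v4sf __attribute__ ((vector_size (16)));

   TYPE_MODE may be a non-vector mode when SSE is disabled, but the
   loop above still finds V4SFmode (4 units of SFmode), so the ABI
   slot is chosen as if the vector ISA were available.  */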
5887 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5888 this may not agree with the mode that the type system has chosen for the
5889 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5890 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5893 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5898 if (orig_mode != BLKmode)
5899 tmp = gen_rtx_REG (orig_mode, regno);
5902 tmp = gen_rtx_REG (mode, regno);
5903 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5904 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5910 /* x86-64 register passing implementation. See the x86-64 psABI for details.
5911 The goal of this code is to classify each eightbyte of an incoming argument
5912 by register class and assign registers accordingly. */
5914 /* Return the union class of CLASS1 and CLASS2.
5915 See the x86-64 PS ABI for details. */
5917 static enum x86_64_reg_class
5918 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5920 /* Rule #1: If both classes are equal, this is the resulting class. */
5921 if (class1 == class2)
5924 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5926 if (class1 == X86_64_NO_CLASS)
5928 if (class2 == X86_64_NO_CLASS)
5931 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5932 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5933 return X86_64_MEMORY_CLASS;
5935 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5936 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5937 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5938 return X86_64_INTEGERSI_CLASS;
5939 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5940 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5941 return X86_64_INTEGER_CLASS;
5943 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5945 if (class1 == X86_64_X87_CLASS
5946 || class1 == X86_64_X87UP_CLASS
5947 || class1 == X86_64_COMPLEX_X87_CLASS
5948 || class2 == X86_64_X87_CLASS
5949 || class2 == X86_64_X87UP_CLASS
5950 || class2 == X86_64_COMPLEX_X87_CLASS)
5951 return X86_64_MEMORY_CLASS;
5953 /* Rule #6: Otherwise class SSE is used. */
5954 return X86_64_SSE_CLASS;
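/* Worked example (editorial, illustrative only): in

     struct s { int i; float f; };

   both fields occupy the first eightbyte.  The int half classifies as
   X86_64_INTEGERSI_CLASS and the float half as X86_64_SSESF_CLASS;
   rule #4 merges them to X86_64_INTEGERSI_CLASS, so the whole struct
   travels in a single general-purpose register.  */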
5957 /* Classify the argument of type TYPE and mode MODE.
5958 CLASSES will be filled by the register class used to pass each word
5959 of the operand. The number of words is returned. In case the parameter
5960 should be passed in memory, 0 is returned. As a special case for zero
5961 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5963 BIT_OFFSET is used internally for handling records; it specifies the
5964 offset in bits, modulo 256, to avoid overflow cases.
5966 See the x86-64 PS ABI for details.
5970 classify_argument (enum machine_mode mode, const_tree type,
5971 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5973 HOST_WIDE_INT bytes =
5974 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5975 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5977 /* Variable sized entities are always passed/returned in memory. */
5981 if (mode != VOIDmode
5982 && targetm.calls.must_pass_in_stack (mode, type))
5985 if (type && AGGREGATE_TYPE_P (type))
5989 enum x86_64_reg_class subclasses[MAX_CLASSES];
5991 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5995 for (i = 0; i < words; i++)
5996 classes[i] = X86_64_NO_CLASS;
5998 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5999 signal the memory class, so handle this as a special case. */
6002 classes[0] = X86_64_NO_CLASS;
6006 /* Classify each field of record and merge classes. */
6007 switch (TREE_CODE (type))
6010 /* And now merge the fields of structure. */
6011 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6013 if (TREE_CODE (field) == FIELD_DECL)
6017 if (TREE_TYPE (field) == error_mark_node)
6020 /* Bitfields are always classified as integer. Handle them
6021 early, since later code would consider them to be
6022 misaligned integers. */
6023 if (DECL_BIT_FIELD (field))
6025 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6026 i < ((int_bit_position (field) + (bit_offset % 64))
6027 + tree_low_cst (DECL_SIZE (field), 0)
6030 merge_classes (X86_64_INTEGER_CLASS,
6037 type = TREE_TYPE (field);
6039 /* Flexible array member is ignored. */
6040 if (TYPE_MODE (type) == BLKmode
6041 && TREE_CODE (type) == ARRAY_TYPE
6042 && TYPE_SIZE (type) == NULL_TREE
6043 && TYPE_DOMAIN (type) != NULL_TREE
6044 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6049 if (!warned && warn_psabi)
6052 inform (input_location,
6053 "the ABI of passing struct with"
6054 " a flexible array member has"
6055 " changed in GCC 4.4");
6059 num = classify_argument (TYPE_MODE (type), type,
6061 (int_bit_position (field)
6062 + bit_offset) % 256);
6065 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6066 for (i = 0; i < num && (i + pos) < words; i++)
6068 merge_classes (subclasses[i], classes[i + pos]);
6075 /* Arrays are handled as small records. */
6078 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6079 TREE_TYPE (type), subclasses, bit_offset);
6083 /* The partial classes are now full classes. */
6084 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6085 subclasses[0] = X86_64_SSE_CLASS;
6086 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6087 && !((bit_offset % 64) == 0 && bytes == 4))
6088 subclasses[0] = X86_64_INTEGER_CLASS;
6090 for (i = 0; i < words; i++)
6091 classes[i] = subclasses[i % num];
6096 case QUAL_UNION_TYPE:
6097 /* Unions are similar to RECORD_TYPE but offset is always 0.
6099 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6101 if (TREE_CODE (field) == FIELD_DECL)
6105 if (TREE_TYPE (field) == error_mark_node)
6108 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6109 TREE_TYPE (field), subclasses,
6113 for (i = 0; i < num; i++)
6114 classes[i] = merge_classes (subclasses[i], classes[i]);
6125 /* When size > 16 bytes, if the first eightbyte isn't
6126 X86_64_SSE_CLASS or any other one isn't
6127 X86_64_SSEUP_CLASS, everything should be passed in memory. */
6129 if (classes[0] != X86_64_SSE_CLASS)
6132 for (i = 1; i < words; i++)
6133 if (classes[i] != X86_64_SSEUP_CLASS)
6137 /* Final merger cleanup. */
6138 for (i = 0; i < words; i++)
6140 /* If one class is MEMORY, everything should be passed in memory. */
6142 if (classes[i] == X86_64_MEMORY_CLASS)
6145 /* X86_64_SSEUP_CLASS should always be preceded by
6146 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6147 if (classes[i] == X86_64_SSEUP_CLASS
6148 && classes[i - 1] != X86_64_SSE_CLASS
6149 && classes[i - 1] != X86_64_SSEUP_CLASS)
6151 /* The first one should never be X86_64_SSEUP_CLASS. */
6152 gcc_assert (i != 0);
6153 classes[i] = X86_64_SSE_CLASS;
6156 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6157 everything should be passed in memory. */
6158 if (classes[i] == X86_64_X87UP_CLASS
6159 && (classes[i - 1] != X86_64_X87_CLASS))
6163 /* The first one should never be X86_64_X87UP_CLASS. */
6164 gcc_assert (i != 0);
6165 if (!warned && warn_psabi)
6168 inform (input_location,
6169 "the ABI of passing union with long double"
6170 " has changed in GCC 4.4");
6178 /* Compute the alignment needed. We align all types to their natural
6179 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6180 if (mode != VOIDmode && mode != BLKmode)
6182 int mode_alignment = GET_MODE_BITSIZE (mode);
6185 mode_alignment = 128;
6186 else if (mode == XCmode)
6187 mode_alignment = 256;
6188 if (COMPLEX_MODE_P (mode))
6189 mode_alignment /= 2;
6190 /* Misaligned fields are always returned in memory. */
6191 if (bit_offset % mode_alignment)
6195 /* For V1xx modes, just use the base mode. */
6196 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6197 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6198 mode = GET_MODE_INNER (mode);
6200 /* Classification of atomic types. */
6205 classes[0] = X86_64_SSE_CLASS;
6208 classes[0] = X86_64_SSE_CLASS;
6209 classes[1] = X86_64_SSEUP_CLASS;
6219 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6223 classes[0] = X86_64_INTEGERSI_CLASS;
6226 else if (size <= 64)
6228 classes[0] = X86_64_INTEGER_CLASS;
6231 else if (size <= 64+32)
6233 classes[0] = X86_64_INTEGER_CLASS;
6234 classes[1] = X86_64_INTEGERSI_CLASS;
6237 else if (size <= 64+64)
6239 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6247 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6251 /* OImode shouldn't be used directly. */
6256 if (!(bit_offset % 64))
6257 classes[0] = X86_64_SSESF_CLASS;
6259 classes[0] = X86_64_SSE_CLASS;
6262 classes[0] = X86_64_SSEDF_CLASS;
6265 classes[0] = X86_64_X87_CLASS;
6266 classes[1] = X86_64_X87UP_CLASS;
6269 classes[0] = X86_64_SSE_CLASS;
6270 classes[1] = X86_64_SSEUP_CLASS;
6273 classes[0] = X86_64_SSE_CLASS;
6274 if (!(bit_offset % 64))
6280 if (!warned && warn_psabi)
6283 inform (input_location,
6284 "the ABI of passing structure with complex float"
6285 " member has changed in GCC 4.4");
6287 classes[1] = X86_64_SSESF_CLASS;
6291 classes[0] = X86_64_SSEDF_CLASS;
6292 classes[1] = X86_64_SSEDF_CLASS;
6295 classes[0] = X86_64_COMPLEX_X87_CLASS;
6298 /* This mode is larger than 16 bytes. */
6306 classes[0] = X86_64_SSE_CLASS;
6307 classes[1] = X86_64_SSEUP_CLASS;
6308 classes[2] = X86_64_SSEUP_CLASS;
6309 classes[3] = X86_64_SSEUP_CLASS;
6317 classes[0] = X86_64_SSE_CLASS;
6318 classes[1] = X86_64_SSEUP_CLASS;
6326 classes[0] = X86_64_SSE_CLASS;
6332 gcc_assert (VECTOR_MODE_P (mode));
6337 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6339 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6340 classes[0] = X86_64_INTEGERSI_CLASS;
6342 classes[0] = X86_64_INTEGER_CLASS;
6343 classes[1] = X86_64_INTEGER_CLASS;
6344 return 1 + (bytes > 8);
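/* A few illustrative classifications (editorial sketch, not
   normative; see the psABI for the authoritative rules):

     struct { long a, b; }    -> INTEGER, INTEGER   (two GPRs)
     struct { double a, b; }  -> SSEDF, SSEDF       (two SSE registers)
     long double              -> X87, X87UP         (memory as argument,
                                                     %st(0) as return)
     40-byte aggregate        -> 0                  (memory)  */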
6348 /* Examine the argument and set the number of registers required in each
6349 class. Return 0 iff the parameter should be passed in memory. */
6351 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6352 int *int_nregs, int *sse_nregs)
6354 enum x86_64_reg_class regclass[MAX_CLASSES];
6355 int n = classify_argument (mode, type, regclass, 0);
6361 for (n--; n >= 0; n--)
6362 switch (regclass[n])
6364 case X86_64_INTEGER_CLASS:
6365 case X86_64_INTEGERSI_CLASS:
6368 case X86_64_SSE_CLASS:
6369 case X86_64_SSESF_CLASS:
6370 case X86_64_SSEDF_CLASS:
6373 case X86_64_NO_CLASS:
6374 case X86_64_SSEUP_CLASS:
6376 case X86_64_X87_CLASS:
6377 case X86_64_X87UP_CLASS:
6381 case X86_64_COMPLEX_X87_CLASS:
6382 return in_return ? 2 : 0;
6383 case X86_64_MEMORY_CLASS:
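/* For instance (editorial illustration): struct { int i; float f; }
   classifies as a single INTEGERSI eightbyte, giving *int_nregs == 1
   and *sse_nregs == 0, while struct { double a, b; } gives
   *int_nregs == 0 and *sse_nregs == 2.  A long double argument hits
   the X87 classes and yields 0 here, i.e. memory.  */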
6389 /* Construct container for the argument used by GCC interface. See
6390 FUNCTION_ARG for the detailed description. */
6393 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6394 const_tree type, int in_return, int nintregs, int nsseregs,
6395 const int *intreg, int sse_regno)
6397 /* The following variables hold the static issued_error state. */
6398 static bool issued_sse_arg_error;
6399 static bool issued_sse_ret_error;
6400 static bool issued_x87_ret_error;
6402 enum machine_mode tmpmode;
6404 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6405 enum x86_64_reg_class regclass[MAX_CLASSES];
6409 int needed_sseregs, needed_intregs;
6410 rtx exp[MAX_CLASSES];
6413 n = classify_argument (mode, type, regclass, 0);
6416 if (!examine_argument (mode, type, in_return, &needed_intregs,
6419 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6422 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6423 some less clueful developer tries to use floating-point anyway. */
6424 if (needed_sseregs && !TARGET_SSE)
6428 if (!issued_sse_ret_error)
6430 error ("SSE register return with SSE disabled");
6431 issued_sse_ret_error = true;
6434 else if (!issued_sse_arg_error)
6436 error ("SSE register argument with SSE disabled");
6437 issued_sse_arg_error = true;
6442 /* Likewise, error if the ABI requires us to return values in the
6443 x87 registers and the user specified -mno-80387. */
6444 if (!TARGET_80387 && in_return)
6445 for (i = 0; i < n; i++)
6446 if (regclass[i] == X86_64_X87_CLASS
6447 || regclass[i] == X86_64_X87UP_CLASS
6448 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6450 if (!issued_x87_ret_error)
6452 error ("x87 register return with x87 disabled");
6453 issued_x87_ret_error = true;
6458 /* First construct simple cases. Avoid SCmode, since we want to use
6459 single register to pass this type. */
6460 if (n == 1 && mode != SCmode)
6461 switch (regclass[0])
6463 case X86_64_INTEGER_CLASS:
6464 case X86_64_INTEGERSI_CLASS:
6465 return gen_rtx_REG (mode, intreg[0]);
6466 case X86_64_SSE_CLASS:
6467 case X86_64_SSESF_CLASS:
6468 case X86_64_SSEDF_CLASS:
6469 if (mode != BLKmode)
6470 return gen_reg_or_parallel (mode, orig_mode,
6471 SSE_REGNO (sse_regno));
6473 case X86_64_X87_CLASS:
6474 case X86_64_COMPLEX_X87_CLASS:
6475 return gen_rtx_REG (mode, FIRST_STACK_REG);
6476 case X86_64_NO_CLASS:
6477 /* Zero sized array, struct or class. */
6482 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6483 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6484 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6486 && regclass[0] == X86_64_SSE_CLASS
6487 && regclass[1] == X86_64_SSEUP_CLASS
6488 && regclass[2] == X86_64_SSEUP_CLASS
6489 && regclass[3] == X86_64_SSEUP_CLASS
6491 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6494 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6495 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6496 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6497 && regclass[1] == X86_64_INTEGER_CLASS
6498 && (mode == CDImode || mode == TImode || mode == TFmode)
6499 && intreg[0] + 1 == intreg[1])
6500 return gen_rtx_REG (mode, intreg[0]);
6502 /* Otherwise figure out the entries of the PARALLEL. */
6503 for (i = 0; i < n; i++)
6507 switch (regclass[i])
6509 case X86_64_NO_CLASS:
6511 case X86_64_INTEGER_CLASS:
6512 case X86_64_INTEGERSI_CLASS:
6513 /* Merge TImodes on aligned occasions here too. */
6514 if (i * 8 + 8 > bytes)
6515 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6516 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6520 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6521 if (tmpmode == BLKmode)
6523 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6524 gen_rtx_REG (tmpmode, *intreg),
6528 case X86_64_SSESF_CLASS:
6529 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6530 gen_rtx_REG (SFmode,
6531 SSE_REGNO (sse_regno)),
6535 case X86_64_SSEDF_CLASS:
6536 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6537 gen_rtx_REG (DFmode,
6538 SSE_REGNO (sse_regno)),
6542 case X86_64_SSE_CLASS:
6550 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6560 && regclass[1] == X86_64_SSEUP_CLASS
6561 && regclass[2] == X86_64_SSEUP_CLASS
6562 && regclass[3] == X86_64_SSEUP_CLASS);
6569 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6570 gen_rtx_REG (tmpmode,
6571 SSE_REGNO (sse_regno)),
6580 /* Empty aligned struct, union or class. */
6584 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6585 for (i = 0; i < nexps; i++)
6586 XVECEXP (ret, 0, i) = exp [i];
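/* Editorial sketch of a resulting container: for a
   struct { double d; int i; } return value the PARALLEL built above
   is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:SI ax) (const_int 8))])

   i.e. each eightbyte is tagged with the register carrying it and its
   byte offset within the aggregate.  */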
6590 /* Update the data in CUM to advance over an argument of mode MODE
6591 and data type TYPE. (TYPE is null for libcalls where that information
6592 may not be available.) */
6595 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6596 const_tree type, HOST_WIDE_INT bytes,
6597 HOST_WIDE_INT words)
6613 cum->words += words;
6614 cum->nregs -= words;
6615 cum->regno += words;
6617 if (cum->nregs <= 0)
6625 /* OImode shouldn't be used directly. */
6629 if (cum->float_in_sse < 2)
6632 if (cum->float_in_sse < 1)
6649 if (!type || !AGGREGATE_TYPE_P (type))
6651 cum->sse_words += words;
6652 cum->sse_nregs -= 1;
6653 cum->sse_regno += 1;
6654 if (cum->sse_nregs <= 0)
6668 if (!type || !AGGREGATE_TYPE_P (type))
6670 cum->mmx_words += words;
6671 cum->mmx_nregs -= 1;
6672 cum->mmx_regno += 1;
6673 if (cum->mmx_nregs <= 0)
6684 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6685 const_tree type, HOST_WIDE_INT words, bool named)
6687 int int_nregs, sse_nregs;
6689 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6690 if (!named && VALID_AVX256_REG_MODE (mode))
6693 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6694 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6696 cum->nregs -= int_nregs;
6697 cum->sse_nregs -= sse_nregs;
6698 cum->regno += int_nregs;
6699 cum->sse_regno += sse_nregs;
6703 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6704 cum->words = (cum->words + align - 1) & ~(align - 1);
6705 cum->words += words;
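/* Editorial example: advancing past a SYSV argument of type
   struct { double a, b; } decrements cum->sse_nregs by 2 and bumps
   cum->sse_regno by 2, while a plain long consumes one entry from
   cum->nregs/cum->regno.  If the argument does not fit the remaining
   registers, only cum->words grows, as coded just above.  */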
6710 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6711 HOST_WIDE_INT words)
6713 /* Otherwise, this should be passed indirectly. */
6714 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6716 cum->words += words;
6724 /* Update the data in CUM to advance over an argument of mode MODE and
6725 data type TYPE. (TYPE is null for libcalls where that information
6726 may not be available.) */
6729 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6730 const_tree type, bool named)
6732 HOST_WIDE_INT bytes, words;
6734 if (mode == BLKmode)
6735 bytes = int_size_in_bytes (type);
6737 bytes = GET_MODE_SIZE (mode);
6738 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6741 mode = type_natural_mode (type, NULL);
6743 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6744 function_arg_advance_ms_64 (cum, bytes, words);
6745 else if (TARGET_64BIT)
6746 function_arg_advance_64 (cum, mode, type, words, named);
6748 function_arg_advance_32 (cum, mode, type, bytes, words);
6751 /* Define where to put the arguments to a function.
6752 Value is zero to push the argument on the stack,
6753 or a hard register in which to store the argument.
6755 MODE is the argument's machine mode.
6756 TYPE is the data type of the argument (as a tree).
6757 This is null for libcalls where that information may
6759 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6760 the preceding args and about the function being called.
6761 NAMED is nonzero if this argument is a named parameter
6762 (otherwise it is an extra parameter matching an ellipsis). */
6765 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6766 enum machine_mode orig_mode, const_tree type,
6767 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6769 static bool warnedsse, warnedmmx;
6771 /* Avoid the AL settings for the Unix64 ABI. */
6772 if (mode == VOIDmode)
6788 if (words <= cum->nregs)
6790 int regno = cum->regno;
6792 /* Fastcall allocates the first two DWORD (SImode) or
6793 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6799 || (type && AGGREGATE_TYPE_P (type)))
6802 /* ECX not EAX is the first allocated register. */
6803 if (regno == AX_REG)
6806 return gen_rtx_REG (mode, regno);
6811 if (cum->float_in_sse < 2)
6814 if (cum->float_in_sse < 1)
6818 /* In 32bit, we pass TImode in xmm registers. */
6825 if (!type || !AGGREGATE_TYPE_P (type))
6827 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6830 warning (0, "SSE vector argument without SSE enabled "
6834 return gen_reg_or_parallel (mode, orig_mode,
6835 cum->sse_regno + FIRST_SSE_REG);
6840 /* OImode shouldn't be used directly. */
6849 if (!type || !AGGREGATE_TYPE_P (type))
6852 return gen_reg_or_parallel (mode, orig_mode,
6853 cum->sse_regno + FIRST_SSE_REG);
6863 if (!type || !AGGREGATE_TYPE_P (type))
6865 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6868 warning (0, "MMX vector argument without MMX enabled "
6872 return gen_reg_or_parallel (mode, orig_mode,
6873 cum->mmx_regno + FIRST_MMX_REG);
6882 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6883 enum machine_mode orig_mode, const_tree type, bool named)
6885 /* Handle a hidden AL argument containing the number of registers
6886 for varargs x86-64 functions. */
6887 if (mode == VOIDmode)
6888 return GEN_INT (cum->maybe_vaarg
6889 ? (cum->sse_nregs < 0
6890 ? X86_64_SSE_REGPARM_MAX
6905 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6911 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6913 &x86_64_int_parameter_registers [cum->regno],
6918 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6919 enum machine_mode orig_mode, bool named,
6920 HOST_WIDE_INT bytes)
6924 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6925 We use the value -2 to specify that the current function call is MSABI. */
6926 if (mode == VOIDmode)
6927 return GEN_INT (-2);
6929 /* If we've run out of registers, it goes on the stack. */
6930 if (cum->nregs == 0)
6933 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6935 /* Only floating point modes are passed in anything but integer regs. */
6936 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6939 regno = cum->regno + FIRST_SSE_REG;
6944 /* Unnamed floating parameters are passed in both the
6945 SSE and integer registers. */
6946 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6947 t2 = gen_rtx_REG (mode, regno);
6948 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6949 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6950 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6953 /* Handle aggregate types passed in registers. */
6954 if (orig_mode == BLKmode)
6956 if (bytes > 0 && bytes <= 8)
6957 mode = (bytes > 4 ? DImode : SImode);
6958 if (mode == BLKmode)
6962 return gen_reg_or_parallel (mode, orig_mode, regno);
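/* Editorial note on the positional allocation above (a sketch of the
   Windows x64 convention): every parameter consumes one slot whatever
   its class, so for f (int, double, int) the arguments land in
   ecx, xmm1 and r8d -- the double takes the second slot's SSE
   register because cum->regno counts slots, not per-class registers.  */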
6965 /* Return where to put the arguments to a function.
6966 Return zero to push the argument on the stack, or a hard register
in which to store the argument.
6968 MODE is the argument's machine mode. TYPE is the data type of the
6969 argument. It is null for libcalls where that information may not be
6970 available. CUM gives information about the preceding args and about
6971 the function being called. NAMED is nonzero if this argument is a
6972 named parameter (otherwise it is an extra parameter matching an
6976 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6977 const_tree type, bool named)
6979 enum machine_mode mode = omode;
6980 HOST_WIDE_INT bytes, words;
6983 if (mode == BLKmode)
6984 bytes = int_size_in_bytes (type);
6986 bytes = GET_MODE_SIZE (mode);
6987 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6989 /* To simplify the code below, represent vector types with a vector mode
6990 even if MMX/SSE are not active. */
6991 if (type && TREE_CODE (type) == VECTOR_TYPE)
6992 mode = type_natural_mode (type, cum);
6994 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6995 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6996 else if (TARGET_64BIT)
6997 arg = function_arg_64 (cum, mode, omode, type, named);
6999 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7001 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7003 /* This argument uses 256bit AVX modes. */
7005 cfun->machine->callee_pass_avx256_p = true;
7007 cfun->machine->caller_pass_avx256_p = true;
7013 /* A C expression that indicates when an argument must be passed by
7014 reference. If nonzero for an argument, a copy of that argument is
7015 made in memory and a pointer to the argument is passed instead of
7016 the argument itself. The pointer is passed in whatever way is
7017 appropriate for passing a pointer to that type. */
7020 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7021 enum machine_mode mode ATTRIBUTE_UNUSED,
7022 const_tree type, bool named ATTRIBUTE_UNUSED)
7024 /* See Windows x64 Software Convention. */
7025 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7027 int msize = (int) GET_MODE_SIZE (mode);
7030 /* Arrays are passed by reference. */
7031 if (TREE_CODE (type) == ARRAY_TYPE)
7034 if (AGGREGATE_TYPE_P (type))
7036 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7037 are passed by reference. */
7038 msize = int_size_in_bytes (type);
7042 /* __m128 is passed by reference. */
7044 case 1: case 2: case 4: case 8:
7050 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
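/* Editorial examples of the rules above: under the MS ABI a 3-byte
   struct { char c[3]; } or a 16-byte __m128 is passed by reference,
   while an 8-byte struct { int a, b; } matches one of the 1/2/4/8
   sizes and is passed by value.  */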
7056 /* Return true when TYPE should be 128bit aligned for 32bit argument
7057 passing ABI. XXX: This function is obsolete and is only used for
7058 checking psABI compatibility with previous versions of GCC. */
7061 ix86_compat_aligned_value_p (const_tree type)
7063 enum machine_mode mode = TYPE_MODE (type);
7064 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7068 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7070 if (TYPE_ALIGN (type) < 128)
7073 if (AGGREGATE_TYPE_P (type))
7075 /* Walk the aggregates recursively. */
7076 switch (TREE_CODE (type))
7080 case QUAL_UNION_TYPE:
7084 /* Walk all the structure fields. */
7085 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7087 if (TREE_CODE (field) == FIELD_DECL
7088 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7095 /* Just in case some language passes arrays by value. */
7096 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7107 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7108 XXX: This function is obsolete and is only used for checking psABI
7109 compatibility with previous versions of GCC. */
7112 ix86_compat_function_arg_boundary (enum machine_mode mode,
7113 const_tree type, unsigned int align)
7115 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7116 natural boundaries. */
7117 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7119 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7120 make an exception for SSE modes since these require 128bit alignment.
7123 The handling here differs from field_alignment. ICC aligns MMX
7124 arguments to 4 byte boundaries, while structure fields are aligned
7125 to 8 byte boundaries. */
7128 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7129 align = PARM_BOUNDARY;
7133 if (!ix86_compat_aligned_value_p (type))
7134 align = PARM_BOUNDARY;
7137 if (align > BIGGEST_ALIGNMENT)
7138 align = BIGGEST_ALIGNMENT;
7142 /* Return true when TYPE should be 128bit aligned for 32bit argument
7146 ix86_contains_aligned_value_p (const_tree type)
7148 enum machine_mode mode = TYPE_MODE (type);
7150 if (mode == XFmode || mode == XCmode)
7153 if (TYPE_ALIGN (type) < 128)
7156 if (AGGREGATE_TYPE_P (type))
7158 /* Walk the aggregates recursively. */
7159 switch (TREE_CODE (type))
7163 case QUAL_UNION_TYPE:
7167 /* Walk all the structure fields. */
7168 for (field = TYPE_FIELDS (type);
7170 field = DECL_CHAIN (field))
7172 if (TREE_CODE (field) == FIELD_DECL
7173 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7180 /* Just in case some language passes arrays by value. */
7181 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7190 return TYPE_ALIGN (type) >= 128;
7195 /* Gives the alignment boundary, in bits, of an argument with the
7196 specified mode and type. */
7199 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7204 /* Since the main variant type is used for the call, convert the
7205 type to its main variant. */
7206 type = TYPE_MAIN_VARIANT (type);
7207 align = TYPE_ALIGN (type);
7210 align = GET_MODE_ALIGNMENT (mode);
7211 if (align < PARM_BOUNDARY)
7212 align = PARM_BOUNDARY;
7216 unsigned int saved_align = align;
7220 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7223 if (mode == XFmode || mode == XCmode)
7224 align = PARM_BOUNDARY;
7226 else if (!ix86_contains_aligned_value_p (type))
7227 align = PARM_BOUNDARY;
7230 align = PARM_BOUNDARY;
7235 && align != ix86_compat_function_arg_boundary (mode, type,
7239 inform (input_location,
7240 "The ABI for passing parameters with %d-byte"
7241 " alignment has changed in GCC 4.6",
7242 align / BITS_PER_UNIT);
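/* Two illustrative data points (editorial): on 32-bit targets a
   double argument stays at the 32-bit PARM_BOUNDARY, while an __m128
   argument (or an aggregate containing one) is aligned to 128 bits;
   whenever this computation disagrees with the pre-4.6 one, the note
   above is emitted.  */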
7249 /* Return true if REGNO is a possible register number of a function value. */
7252 ix86_function_value_regno_p (const unsigned int regno)
7259 case FIRST_FLOAT_REG:
7260 /* TODO: The function should depend on the current function ABI, but
7261 builtins.c would need updating then. Therefore we use the default ABI. */
7263 if (TARGET_64BIT && ix86_abi == MS_ABI)
7265 return TARGET_FLOAT_RETURNS_IN_80387;
7271 if (TARGET_MACHO || TARGET_64BIT)
7279 /* Define how to find the value returned by a function.
7280 VALTYPE is the data type of the value (as a tree).
7281 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7282 otherwise, FUNC is 0. */
7285 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7286 const_tree fntype, const_tree fn)
7290 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7291 we normally prevent this case when mmx is not available. However
7292 some ABIs may require the result to be returned like DImode. */
7293 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7294 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7296 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7297 we prevent this case when sse is not available. However some ABIs
7298 may require the result to be returned like integer TImode. */
7299 else if (mode == TImode
7300 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7301 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7303 /* 32-byte vector modes in %ymm0. */
7304 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7305 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7307 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7308 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7309 regno = FIRST_FLOAT_REG;
7311 /* Most things go in %eax. */
7314 /* Override FP return register with %xmm0 for local functions when
7315 SSE math is enabled or for functions with sseregparm attribute. */
7316 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7318 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7319 if ((sse_level >= 1 && mode == SFmode)
7320 || (sse_level == 2 && mode == DFmode))
7321 regno = FIRST_SSE_REG;
7324 /* OImode shouldn't be used directly. */
7325 gcc_assert (mode != OImode);
7327 return gen_rtx_REG (orig_mode, regno);
7331 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7336 /* Handle libcalls, which don't provide a type node. */
7337 if (valtype == NULL)
7349 return gen_rtx_REG (mode, FIRST_SSE_REG);
7352 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7356 return gen_rtx_REG (mode, AX_REG);
7360 ret = construct_container (mode, orig_mode, valtype, 1,
7361 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7362 x86_64_int_return_registers, 0);
7364 /* For zero-sized structures, construct_container returns NULL, but we
7365 need to keep the rest of the compiler happy by returning a meaningful value. */
7367 ret = gen_rtx_REG (orig_mode, AX_REG);
7373 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7375 unsigned int regno = AX_REG;
7379 switch (GET_MODE_SIZE (mode))
7382 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7383 && !COMPLEX_MODE_P (mode))
7384 regno = FIRST_SSE_REG;
7388 if (mode == SFmode || mode == DFmode)
7389 regno = FIRST_SSE_REG;
7395 return gen_rtx_REG (orig_mode, regno);
7399 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7400 enum machine_mode orig_mode, enum machine_mode mode)
7402 const_tree fn, fntype;
7405 if (fntype_or_decl && DECL_P (fntype_or_decl))
7406 fn = fntype_or_decl;
7407 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7409 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7410 return function_value_ms_64 (orig_mode, mode);
7411 else if (TARGET_64BIT)
7412 return function_value_64 (orig_mode, mode, valtype);
7414 return function_value_32 (orig_mode, mode, fntype, fn);
7418 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7419 bool outgoing ATTRIBUTE_UNUSED)
7421 enum machine_mode mode, orig_mode;
7423 orig_mode = TYPE_MODE (valtype);
7424 mode = type_natural_mode (valtype, NULL);
7425 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7429 ix86_libcall_value (enum machine_mode mode)
7431 return ix86_function_value_1 (NULL, NULL, mode, mode);
7434 /* Return true iff type is returned in memory. */
7436 static bool ATTRIBUTE_UNUSED
7437 return_in_memory_32 (const_tree type, enum machine_mode mode)
7441 if (mode == BLKmode)
7444 size = int_size_in_bytes (type);
7446 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7449 if (VECTOR_MODE_P (mode) || mode == TImode)
7451 /* User-created vectors small enough to fit in EAX. */
7455 /* MMX/3dNow values are returned in MM0,
7456 except when it doesn't exist or the ABI prescribes otherwise. */
7458 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7460 /* SSE values are returned in XMM0, except when it doesn't exist. */
7464 /* AVX values are returned in YMM0, except when it doesn't exist. */
7475 /* OImode shouldn't be used directly. */
7476 gcc_assert (mode != OImode);
7481 static bool ATTRIBUTE_UNUSED
7482 return_in_memory_64 (const_tree type, enum machine_mode mode)
7484 int needed_intregs, needed_sseregs;
7485 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7488 static bool ATTRIBUTE_UNUSED
7489 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7491 HOST_WIDE_INT size = int_size_in_bytes (type);
7493 /* __m128 is returned in xmm0. */
7494 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7495 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7498 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
7499 return size != 1 && size != 2 && size != 4 && size != 8;
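/* E.g. (editorial illustration): a 16-byte vector such as __m128
   comes back in xmm0, an 8-byte struct { int a, b; } comes back in
   rax, and a 12-byte struct fails the size test above and is
   returned in memory through a hidden pointer.  */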
7503 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7505 #ifdef SUBTARGET_RETURN_IN_MEMORY
7506 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7508 const enum machine_mode mode = type_natural_mode (type, NULL);
7512 if (ix86_function_type_abi (fntype) == MS_ABI)
7513 return return_in_memory_ms_64 (type, mode);
7515 return return_in_memory_64 (type, mode);
7518 return return_in_memory_32 (type, mode);
7522 /* When returning SSE vector types, we have a choice of either
7523 (1) being abi incompatible with a -march switch, or
7524 (2) generating an error.
7525 Given no good solution, I think the safest thing is one warning.
7526 The user won't be able to use -Werror, but....
7528 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7529 called in response to actually generating a caller or callee that
7530 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7531 via aggregate_value_p for general type probing from tree-ssa. */
7534 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7536 static bool warnedsse, warnedmmx;
7538 if (!TARGET_64BIT && type)
7540 /* Look at the return type of the function, not the function type. */
7541 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7543 if (!TARGET_SSE && !warnedsse)
7546 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7549 warning (0, "SSE vector return without SSE enabled "
7554 if (!TARGET_MMX && !warnedmmx)
7556 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7559 warning (0, "MMX vector return without MMX enabled "
7569 /* Create the va_list data type. */
7571 /* Return the calling-convention-specific va_list data type.
7572 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7575 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7577 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7579 /* For i386 we use a plain pointer to the argument area. */
7580 if (!TARGET_64BIT || abi == MS_ABI)
7581 return build_pointer_type (char_type_node);
7583 record = lang_hooks.types.make_type (RECORD_TYPE);
7584 type_decl = build_decl (BUILTINS_LOCATION,
7585 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7587 f_gpr = build_decl (BUILTINS_LOCATION,
7588 FIELD_DECL, get_identifier ("gp_offset"),
7589 unsigned_type_node);
7590 f_fpr = build_decl (BUILTINS_LOCATION,
7591 FIELD_DECL, get_identifier ("fp_offset"),
7592 unsigned_type_node);
7593 f_ovf = build_decl (BUILTINS_LOCATION,
7594 FIELD_DECL, get_identifier ("overflow_arg_area"),
7596 f_sav = build_decl (BUILTINS_LOCATION,
7597 FIELD_DECL, get_identifier ("reg_save_area"),
7600 va_list_gpr_counter_field = f_gpr;
7601 va_list_fpr_counter_field = f_fpr;
7603 DECL_FIELD_CONTEXT (f_gpr) = record;
7604 DECL_FIELD_CONTEXT (f_fpr) = record;
7605 DECL_FIELD_CONTEXT (f_ovf) = record;
7606 DECL_FIELD_CONTEXT (f_sav) = record;
7608 TYPE_STUB_DECL (record) = type_decl;
7609 TYPE_NAME (record) = type_decl;
7610 TYPE_FIELDS (record) = f_gpr;
7611 DECL_CHAIN (f_gpr) = f_fpr;
7612 DECL_CHAIN (f_fpr) = f_ovf;
7613 DECL_CHAIN (f_ovf) = f_sav;
7615 layout_type (record);
7617 /* The correct type is an array type of one element. */
7618 return build_array_type (record, build_index_type (size_zero_node));
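/* For reference (editorial, informal rendering): the record built
   above corresponds to the familiar SYSV x86-64 va_list layout

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   where gp_offset and fp_offset count bytes into reg_save_area for
   the GPR and SSE slots respectively.  */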
7621 /* Setup the builtin va_list data type and for 64-bit the additional
7622 calling convention specific va_list data types. */
7625 ix86_build_builtin_va_list (void)
7627 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7629 /* Initialize abi specific va_list builtin types. */
7633 if (ix86_abi == MS_ABI)
7635 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7636 if (TREE_CODE (t) != RECORD_TYPE)
7637 t = build_variant_type_copy (t);
7638 sysv_va_list_type_node = t;
7643 if (TREE_CODE (t) != RECORD_TYPE)
7644 t = build_variant_type_copy (t);
7645 sysv_va_list_type_node = t;
7647 if (ix86_abi != MS_ABI)
7649 t = ix86_build_builtin_va_list_abi (MS_ABI);
7650 if (TREE_CODE (t) != RECORD_TYPE)
7651 t = build_variant_type_copy (t);
7652 ms_va_list_type_node = t;
7657 if (TREE_CODE (t) != RECORD_TYPE)
7658 t = build_variant_type_copy (t);
7659 ms_va_list_type_node = t;
7666 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7669 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7675 /* GPR size of varargs save area. */
7676 if (cfun->va_list_gpr_size)
7677 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7679 ix86_varargs_gpr_size = 0;
7681 /* FPR size of varargs save area. We don't need it if we don't pass
7682 anything in SSE registers. */
7683 if (TARGET_SSE && cfun->va_list_fpr_size)
7684 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7686 ix86_varargs_fpr_size = 0;
7688 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7691 save_area = frame_pointer_rtx;
7692 set = get_varargs_alias_set ();
7694 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7695 if (max > X86_64_REGPARM_MAX)
7696 max = X86_64_REGPARM_MAX;
7698 for (i = cum->regno; i < max; i++)
7700 mem = gen_rtx_MEM (Pmode,
7701 plus_constant (save_area, i * UNITS_PER_WORD));
7702 MEM_NOTRAP_P (mem) = 1;
7703 set_mem_alias_set (mem, set);
7704 emit_move_insn (mem, gen_rtx_REG (Pmode,
7705 x86_64_int_parameter_registers[i]));
7708 if (ix86_varargs_fpr_size)
7710 enum machine_mode smode;
7713 /* Now emit code to save SSE registers. The AX parameter contains the
7714 number of SSE parameter registers used to call this function, though all
7715 we actually check here is the zero/non-zero status. */
7717 label = gen_label_rtx ();
7718 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7719 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7722 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7723 we used movdqa (i.e. TImode) instead? Perhaps even better would
7724 be if we could determine the real mode of the data, via a hook
7725 into pass_stdarg. Ignore all that for now. */
7727 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7728 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7730 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7731 if (max > X86_64_SSE_REGPARM_MAX)
7732 max = X86_64_SSE_REGPARM_MAX;
7734 for (i = cum->sse_regno; i < max; ++i)
7736 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7737 mem = gen_rtx_MEM (smode, mem);
7738 MEM_NOTRAP_P (mem) = 1;
7739 set_mem_alias_set (mem, set);
7740 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7742 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
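/* Editorial sketch of the save-area layout produced above (SYSV
   defaults): six 8-byte GPR slots at offsets 0..47 (rdi, rsi, rdx,
   rcx, r8, r9; ix86_varargs_gpr_size == 48), then up to eight 16-byte
   SSE slots starting at offset 48 -- matching the
   8 * X86_64_REGPARM_MAX bias that va_start adds to fp_offset.  */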
7750 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7752 alias_set_type set = get_varargs_alias_set ();
7755 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7759 mem = gen_rtx_MEM (Pmode,
7760 plus_constant (virtual_incoming_args_rtx,
7761 i * UNITS_PER_WORD));
7762 MEM_NOTRAP_P (mem) = 1;
7763 set_mem_alias_set (mem, set);
7765 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7766 emit_move_insn (mem, reg);
7771 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7772 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7775 CUMULATIVE_ARGS next_cum;
7778 /* This argument doesn't appear to be used anymore. Which is good,
7779 because the old code here didn't suppress rtl generation. */
7780 gcc_assert (!no_rtl);
7785 fntype = TREE_TYPE (current_function_decl);
7787 /* For varargs, we do not want to skip the dummy va_dcl argument.
7788 For stdargs, we do want to skip the last named argument. */
7790 if (stdarg_p (fntype))
7791 ix86_function_arg_advance (&next_cum, mode, type, true);
7793 if (cum->call_abi == MS_ABI)
7794 setup_incoming_varargs_ms_64 (&next_cum);
7796 setup_incoming_varargs_64 (&next_cum);
7799 /* Check whether TYPE is a char * kind of va_list. */
7802 is_va_list_char_pointer (tree type)
7806 /* For 32-bit it is always true. */
7809 canonic = ix86_canonical_va_list_type (type);
7810 return (canonic == ms_va_list_type_node
7811 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7814 /* Implement va_start. */
7817 ix86_va_start (tree valist, rtx nextarg)
7819 HOST_WIDE_INT words, n_gpr, n_fpr;
7820 tree f_gpr, f_fpr, f_ovf, f_sav;
7821 tree gpr, fpr, ovf, sav, t;
7825 if (flag_split_stack
7826 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7828 unsigned int scratch_regno;
7830 /* When we are splitting the stack, we can't refer to the stack
7831 arguments using internal_arg_pointer, because they may be on
7832 the old stack. The split stack prologue will arrange to
7833 leave a pointer to the old stack arguments in a scratch
7834 register, which we here copy to a pseudo-register. The split
7835 stack prologue can't set the pseudo-register directly because
7836 it (the prologue) runs before any registers have been saved. */
7838 scratch_regno = split_stack_prologue_scratch_regno ();
7839 if (scratch_regno != INVALID_REGNUM)
7843 reg = gen_reg_rtx (Pmode);
7844 cfun->machine->split_stack_varargs_pointer = reg;
7847 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7851 push_topmost_sequence ();
7852 emit_insn_after (seq, entry_of_function ());
7853 pop_topmost_sequence ();
7857 /* Only the 64-bit target needs something special. */
7858 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7860 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7861 std_expand_builtin_va_start (valist, nextarg);
7866 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7867 next = expand_binop (ptr_mode, add_optab,
7868 cfun->machine->split_stack_varargs_pointer,
7869 crtl->args.arg_offset_rtx,
7870 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7871 convert_move (va_r, next, 0);
7876 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7877 f_fpr = DECL_CHAIN (f_gpr);
7878 f_ovf = DECL_CHAIN (f_fpr);
7879 f_sav = DECL_CHAIN (f_ovf);
7881 valist = build_simple_mem_ref (valist);
7882 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7883 /* The following should be folded into the MEM_REF offset. */
7884 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7886 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7888 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7890 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7893 /* Count number of gp and fp argument registers used. */
7894 words = crtl->args.info.words;
7895 n_gpr = crtl->args.info.regno;
7896 n_fpr = crtl->args.info.sse_regno;
7898 if (cfun->va_list_gpr_size)
7900 type = TREE_TYPE (gpr);
7901 t = build2 (MODIFY_EXPR, type,
7902 gpr, build_int_cst (type, n_gpr * 8));
7903 TREE_SIDE_EFFECTS (t) = 1;
7904 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7907 if (TARGET_SSE && cfun->va_list_fpr_size)
7909 type = TREE_TYPE (fpr);
7910 t = build2 (MODIFY_EXPR, type, fpr,
7911 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7912 TREE_SIDE_EFFECTS (t) = 1;
7913 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7916 /* Find the overflow area. */
7917 type = TREE_TYPE (ovf);
7918 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7919 ovf_rtx = crtl->args.internal_arg_pointer;
7921 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7922 t = make_tree (type, ovf_rtx);
7924 t = build2 (POINTER_PLUS_EXPR, type, t,
7925 size_int (words * UNITS_PER_WORD));
7926 t = build2 (MODIFY_EXPR, type, ovf, t);
7927 TREE_SIDE_EFFECTS (t) = 1;
7928 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7930 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7932 /* Find the register save area.
7933 The function prologue saves it right above the stack frame. */
7934 type = TREE_TYPE (sav);
7935 t = make_tree (type, frame_pointer_rtx);
7936 if (!ix86_varargs_gpr_size)
7937 t = build2 (POINTER_PLUS_EXPR, type, t,
7938 size_int (-8 * X86_64_REGPARM_MAX));
7939 t = build2 (MODIFY_EXPR, type, sav, t);
7940 TREE_SIDE_EFFECTS (t) = 1;
7941 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
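/* Concrete illustration (editorial): for

     void f (int fmt, ...)

   with one named integer argument, the code above leaves
   gp_offset == 8, fp_offset == 48 (no named SSE arguments, plus the
   8 * X86_64_REGPARM_MAX bias), overflow_arg_area at the start of the
   caller-pushed argument area and reg_save_area at the block the
   prologue saved.  */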
7945 /* Implement va_arg. */
7948 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7951 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7952 tree f_gpr, f_fpr, f_ovf, f_sav;
7953 tree gpr, fpr, ovf, sav, t;
7955 tree lab_false, lab_over = NULL_TREE;
7960 enum machine_mode nat_mode;
7961 unsigned int arg_boundary;
7963 /* Only the 64-bit target needs something special. */
7964 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7965 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7967 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7968 f_fpr = DECL_CHAIN (f_gpr);
7969 f_ovf = DECL_CHAIN (f_fpr);
7970 f_sav = DECL_CHAIN (f_ovf);
7972 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7973 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7974 valist = build_va_arg_indirect_ref (valist);
7975 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7976 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7977 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7979 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7981 type = build_pointer_type (type);
7982 size = int_size_in_bytes (type);
7983 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7985 nat_mode = type_natural_mode (type, NULL);
7994 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7995 if (ix86_cfun_abi () == SYSV_ABI)
8002 container = construct_container (nat_mode, TYPE_MODE (type),
8003 type, 0, X86_64_REGPARM_MAX,
8004 X86_64_SSE_REGPARM_MAX, intreg,
8009 /* Pull the value out of the saved registers. */
8011 addr = create_tmp_var (ptr_type_node, "addr");
8015 int needed_intregs, needed_sseregs;
8017 tree int_addr, sse_addr;
8019 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8020 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8022 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8024 need_temp = (!REG_P (container)
8025 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8026 || TYPE_ALIGN (type) > 128));
8028 /* In case we are passing a structure, verify that it is a consecutive
8029 block on the register save area. If not, we need to do moves. */
8030 if (!need_temp && !REG_P (container))
8032 /* Verify that all registers are strictly consecutive. */
8033 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8037 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8039 rtx slot = XVECEXP (container, 0, i);
8040 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8041 || INTVAL (XEXP (slot, 1)) != i * 16)
8049 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8051 rtx slot = XVECEXP (container, 0, i);
8052 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8053 || INTVAL (XEXP (slot, 1)) != i * 8)
8065 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8066 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8069 /* First ensure that we fit completely in registers. */
8072 t = build_int_cst (TREE_TYPE (gpr),
8073 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8074 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8075 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8076 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8077 gimplify_and_add (t, pre_p);
8081 t = build_int_cst (TREE_TYPE (fpr),
8082 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8083 + X86_64_REGPARM_MAX * 8);
8084 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8085 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8086 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8087 gimplify_and_add (t, pre_p);
8090 /* Compute index to start of area used for integer regs. */
8093 /* int_addr = gpr + sav; */
8094 t = fold_convert (sizetype, gpr);
8095 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8096 gimplify_assign (int_addr, t, pre_p);
8100 /* sse_addr = fpr + sav; */
8101 t = fold_convert (sizetype, fpr);
8102 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8103 gimplify_assign (sse_addr, t, pre_p);
8107 int i, prev_size = 0;
8108 tree temp = create_tmp_var (type, "va_arg_tmp");
8111 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8112 gimplify_assign (addr, t, pre_p);
8114 for (i = 0; i < XVECLEN (container, 0); i++)
8116 rtx slot = XVECEXP (container, 0, i);
8117 rtx reg = XEXP (slot, 0);
8118 enum machine_mode mode = GET_MODE (reg);
8124 tree dest_addr, dest;
8125 int cur_size = GET_MODE_SIZE (mode);
8127 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8128 prev_size = INTVAL (XEXP (slot, 1));
8129 if (prev_size + cur_size > size)
8131 cur_size = size - prev_size;
8132 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8133 if (mode == BLKmode)
8136 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8137 if (mode == GET_MODE (reg))
8138 addr_type = build_pointer_type (piece_type);
8140 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8142 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8145 if (SSE_REGNO_P (REGNO (reg)))
8147 src_addr = sse_addr;
8148 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8152 src_addr = int_addr;
8153 src_offset = REGNO (reg) * 8;
8155 src_addr = fold_convert (addr_type, src_addr);
8156 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8157 size_int (src_offset));
8159 dest_addr = fold_convert (daddr_type, addr);
8160 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8161 size_int (prev_size));
8162 if (cur_size == GET_MODE_SIZE (mode))
8164 src = build_va_arg_indirect_ref (src_addr);
8165 dest = build_va_arg_indirect_ref (dest_addr);
8167 gimplify_assign (dest, src, pre_p);
8172 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8173 3, dest_addr, src_addr,
8174 size_int (cur_size));
8175 gimplify_and_add (copy, pre_p);
8177 prev_size += cur_size;
8183 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8184 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8185 gimplify_assign (gpr, t, pre_p);
8190 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8191 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8192 gimplify_assign (fpr, t, pre_p);
8195 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8197 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8200 /* ... otherwise out of the overflow area. */
8202 /* When the caller aligns a parameter on the stack, an alignment
8203 beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
8204 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the
8205 caller here. */
8206 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8207 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8208 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8210 /* Care for on-stack alignment if needed. */
8211 if (arg_boundary <= 64 || size == 0)
8215 HOST_WIDE_INT align = arg_boundary / 8;
8216 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8217 size_int (align - 1));
8218 t = fold_convert (sizetype, t);
8219 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8221 t = fold_convert (TREE_TYPE (ovf), t);
8224 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8225 gimplify_assign (addr, t, pre_p);
8227 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8228 size_int (rsize * UNITS_PER_WORD));
8229 gimplify_assign (unshare_expr (ovf), t, pre_p);
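/* The computation above is the usual round-up-to-alignment idiom,
   addr = (ovf + align - 1) & -align.  A worked example (numbers
   illustrative only): with arg_boundary = 256 bits, align is 32
   bytes, and an overflow pointer of 0x1004 becomes
   (0x1004 + 31) & -32 = 0x1020, the next 32-byte boundary.  */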
8232 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8234 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8235 addr = fold_convert (ptrtype, addr);
8238 addr = build_va_arg_indirect_ref (addr);
8239 return build_va_arg_indirect_ref (addr);
8242 /* Return true if OPNUM's MEM should be matched
8243 in movabs* patterns. */
8246 ix86_check_movabs (rtx insn, int opnum)
8250 set = PATTERN (insn);
8251 if (GET_CODE (set) == PARALLEL)
8252 set = XVECEXP (set, 0, 0);
8253 gcc_assert (GET_CODE (set) == SET);
8254 mem = XEXP (set, opnum);
8255 while (GET_CODE (mem) == SUBREG)
8256 mem = SUBREG_REG (mem);
8257 gcc_assert (MEM_P (mem));
8258 return volatile_ok || !MEM_VOLATILE_P (mem);
8261 /* Initialize the table of extra 80387 mathematical constants. */
8264 init_ext_80387_constants (void)
8266 static const char * cst[5] =
8268 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8269 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8270 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8271 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8272 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8276 for (i = 0; i < 5; i++)
8278 real_from_string (&ext_80387_constants_table[i], cst[i]);
8279 /* Ensure each constant is rounded to XFmode precision. */
8280 real_convert (&ext_80387_constants_table[i],
8281 XFmode, &ext_80387_constants_table[i]);
8284 ext_80387_constants_init = 1;
8287 /* Return non-zero if the constant is something that
8288 can be loaded with a special instruction. */
8291 standard_80387_constant_p (rtx x)
8293 enum machine_mode mode = GET_MODE (x);
8297 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8300 if (x == CONST0_RTX (mode))
8302 if (x == CONST1_RTX (mode))
8305 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8307 /* For XFmode constants, try to find a special 80387 instruction when
8308 optimizing for size or on those CPUs that benefit from them. */
8310 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8314 if (! ext_80387_constants_init)
8315 init_ext_80387_constants ();
8317 for (i = 0; i < 5; i++)
8318 if (real_identical (&r, &ext_80387_constants_table[i]))
8322 /* A load of the constant -0.0 or -1.0 will be split into an
8323 fldz;fchs or fld1;fchs sequence. */
8324 if (real_isnegzero (&r))
8326 if (real_identical (&r, &dconstm1))
8332 /* Return the opcode of the special instruction to be used to load
8336 standard_80387_constant_opcode (rtx x)
8338 switch (standard_80387_constant_p (x))
8362 /* Return the CONST_DOUBLE representing the 80387 constant that is
8363 loaded by the specified special instruction. The argument IDX
8364 matches the return value from standard_80387_constant_p. */
8367 standard_80387_constant_rtx (int idx)
8371 if (! ext_80387_constants_init)
8372 init_ext_80387_constants ();
8388 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
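/* For reference, a summary of the index protocol shared by
   standard_80387_constant_p and its companions (inferred from the
   surrounding code, not spelled out in it): 1 = fldz (+0.0),
   2 = fld1 (+1.0), 3..7 = the five table entries above (fldlg2,
   fldln2, fldl2e, fldl2t, fldpi), 8 = -0.0 and 9 = -1.0, the last
   two being split into fldz;fchs and fld1;fchs.  */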
8392 /* Return 1 if X is all 0s and 2 if X is all 1s
8393 in a supported SSE vector mode. */
8396 standard_sse_constant_p (rtx x)
8398 enum machine_mode mode = GET_MODE (x);
8400 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8402 if (vector_all_ones_operand (x, mode))
8418 /* Return the opcode of the special instruction to be used to load
8422 standard_sse_constant_opcode (rtx insn, rtx x)
8424 switch (standard_sse_constant_p (x))
8427 switch (get_attr_mode (insn))
8430 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8432 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8433 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8435 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8437 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8438 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8440 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8442 return "vxorps\t%x0, %x0, %x0";
8444 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8445 return "vxorps\t%x0, %x0, %x0";
8447 return "vxorpd\t%x0, %x0, %x0";
8449 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8450 return "vxorps\t%x0, %x0, %x0";
8452 return "vpxor\t%x0, %x0, %x0";
8457 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
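/* A note on the mnemonics chosen above (rationale sketched here, not
   spelled out in the code): each xorps/xorpd/pxor form clears the
   destination, e.g.

     xorps %xmm0, %xmm0   ; %xmm0 = 0.0 in the FP single domain

   and pcmpeqd sets all bits, since every 32-bit lane compares equal
   to itself.  Picking the variant that matches the insn's mode keeps
   the instruction in the same execution domain as its consumers,
   avoiding domain-crossing penalties on some CPUs.  */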
8464 /* Return true if OP contains a symbol reference.  */
8467 symbolic_reference_mentioned_p (rtx op)
8472 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8475 fmt = GET_RTX_FORMAT (GET_CODE (op));
8476 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8482 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8483 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8487 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
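/* For example (illustrative only), given
   OP = (plus:SI (symbol_ref "x") (const_int 4)) the walk above
   recurses into the operands of the PLUS and returns true as soon
   as it reaches the SYMBOL_REF.  */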
8494 /* Return true if it is appropriate to emit `ret' instructions in the
8495 body of a function. Do this only if the epilogue is simple, needing a
8496 couple of insns. Prior to reloading, we can't tell how many registers
8497 must be saved, so return false then. Return false if there is no frame
8498 marker to de-allocate. */
8501 ix86_can_use_return_insn_p (void)
8503 struct ix86_frame frame;
8505 if (! reload_completed || frame_pointer_needed)
8508 /* Don't allow more than 32k pop, since that's all we can do
8509 with one instruction. */
8510 if (crtl->args.pops_args && crtl->args.size >= 32768)
8513 ix86_compute_frame_layout (&frame);
8514 return (frame.stack_pointer_offset == UNITS_PER_WORD
8515 && (frame.nregs + frame.nsseregs) == 0);
8518 /* Value should be nonzero if functions must have frame pointers.
8519 Zero means the frame pointer need not be set up (and parms may
8520 be accessed via the stack pointer) in functions that seem suitable. */
8523 ix86_frame_pointer_required (void)
8525 /* If we accessed previous frames, then the generated code expects
8526 to be able to access the saved ebp value in our frame. */
8527 if (cfun->machine->accesses_prev_frame)
8530 /* Several x86 OSes need a frame pointer for other reasons,
8531 usually pertaining to setjmp. */
8532 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8535 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8536 turns off the frame pointer by default. Turn it back on now if
8537 we do not have a leaf function. */
8538 if (TARGET_OMIT_LEAF_FRAME_POINTER
8539 && (!current_function_is_leaf
8540 || ix86_current_function_calls_tls_descriptor))
8543 if (crtl->profile && !flag_fentry)
8549 /* Record that the current function accesses previous call frames. */
8552 ix86_setup_frame_addresses (void)
8554 cfun->machine->accesses_prev_frame = 1;
8557 #ifndef USE_HIDDEN_LINKONCE
8558 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8559 # define USE_HIDDEN_LINKONCE 1
8561 # define USE_HIDDEN_LINKONCE 0
8565 static int pic_labels_used;
8567 /* Fills in the label name that should be used for a pc thunk for
8568 the given register. */
8571 get_pc_thunk_name (char name[32], unsigned int regno)
8573 gcc_assert (!TARGET_64BIT);
8575 if (USE_HIDDEN_LINKONCE)
8576 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8578 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8582 /* This function generates code for -fpic that loads %ebx with
8583 the return address of the caller and then returns. */
8586 ix86_code_end (void)
8591 for (regno = AX_REG; regno <= SP_REG; regno++)
8596 if (!(pic_labels_used & (1 << regno)))
8599 get_pc_thunk_name (name, regno);
8601 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8602 get_identifier (name),
8603 build_function_type (void_type_node, void_list_node));
8604 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8605 NULL_TREE, void_type_node);
8606 TREE_PUBLIC (decl) = 1;
8607 TREE_STATIC (decl) = 1;
8612 switch_to_section (darwin_sections[text_coal_section]);
8613 fputs ("\t.weak_definition\t", asm_out_file);
8614 assemble_name (asm_out_file, name);
8615 fputs ("\n\t.private_extern\t", asm_out_file);
8616 assemble_name (asm_out_file, name);
8617 putc ('\n', asm_out_file);
8618 ASM_OUTPUT_LABEL (asm_out_file, name);
8619 DECL_WEAK (decl) = 1;
8623 if (USE_HIDDEN_LINKONCE)
8625 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8627 targetm.asm_out.unique_section (decl, 0);
8628 switch_to_section (get_named_section (decl, NULL, 0));
8630 targetm.asm_out.globalize_label (asm_out_file, name);
8631 fputs ("\t.hidden\t", asm_out_file);
8632 assemble_name (asm_out_file, name);
8633 putc ('\n', asm_out_file);
8634 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8638 switch_to_section (text_section);
8639 ASM_OUTPUT_LABEL (asm_out_file, name);
8642 DECL_INITIAL (decl) = make_node (BLOCK);
8643 current_function_decl = decl;
8644 init_function_start (decl);
8645 first_function_block_is_cold = false;
8646 /* Make sure unwind info is emitted for the thunk if needed. */
8647 final_start_function (emit_barrier (), asm_out_file, 1);
8649 /* Pad stack IP move with 4 instructions (two NOPs count
8650 as one instruction). */
8651 if (TARGET_PAD_SHORT_FUNCTION)
8656 fputs ("\tnop\n", asm_out_file);
8659 xops[0] = gen_rtx_REG (Pmode, regno);
8660 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8661 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8662 fputs ("\tret\n", asm_out_file);
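/* The thunk body emitted above looks roughly like this for the bx
   variant (a sketch; AT&T syntax, padding NOPs omitted):

     __i686.get_pc_thunk.bx:
             movl (%esp), %ebx
             ret

   i.e. it copies its own return address - the PC at the call site -
   into the requested register.  */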
8663 final_end_function ();
8664 init_insn_lengths ();
8665 free_after_compilation (cfun);
8667 current_function_decl = NULL;
8670 if (flag_split_stack)
8671 file_end_indicate_split_stack ();
8674 /* Emit code for the SET_GOT patterns. */
8677 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8683 if (TARGET_VXWORKS_RTP && flag_pic)
8685 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8686 xops[2] = gen_rtx_MEM (Pmode,
8687 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8688 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8690 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8691 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8692 an unadorned address. */
8693 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8694 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8695 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8699 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8701 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8703 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8706 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8709 output_asm_insn ("call\t%a2", xops);
8710 #ifdef DWARF2_UNWIND_INFO
8711 /* The call to the next label acts as a push. */
8712 if (dwarf2out_do_frame ())
8716 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8717 gen_rtx_PLUS (Pmode,
8720 RTX_FRAME_RELATED_P (insn) = 1;
8721 dwarf2out_frame_debug (insn, true);
8728 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8729 is what will be referenced by the Mach-O PIC subsystem. */
8731 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8734 targetm.asm_out.internal_label (asm_out_file, "L",
8735 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8739 output_asm_insn ("pop%z0\t%0", xops);
8740 #ifdef DWARF2_UNWIND_INFO
8741 /* The pop adjusts the stack and clobbers dest, but does not
8742 restore it for unwind info purposes. */
8743 if (dwarf2out_do_frame ())
8747 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8748 dwarf2out_frame_debug (insn, true);
8749 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8750 gen_rtx_PLUS (Pmode,
8753 RTX_FRAME_RELATED_P (insn) = 1;
8754 dwarf2out_frame_debug (insn, true);
8763 get_pc_thunk_name (name, REGNO (dest));
8764 pic_labels_used |= 1 << REGNO (dest);
8766 #ifdef DWARF2_UNWIND_INFO
8767 /* Ensure all queued register saves are flushed before the
8768 call. */
8769 if (dwarf2out_do_frame ())
8770 dwarf2out_flush_queued_reg_saves ();
8772 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8773 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8774 output_asm_insn ("call\t%X2", xops);
8775 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8776 is what will be referenced by the Mach-O PIC subsystem. */
8779 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8781 targetm.asm_out.internal_label (asm_out_file, "L",
8782 CODE_LABEL_NUMBER (label));
8789 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8790 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8792 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
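/* For the common 32-bit PIC case without the thunk, the code emitted
   above looks roughly like (a sketch; the label name is illustrative):

           call .L2
     .L2:  popl %ebx
           addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   which leaves the GOT base address in the destination register.  */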
8797 /* Generate a "push" pattern for input ARG. */
8802 struct machine_function *m = cfun->machine;
8804 if (m->fs.cfa_reg == stack_pointer_rtx)
8805 m->fs.cfa_offset += UNITS_PER_WORD;
8806 m->fs.sp_offset += UNITS_PER_WORD;
8808 return gen_rtx_SET (VOIDmode,
8810 gen_rtx_PRE_DEC (Pmode,
8811 stack_pointer_rtx)),
8815 /* Generate a "pop" pattern for input ARG. */
8820 return gen_rtx_SET (VOIDmode,
8823 gen_rtx_POST_INC (Pmode,
8824 stack_pointer_rtx)));
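/* For reference, a sketch of the RTL produced by these two helpers
   (shown for 32-bit):

     gen_push:  (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))
     gen_pop:   (set (reg:SI arg) (mem:SI (post_inc:SI (reg:SI sp))))

   gen_push additionally advances the tracked CFA and SP offsets by
   UNITS_PER_WORD, as seen above.  */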
8827 /* Return >= 0 if there is an unused call-clobbered register available
8828 for the entire function. */
8831 ix86_select_alt_pic_regnum (void)
8833 if (current_function_is_leaf
8835 && !ix86_current_function_calls_tls_descriptor)
8838 /* Can't use the same register for both PIC and DRAP. */
8840 drap = REGNO (crtl->drap_reg);
8843 for (i = 2; i >= 0; --i)
8844 if (i != drap && !df_regs_ever_live_p (i))
8848 return INVALID_REGNUM;
8851 /* Return 1 if we need to save REGNO. */
8853 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8855 if (pic_offset_table_rtx
8856 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8857 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8859 || crtl->calls_eh_return
8860 || crtl->uses_const_pool))
8862 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8867 if (crtl->calls_eh_return && maybe_eh_return)
8872 unsigned test = EH_RETURN_DATA_REGNO (i);
8873 if (test == INVALID_REGNUM)
8880 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8883 return (df_regs_ever_live_p (regno)
8884 && !call_used_regs[regno]
8885 && !fixed_regs[regno]
8886 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8889 /* Return the number of saved general purpose registers. */
8892 ix86_nsaved_regs (void)
8897 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8898 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8903 /* Return the number of saved SSE registers. */
8906 ix86_nsaved_sseregs (void)
8911 if (ix86_cfun_abi () != MS_ABI)
8913 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8914 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8919 /* Given FROM and TO register numbers, say whether this elimination is
8920 allowed. If stack alignment is needed, we can only replace argument
8921 pointer with hard frame pointer, or replace frame pointer with stack
8922 pointer. Otherwise, frame pointer elimination is automatically
8923 handled and all other eliminations are valid. */
8926 ix86_can_eliminate (const int from, const int to)
8928 if (stack_realign_fp)
8929 return ((from == ARG_POINTER_REGNUM
8930 && to == HARD_FRAME_POINTER_REGNUM)
8931 || (from == FRAME_POINTER_REGNUM
8932 && to == STACK_POINTER_REGNUM));
8934 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8937 /* Return the offset between two registers, one to be eliminated, and the other
8938 its replacement, at the start of a routine. */
8941 ix86_initial_elimination_offset (int from, int to)
8943 struct ix86_frame frame;
8944 ix86_compute_frame_layout (&frame);
8946 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8947 return frame.hard_frame_pointer_offset;
8948 else if (from == FRAME_POINTER_REGNUM
8949 && to == HARD_FRAME_POINTER_REGNUM)
8950 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8953 gcc_assert (to == STACK_POINTER_REGNUM);
8955 if (from == ARG_POINTER_REGNUM)
8956 return frame.stack_pointer_offset;
8958 gcc_assert (from == FRAME_POINTER_REGNUM);
8959 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8963 /* In a dynamically-aligned function, we can't know the offset from
8964 stack pointer to frame pointer, so we must ensure that setjmp
8965 eliminates fp against the hard fp (%ebp) rather than trying to
8966 index from %esp up to the top of the frame across a gap that is
8967 of unknown (at compile-time) size. */
8969 ix86_builtin_setjmp_frame_value (void)
8971 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8974 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8975 field in the TCB, so they cannot be used together. */
8978 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
8979 struct gcc_options *opts ATTRIBUTE_UNUSED)
8983 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8985 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8988 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8991 error ("%<-fsplit-stack%> requires "
8992 "assembler support for CFI directives");
9000 /* When using -fsplit-stack, the allocation routines set a field in
9001 the TCB to the bottom of the stack plus this much space, measured
9002 in bytes. */
9004 #define SPLIT_STACK_AVAILABLE 256
9006 /* Fill the ix86_frame structure with information about the frame of the current function. */
9009 ix86_compute_frame_layout (struct ix86_frame *frame)
9011 unsigned int stack_alignment_needed;
9012 HOST_WIDE_INT offset;
9013 unsigned int preferred_alignment;
9014 HOST_WIDE_INT size = get_frame_size ();
9015 HOST_WIDE_INT to_allocate;
9017 frame->nregs = ix86_nsaved_regs ();
9018 frame->nsseregs = ix86_nsaved_sseregs ();
9020 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9021 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9023 /* The MS ABI seems to require the stack alignment to always be 16, except
9024 in function prologues and in leaf functions. */
9025 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
9026 && (!current_function_is_leaf || cfun->calls_alloca != 0
9027 || ix86_current_function_calls_tls_descriptor))
9029 preferred_alignment = 16;
9030 stack_alignment_needed = 16;
9031 crtl->preferred_stack_boundary = 128;
9032 crtl->stack_alignment_needed = 128;
9035 gcc_assert (!size || stack_alignment_needed);
9036 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9037 gcc_assert (preferred_alignment <= stack_alignment_needed);
9039 /* For SEH we have to limit the amount of code movement into the prologue.
9040 At present we do this via a BLOCKAGE, at which point there's very little
9041 scheduling that can be done, which means that there's very little point
9042 in doing anything except PUSHs. */
9044 cfun->machine->use_fast_prologue_epilogue = false;
9046 /* During a reload iteration the number of registers saved can change.
9047 Recompute the value as needed. Do not recompute when the number of
9048 registers did not change, as reload makes multiple calls to this function
9049 and does not expect the decision to change within a single iteration. */
9050 else if (!optimize_function_for_size_p (cfun)
9051 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9053 int count = frame->nregs;
9054 struct cgraph_node *node = cgraph_node (current_function_decl);
9056 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9058 /* The fast prologue uses moves instead of pushes to save registers. This
9059 is significantly longer, but also executes faster, as modern hardware
9060 can execute the moves in parallel but cannot do so for push/pop.
9062 Be careful about choosing which prologue to emit: when the function
9063 takes many instructions to execute, we may use the slow version, as we
9064 may when the function is known to be outside a hot spot (this is known
9065 with feedback only). Weight the size of the function by the number of
9066 registers to save, as it is cheap to use one or two push instructions
9067 but very slow to use many of them. */
9069 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9070 if (node->frequency < NODE_FREQUENCY_NORMAL
9071 || (flag_branch_probabilities
9072 && node->frequency < NODE_FREQUENCY_HOT))
9073 cfun->machine->use_fast_prologue_epilogue = false;
9075 cfun->machine->use_fast_prologue_epilogue
9076 = !expensive_function_p (count);
9078 if (TARGET_PROLOGUE_USING_MOVE
9079 && cfun->machine->use_fast_prologue_epilogue)
9080 frame->save_regs_using_mov = true;
9082 frame->save_regs_using_mov = false;
9084 /* If static stack checking is enabled and done with probes, the registers
9085 need to be saved before allocating the frame. */
9086 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9087 frame->save_regs_using_mov = false;
9089 /* Skip return address. */
9090 offset = UNITS_PER_WORD;
9092 /* Skip pushed static chain. */
9093 if (ix86_static_chain_on_stack)
9094 offset += UNITS_PER_WORD;
9096 /* Skip saved base pointer. */
9097 if (frame_pointer_needed)
9098 offset += UNITS_PER_WORD;
9099 frame->hfp_save_offset = offset;
9101 /* The traditional frame pointer location is at the top of the frame. */
9102 frame->hard_frame_pointer_offset = offset;
9104 /* Register save area */
9105 offset += frame->nregs * UNITS_PER_WORD;
9106 frame->reg_save_offset = offset;
9108 /* Align and set SSE register save area. */
9109 if (frame->nsseregs)
9111 /* The only ABI that has saved SSE registers (Win64) also has a
9112 16-byte aligned default stack, and thus we don't need to be
9113 within the re-aligned local stack frame to save them. */
9114 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9115 offset = (offset + 16 - 1) & -16;
9116 offset += frame->nsseregs * 16;
9118 frame->sse_reg_save_offset = offset;
9120 /* The re-aligned stack starts here. Values before this point are not
9121 directly comparable with values below this point. In order to make
9122 sure that no value happens to be the same before and after, force
9123 the alignment computation below to add a non-zero value. */
9124 if (stack_realign_fp)
9125 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9128 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9129 offset += frame->va_arg_size;
9131 /* Align start of frame for local function. */
9132 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9134 /* Frame pointer points here. */
9135 frame->frame_pointer_offset = offset;
9139 /* Add the outgoing arguments area. It can be skipped if we eliminated
9140 all the function calls as dead code.
9141 Skipping is however impossible when the function calls alloca: the
9142 alloca expander assumes that the last crtl->outgoing_args_size bytes
9143 of the stack frame are unused. */
9144 if (ACCUMULATE_OUTGOING_ARGS
9145 && (!current_function_is_leaf || cfun->calls_alloca
9146 || ix86_current_function_calls_tls_descriptor))
9148 offset += crtl->outgoing_args_size;
9149 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9152 frame->outgoing_arguments_size = 0;
9154 /* Align the stack boundary. Only needed if we're calling another function
9155 or using alloca. */
9156 if (!current_function_is_leaf || cfun->calls_alloca
9157 || ix86_current_function_calls_tls_descriptor)
9158 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9160 /* We've reached the end of the stack frame. */
9161 frame->stack_pointer_offset = offset;
9163 /* Size prologue needs to allocate. */
9164 to_allocate = offset - frame->sse_reg_save_offset;
9166 if ((!to_allocate && frame->nregs <= 1)
9167 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9168 frame->save_regs_using_mov = false;
9170 if (ix86_using_red_zone ()
9171 && current_function_sp_is_unchanging
9172 && current_function_is_leaf
9173 && !ix86_current_function_calls_tls_descriptor)
9175 frame->red_zone_size = to_allocate;
9176 if (frame->save_regs_using_mov)
9177 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9178 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9179 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9182 frame->red_zone_size = 0;
9183 frame->stack_pointer_offset -= frame->red_zone_size;
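/* A worked example of the layout computed above (illustrative
   numbers only: 64-bit, frame pointer needed, two saved GPRs,
   16 bytes of locals, no SSE saves, no realignment, no red zone):

     offset =  8   past the return address
     offset = 16   past the saved %rbp (hfp_save_offset and
                   hard_frame_pointer_offset)
     offset = 32   past the GPR saves (reg_save_offset and, with no
                   SSE saves, sse_reg_save_offset)
     offset = 32   frame_pointer_offset (already 16-byte aligned)
     offset = 48   stack_pointer_offset, past the locals

   so the prologue must allocate 48 - 32 = 16 bytes beyond the
   register saves.  */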
9185 /* The SEH frame pointer location is near the bottom of the frame.
9186 This is enforced by the fact that the difference between the
9187 stack pointer and the frame pointer is limited to 240 bytes in
9188 the unwind data structure. */
9193 /* If we can leave the frame pointer where it is, do so. */
9194 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9195 if (diff > 240 || (diff & 15) != 0)
9197 /* Ideally we'd determine what portion of the local stack frame
9198 (within the constraint of the lowest 240) is most heavily used.
9199 But without that complication, simply bias the frame pointer
9200 by 128 bytes so as to maximize the amount of the local stack
9201 frame that is addressable with 8-bit offsets. */
9202 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9207 /* This is semi-inlined memory_address_length, but simplified
9208 since we know that we're always dealing with reg+offset, and
9209 to avoid having to create and discard all that rtl. */
9212 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9218 /* EBP and R13 cannot be encoded without an offset. */
9219 len = (regno == BP_REG || regno == R13_REG);
9221 else if (IN_RANGE (offset, -128, 127))
9224 /* ESP and R12 must be encoded with a SIB byte. */
9225 if (regno == SP_REG || regno == R12_REG)
9231 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9232 The valid base registers are taken from CFUN->MACHINE->FS. */
9235 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9237 const struct machine_function *m = cfun->machine;
9238 rtx base_reg = NULL;
9239 HOST_WIDE_INT base_offset = 0;
9241 if (m->use_fast_prologue_epilogue)
9243 /* Choose the base register most likely to allow the most scheduling
9244 opportunities. Generally FP is valid throughout the function,
9245 while DRAP must be reloaded within the epilogue. But choose either
9246 over the SP due to increased encoding size. */
9250 base_reg = hard_frame_pointer_rtx;
9251 base_offset = m->fs.fp_offset - cfa_offset;
9253 else if (m->fs.drap_valid)
9255 base_reg = crtl->drap_reg;
9256 base_offset = 0 - cfa_offset;
9258 else if (m->fs.sp_valid)
9260 base_reg = stack_pointer_rtx;
9261 base_offset = m->fs.sp_offset - cfa_offset;
9266 HOST_WIDE_INT toffset;
9269 /* Choose the base register with the smallest address encoding.
9270 With a tie, choose FP > DRAP > SP. */
9273 base_reg = stack_pointer_rtx;
9274 base_offset = m->fs.sp_offset - cfa_offset;
9275 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9277 if (m->fs.drap_valid)
9279 toffset = 0 - cfa_offset;
9280 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9283 base_reg = crtl->drap_reg;
9284 base_offset = toffset;
9290 toffset = m->fs.fp_offset - cfa_offset;
9291 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9294 base_reg = hard_frame_pointer_rtx;
9295 base_offset = toffset;
9300 gcc_assert (base_reg != NULL);
9302 return plus_constant (base_reg, base_offset);
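/* An illustrative comparison (numbers are hypothetical): with
   fp_offset = 16, sp_offset = 64 and cfa_offset = 24, the candidates
   are %rsp + 40, which needs a disp8 plus the mandatory SIB byte
   (len 2), versus %rbp - 8, which needs only a disp8 (len 1), so the
   frame pointer wins the tie-break.  */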
9305 /* Emit code to save registers in the prologue. */
9308 ix86_emit_save_regs (void)
9313 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9314 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9316 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9317 RTX_FRAME_RELATED_P (insn) = 1;
9321 /* Emit a single register save at CFA - CFA_OFFSET. */
9324 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9325 HOST_WIDE_INT cfa_offset)
9327 struct machine_function *m = cfun->machine;
9328 rtx reg = gen_rtx_REG (mode, regno);
9329 rtx mem, addr, base, insn;
9331 addr = choose_baseaddr (cfa_offset);
9332 mem = gen_frame_mem (mode, addr);
9334 /* For SSE saves, we need to indicate the 128-bit alignment. */
9335 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9337 insn = emit_move_insn (mem, reg);
9338 RTX_FRAME_RELATED_P (insn) = 1;
9341 if (GET_CODE (base) == PLUS)
9342 base = XEXP (base, 0);
9343 gcc_checking_assert (REG_P (base));
9345 /* When saving registers into a re-aligned local stack frame, avoid
9346 any tricky guessing by dwarf2out. */
9347 if (m->fs.realigned)
9349 gcc_checking_assert (stack_realign_drap);
9351 if (regno == REGNO (crtl->drap_reg))
9353 /* A bit of a hack. We force the DRAP register to be saved in
9354 the re-aligned stack frame, which provides us with a copy
9355 of the CFA that will last past the prologue. Install it. */
9356 gcc_checking_assert (cfun->machine->fs.fp_valid);
9357 addr = plus_constant (hard_frame_pointer_rtx,
9358 cfun->machine->fs.fp_offset - cfa_offset);
9359 mem = gen_rtx_MEM (mode, addr);
9360 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9364 /* The frame pointer is a stable reference within the
9365 aligned frame. Use it. */
9366 gcc_checking_assert (cfun->machine->fs.fp_valid);
9367 addr = plus_constant (hard_frame_pointer_rtx,
9368 cfun->machine->fs.fp_offset - cfa_offset);
9369 mem = gen_rtx_MEM (mode, addr);
9370 add_reg_note (insn, REG_CFA_EXPRESSION,
9371 gen_rtx_SET (VOIDmode, mem, reg));
9375 /* The memory may not be relative to the current CFA register,
9376 which means that we may need to generate a new pattern for
9377 use by the unwind info. */
9378 else if (base != m->fs.cfa_reg)
9380 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9381 mem = gen_rtx_MEM (mode, addr);
9382 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9386 /* Emit code to save registers using MOV insns.
9387 First register is stored at CFA - CFA_OFFSET. */
9389 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9393 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9394 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9396 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9397 cfa_offset -= UNITS_PER_WORD;
9401 /* Emit code to save SSE registers using MOV insns.
9402 First register is stored at CFA - CFA_OFFSET. */
9404 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9408 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9409 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9411 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9416 static GTY(()) rtx queued_cfa_restores;
9418 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9419 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9420 Don't add the note if the previously saved value will be left untouched
9421 within the stack red zone until return, as unwinders can find the same value
9422 in the register and on the stack. */
9425 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9427 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9432 add_reg_note (insn, REG_CFA_RESTORE, reg);
9433 RTX_FRAME_RELATED_P (insn) = 1;
9437 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9440 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9443 ix86_add_queued_cfa_restore_notes (rtx insn)
9446 if (!queued_cfa_restores)
9448 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9450 XEXP (last, 1) = REG_NOTES (insn);
9451 REG_NOTES (insn) = queued_cfa_restores;
9452 queued_cfa_restores = NULL_RTX;
9453 RTX_FRAME_RELATED_P (insn) = 1;
9456 /* Expand prologue or epilogue stack adjustment.
9457 The pattern exists to put a dependency on all ebp-based memory accesses.
9458 STYLE should be negative if instructions should be marked as frame related,
9459 zero if the %r11 register is live and cannot be freely used, and positive
9460 otherwise. */
9463 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9464 int style, bool set_cfa)
9466 struct machine_function *m = cfun->machine;
9468 bool add_frame_related_expr = false;
9471 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9472 else if (x86_64_immediate_operand (offset, DImode))
9473 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9477 /* r11 is used by the indirect sibcall return as well; it is set before
9478 the epilogue and used after the epilogue. */
9480 tmp = gen_rtx_REG (DImode, R11_REG);
9483 gcc_assert (src != hard_frame_pointer_rtx
9484 && dest != hard_frame_pointer_rtx);
9485 tmp = hard_frame_pointer_rtx;
9487 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9489 add_frame_related_expr = true;
9491 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9494 insn = emit_insn (insn);
9496 ix86_add_queued_cfa_restore_notes (insn);
9502 gcc_assert (m->fs.cfa_reg == src);
9503 m->fs.cfa_offset += INTVAL (offset);
9504 m->fs.cfa_reg = dest;
9506 r = gen_rtx_PLUS (Pmode, src, offset);
9507 r = gen_rtx_SET (VOIDmode, dest, r);
9508 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9509 RTX_FRAME_RELATED_P (insn) = 1;
9513 RTX_FRAME_RELATED_P (insn) = 1;
9514 if (add_frame_related_expr)
9516 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9517 r = gen_rtx_SET (VOIDmode, dest, r);
9518 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9522 if (dest == stack_pointer_rtx)
9524 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9525 bool valid = m->fs.sp_valid;
9527 if (src == hard_frame_pointer_rtx)
9529 valid = m->fs.fp_valid;
9530 ooffset = m->fs.fp_offset;
9532 else if (src == crtl->drap_reg)
9534 valid = m->fs.drap_valid;
9539 /* Else there are two possibilities: SP itself, which we set
9540 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9541 taken care of by hand along the eh_return path. */
9542 gcc_checking_assert (src == stack_pointer_rtx
9543 || offset == const0_rtx);
9546 m->fs.sp_offset = ooffset - INTVAL (offset);
9547 m->fs.sp_valid = valid;
9551 /* Find an available register to be used as the dynamic realign argument
9552 pointer register. Such a register will be written in the prologue and
9553 used at the beginning of the body, so it must not be
9554 1. a parameter passing register.
9555 2. the GOT pointer.
9556 We reuse the static-chain register if it is available. Otherwise, we
9557 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9558 longer encoding.
9560 Return: the regno of the chosen register. */
9563 find_drap_reg (void)
9565 tree decl = cfun->decl;
9569 /* Use R13 for a nested function or a function that needs a static chain.
9570 Since a function with a tail call may use any caller-saved
9571 register in the epilogue, DRAP must not use a caller-saved
9572 register in that case. */
9573 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9580 /* Use DI for a nested function or a function that needs a static chain.
9581 Since a function with a tail call may use any caller-saved
9582 register in the epilogue, DRAP must not use a caller-saved
9583 register in that case. */
9584 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9587 /* Reuse the static chain register if it isn't used for parameter
9588 passing. */
9589 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9590 && !lookup_attribute ("fastcall",
9591 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9592 && !lookup_attribute ("thiscall",
9593 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9600 /* Return minimum incoming stack alignment. */
9603 ix86_minimum_incoming_stack_boundary (bool sibcall)
9605 unsigned int incoming_stack_boundary;
9607 /* Prefer the one specified at the command line. */
9608 if (ix86_user_incoming_stack_boundary)
9609 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9610 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9611 if -mstackrealign is used, this isn't a sibcall check, and the
9612 estimated stack alignment is 128 bits. */
9615 && ix86_force_align_arg_pointer
9616 && crtl->stack_alignment_estimated == 128)
9617 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9619 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9621 /* Incoming stack alignment can be changed on individual functions
9622 via force_align_arg_pointer attribute. We use the smallest
9623 incoming stack boundary. */
9624 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9625 && lookup_attribute (ix86_force_align_arg_pointer_string,
9626 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9627 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9629 /* The incoming stack frame has to be aligned at least at
9630 parm_stack_boundary. */
9631 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9632 incoming_stack_boundary = crtl->parm_stack_boundary;
9634 /* The stack at the entry of main is aligned by the runtime. We use
9635 the smallest incoming stack boundary. */
9636 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9637 && DECL_NAME (current_function_decl)
9638 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9639 && DECL_FILE_SCOPE_P (current_function_decl))
9640 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9642 return incoming_stack_boundary;
9645 /* Update incoming stack boundary and estimated stack alignment. */
9648 ix86_update_stack_boundary (void)
9650 ix86_incoming_stack_boundary
9651 = ix86_minimum_incoming_stack_boundary (false);
9653 /* x86_64 varargs need 16-byte stack alignment for the register save
9654 area. */
9657 && crtl->stack_alignment_estimated < 128)
9658 crtl->stack_alignment_estimated = 128;
9661 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9662 needed or an rtx for DRAP otherwise. */
9665 ix86_get_drap_rtx (void)
9667 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9668 crtl->need_drap = true;
9670 if (stack_realign_drap)
9672 /* Assign DRAP to vDRAP and return vDRAP. */
9673 unsigned int regno = find_drap_reg ();
9678 arg_ptr = gen_rtx_REG (Pmode, regno);
9679 crtl->drap_reg = arg_ptr;
9682 drap_vreg = copy_to_reg (arg_ptr);
9686 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9689 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9690 RTX_FRAME_RELATED_P (insn) = 1;
9698 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9701 ix86_internal_arg_pointer (void)
9703 return virtual_incoming_args_rtx;
9706 struct scratch_reg {
9711 /* Return a short-lived scratch register for use on function entry.
9712 In 32-bit mode, it is valid only after the registers are saved
9713 in the prologue. This register must be released by means of
9714 release_scratch_register_on_entry once it is dead. */
9717 get_scratch_register_on_entry (struct scratch_reg *sr)
9725 /* We always use R11 in 64-bit mode. */
9730 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9732 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9733 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9734 int regparm = ix86_function_regparm (fntype, decl);
9736 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9738 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9739 for the static chain register. */
9740 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9741 && drap_regno != AX_REG)
9743 else if (regparm < 2 && drap_regno != DX_REG)
9745 /* ecx is the static chain register. */
9746 else if (regparm < 3 && !fastcall_p && !static_chain_p
9747 && drap_regno != CX_REG)
9749 else if (ix86_save_reg (BX_REG, true))
9751 /* esi is the static chain register. */
9752 else if (!(regparm == 3 && static_chain_p)
9753 && ix86_save_reg (SI_REG, true))
9755 else if (ix86_save_reg (DI_REG, true))
9759 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9764 sr->reg = gen_rtx_REG (Pmode, regno);
9767 rtx insn = emit_insn (gen_push (sr->reg));
9768 RTX_FRAME_RELATED_P (insn) = 1;
9772 /* Release a scratch register obtained from the preceding function. */
9775 release_scratch_register_on_entry (struct scratch_reg *sr)
9779 rtx x, insn = emit_insn (gen_pop (sr->reg));
9781 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9782 RTX_FRAME_RELATED_P (insn) = 1;
9783 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9784 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9785 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
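/* Typical usage of the pair above, mirroring the stack probing
   routines below (a sketch):

     struct scratch_reg sr;

     get_scratch_register_on_entry (&sr);
     ... emit insns using sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);

   In 32-bit mode the register may have been spilled with a push, in
   which case the release step emits the matching pop together with
   its unwind note.  */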
9789 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9791 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9794 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9796 /* We skip the probe for the first interval + a small dope of 4 words and
9797 probe that many bytes past the specified size to maintain a protection
9798 area at the bottom of the stack. */
9799 const int dope = 4 * UNITS_PER_WORD;
9800 rtx size_rtx = GEN_INT (size);
9802 /* See if we have a constant small number of probes to generate. If so,
9803 that's the easy case. The run-time loop is made up of 11 insns in the
9804 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9805 for n # of intervals. */
9806 if (size <= 5 * PROBE_INTERVAL)
9808 HOST_WIDE_INT i, adjust;
9809 bool first_probe = true;
9811 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9812 values of N from 1 until it exceeds SIZE. If only one probe is
9813 needed, this will not generate any code. Then adjust and probe
9814 to PROBE_INTERVAL + SIZE. */
9815 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9819 adjust = 2 * PROBE_INTERVAL + dope;
9820 first_probe = false;
9823 adjust = PROBE_INTERVAL;
9825 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9826 plus_constant (stack_pointer_rtx, -adjust)));
9827 emit_stack_probe (stack_pointer_rtx);
9831 adjust = size + PROBE_INTERVAL + dope;
9833 adjust = size + PROBE_INTERVAL - i;
9835 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9836 plus_constant (stack_pointer_rtx, -adjust)));
9837 emit_stack_probe (stack_pointer_rtx);
9839 /* Adjust back to account for the additional first interval. */
9840 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9841 plus_constant (stack_pointer_rtx,
9842 PROBE_INTERVAL + dope)));
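/* A worked example of the unrolled case above (illustrative,
   assuming PROBE_INTERVAL = 4096 and, on 64-bit, dope = 32): for
   size = 4096 the loop never iterates, so we emit

     sub $8224, %sp ; probe     (size + PROBE_INTERVAL + dope)
     add $4128, %sp             (PROBE_INTERVAL + dope)

   a net adjustment of exactly 4096 with a single probe placed past
   the protection area.  */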
9845 /* Otherwise, do the same as above, but in a loop. Note that we must be
9846 extra careful with variables wrapping around because we might be at
9847 the very top (or the very bottom) of the address space and we have
9848 to be able to handle this case properly; in particular, we use an
9849 equality test for the loop condition. */
9852 HOST_WIDE_INT rounded_size;
9853 struct scratch_reg sr;
9855 get_scratch_register_on_entry (&sr);
9858 /* Step 1: round SIZE to the previous multiple of the interval. */
9860 rounded_size = size & -PROBE_INTERVAL;
9863 /* Step 2: compute initial and final value of the loop counter. */
9865 /* SP = SP_0 + PROBE_INTERVAL. */
9866 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9867 plus_constant (stack_pointer_rtx,
9868 - (PROBE_INTERVAL + dope))));
9870 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9871 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9872 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9873 gen_rtx_PLUS (Pmode, sr.reg,
9874 stack_pointer_rtx)));
9879 while (SP != LAST_ADDR)
9881 SP = SP + PROBE_INTERVAL
9885 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9886 values of N from 1 until it is equal to ROUNDED_SIZE. */
9888 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9891 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9892 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9894 if (size != rounded_size)
9896 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9897 plus_constant (stack_pointer_rtx,
9898 rounded_size - size)));
9899 emit_stack_probe (stack_pointer_rtx);
9902 /* Adjust back to account for the additional first interval. */
9903 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9904 plus_constant (stack_pointer_rtx,
9905 PROBE_INTERVAL + dope)));
9907 release_scratch_register_on_entry (&sr);
9910 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9911 cfun->machine->fs.sp_offset += size;
9913 /* Make sure nothing is scheduled before we are done. */
9914 emit_insn (gen_blockage ());
9917 /* Adjust the stack pointer up to REG while probing it. */
9920 output_adjust_stack_and_probe (rtx reg)
9922 static int labelno = 0;
9923 char loop_lab[32], end_lab[32];
9926 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9927 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9929 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9931 /* Jump to END_LAB if SP == LAST_ADDR. */
9932 xops[0] = stack_pointer_rtx;
9934 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9935 fputs ("\tje\t", asm_out_file);
9936 assemble_name_raw (asm_out_file, end_lab);
9937 fputc ('\n', asm_out_file);
9939 /* SP = SP + PROBE_INTERVAL. */
9940 xops[1] = GEN_INT (PROBE_INTERVAL);
9941 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9944 xops[1] = const0_rtx;
9945 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9947 fprintf (asm_out_file, "\tjmp\t");
9948 assemble_name_raw (asm_out_file, loop_lab);
9949 fputc ('\n', asm_out_file);
9951 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9956 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9957 inclusive. These are offsets from the current stack pointer. */
9960 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9962 /* See if we have a constant small number of probes to generate. If so,
9963 that's the easy case. The run-time loop is made up of 7 insns in the
9964 generic case while the compile-time loop is made up of n insns for n #
9965 of intervals. */
9966 if (size <= 7 * PROBE_INTERVAL)
9970 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9971 it exceeds SIZE. If only one probe is needed, this will not
9972 generate any code. Then probe at FIRST + SIZE. */
9973 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9974 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9976 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9979 /* Otherwise, do the same as above, but in a loop. Note that we must be
9980 extra careful with variables wrapping around because we might be at
9981 the very top (or the very bottom) of the address space and we have
9982 to be able to handle this case properly; in particular, we use an
9983 equality test for the loop condition. */
9986 HOST_WIDE_INT rounded_size, last;
9987 struct scratch_reg sr;
9989 get_scratch_register_on_entry (&sr);
9992 /* Step 1: round SIZE to the previous multiple of the interval. */
9994 rounded_size = size & -PROBE_INTERVAL;
9997 /* Step 2: compute initial and final value of the loop counter. */
9999 /* TEST_OFFSET = FIRST. */
10000 emit_move_insn (sr.reg, GEN_INT (-first));
10002 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10003 last = first + rounded_size;
10006 /* Step 3: the loop
10008 while (TEST_ADDR != LAST_ADDR)
10010 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10014 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10015 until it is equal to ROUNDED_SIZE. */
10017 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10020 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10021 that SIZE is equal to ROUNDED_SIZE. */
10023 if (size != rounded_size)
10024 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10027 rounded_size - size));
10029 release_scratch_register_on_entry (&sr);
10032 /* Make sure nothing is scheduled before we are done. */
10033 emit_insn (gen_blockage ());
10036 /* Probe a range of stack addresses from REG to END, inclusive. These are
10037 offsets from the current stack pointer. */
10040 output_probe_stack_range (rtx reg, rtx end)
10042 static int labelno = 0;
10043 char loop_lab[32], end_lab[32];
10046 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10047 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10049 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10051 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10054 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10055 fputs ("\tje\t", asm_out_file);
10056 assemble_name_raw (asm_out_file, end_lab);
10057 fputc ('\n', asm_out_file);
10059 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10060 xops[1] = GEN_INT (PROBE_INTERVAL);
10061 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10063 /* Probe at TEST_ADDR. */
10064 xops[0] = stack_pointer_rtx;
10066 xops[2] = const0_rtx;
10067 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10069 fprintf (asm_out_file, "\tjmp\t");
10070 assemble_name_raw (asm_out_file, loop_lab);
10071 fputc ('\n', asm_out_file);
10073 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
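/* The output of the routine above looks roughly like this (a sketch;
   PROBE_INTERVAL = 4096 assumed, register names illustrative):

     .LPSRL0:  cmpl %edx, %eax
               je   .LPSRE0
               subl $4096, %eax
               orl  $0, (%esp,%eax)
               jmp  .LPSRL0
     .LPSRE0:

   i.e. TEST_ADDR walks down in PROBE_INTERVAL steps, touching one
   word per interval, until it reaches LAST_ADDR.  */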
10078 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10079 to be generated in correct form. */
10081 ix86_finalize_stack_realign_flags (void)
10083 /* Check if stack realignment is really needed after reload, and
10084 store the result in cfun. */
10085 unsigned int incoming_stack_boundary
10086 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10087 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10088 unsigned int stack_realign = (incoming_stack_boundary
10089 < (current_function_is_leaf
10090 ? crtl->max_used_stack_slot_alignment
10091 : crtl->stack_alignment_needed));
10093 if (crtl->stack_realign_finalized)
10095 /* After stack_realign_needed is finalized, we can no longer
10096 update it. */
10097 gcc_assert (crtl->stack_realign_needed == stack_realign);
10101 crtl->stack_realign_needed = stack_realign;
10102 crtl->stack_realign_finalized = true;
10106 /* Expand the prologue into a bunch of separate insns. */
10109 ix86_expand_prologue (void)
10111 struct machine_function *m = cfun->machine;
10114 struct ix86_frame frame;
10115 HOST_WIDE_INT allocate;
10116 bool int_registers_saved;
10118 ix86_finalize_stack_realign_flags ();
10120 /* DRAP should not coexist with stack_realign_fp.  */
10121 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10123 memset (&m->fs, 0, sizeof (m->fs));
10125 /* Initialize CFA state for before the prologue. */
10126 m->fs.cfa_reg = stack_pointer_rtx;
10127 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10129 /* Track SP offset to the CFA. We continue tracking this after we've
10130 swapped the CFA register away from SP. In the case of re-alignment
10131 this is fudged; we're interested in offsets within the local frame. */
10132 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10133 m->fs.sp_valid = true;
10135 ix86_compute_frame_layout (&frame);
10137 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10139 /* We should have already generated an error for any use of
10140 ms_hook on a nested function. */
10141 gcc_checking_assert (!ix86_static_chain_on_stack);
10143 /* Check if profiling is active and we shall use the profiling-before-
10144 prologue variant. If so, issue a sorry. */
10145 if (crtl->profile && flag_fentry != 0)
10146 sorry ("ms_hook_prologue attribute isn%'t compatible "
10147 "with -mfentry for 32-bit");
10149 /* In ix86_asm_output_function_label we emitted:
10150 8b ff movl.s %edi,%edi
10151 55 push %ebp
10152 8b ec movl.s %esp,%ebp
10154 This matches the hookable function prologue in Win32 API
10155 functions in Microsoft Windows XP Service Pack 2 and newer.
10156 Wine uses this to enable Windows apps to hook the Win32 API
10157 functions provided by Wine.
10159 What that means is that we've already set up the frame pointer. */
10161 if (frame_pointer_needed
10162 && !(crtl->drap_reg && crtl->stack_realign_needed))
10166 /* We've decided to use the frame pointer already set up.
10167 Describe this to the unwinder by pretending that both
10168 push and mov insns happen right here.
10170 Putting the unwind info here at the end of the ms_hook
10171 is done so that we can make absolutely certain we get
10172 the required byte sequence at the start of the function,
10173 rather than relying on an assembler that can produce
10174 the exact encoding required.
10176 However it does mean (in the unpatched case) that we have
10177 a 1 insn window where the asynchronous unwind info is
10178 incorrect. However, if we placed the unwind info at
10179 its correct location we would have incorrect unwind info
10180 in the patched case. Which is probably all moot since
10181 I don't expect Wine generates dwarf2 unwind info for the
10182 system libraries that use this feature. */
10184 insn = emit_insn (gen_blockage ());
10186 push = gen_push (hard_frame_pointer_rtx);
10187 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10188 stack_pointer_rtx);
10189 RTX_FRAME_RELATED_P (push) = 1;
10190 RTX_FRAME_RELATED_P (mov) = 1;
10192 RTX_FRAME_RELATED_P (insn) = 1;
10193 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10194 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10196 /* Note that gen_push incremented m->fs.cfa_offset, even
10197 though we didn't emit the push insn here. */
10198 m->fs.cfa_reg = hard_frame_pointer_rtx;
10199 m->fs.fp_offset = m->fs.cfa_offset;
10200 m->fs.fp_valid = true;
10204 /* The frame pointer is not needed so pop %ebp again.
10205 This leaves us with a pristine state. */
10206 emit_insn (gen_pop (hard_frame_pointer_rtx));
10210 /* The first insn of a function that accepts its static chain on the
10211 stack is to push the register that would be filled in by a direct
10212 call. This insn will be skipped by the trampoline. */
10213 else if (ix86_static_chain_on_stack)
10215 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10216 emit_insn (gen_blockage ());
10218 /* We don't want to interpret this push insn as a register save,
10219 only as a stack adjustment. The real copy of the register as
10220 a save will be done later, if needed. */
10221 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10222 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10223 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10224 RTX_FRAME_RELATED_P (insn) = 1;
10227 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10228 DRAP is needed and stack realignment is really needed after reload. */
10229 if (stack_realign_drap)
10231 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10233 /* Only need to push the parameter pointer reg if it is caller-saved. */
10234 if (!call_used_regs[REGNO (crtl->drap_reg)])
10236 /* Push the argument pointer register. */
10237 insn = emit_insn (gen_push (crtl->drap_reg));
10238 RTX_FRAME_RELATED_P (insn) = 1;
10241 /* Grab the argument pointer. */
10242 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10243 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10244 RTX_FRAME_RELATED_P (insn) = 1;
10245 m->fs.cfa_reg = crtl->drap_reg;
10246 m->fs.cfa_offset = 0;
10248 /* Align the stack. */
10249 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10251 GEN_INT (-align_bytes)));
10252 RTX_FRAME_RELATED_P (insn) = 1;
10254 /* Replicate the return address on the stack so that return
10255 address can be reached via (argp - 1) slot. This is needed
10256 to implement macro RETURN_ADDR_RTX and intrinsic function
10257 expand_builtin_return_addr etc. */
10258 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10259 t = gen_frame_mem (Pmode, t);
10260 insn = emit_insn (gen_push (t));
10261 RTX_FRAME_RELATED_P (insn) = 1;
10263 /* For the purposes of frame and register save area addressing,
10264 we've started over with a new frame. */
10265 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10266 m->fs.realigned = true;
10269 if (frame_pointer_needed && !m->fs.fp_valid)
10271 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10272 slower on all targets. Also sdb doesn't like it. */
10273 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10274 RTX_FRAME_RELATED_P (insn) = 1;
10276 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10278 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10279 RTX_FRAME_RELATED_P (insn) = 1;
10281 if (m->fs.cfa_reg == stack_pointer_rtx)
10282 m->fs.cfa_reg = hard_frame_pointer_rtx;
10283 m->fs.fp_offset = m->fs.sp_offset;
10284 m->fs.fp_valid = true;
10288 int_registers_saved = (frame.nregs == 0);
10290 if (!int_registers_saved)
10292 /* If saving registers via PUSH, do so now. */
10293 if (!frame.save_regs_using_mov)
10295 ix86_emit_save_regs ();
10296 int_registers_saved = true;
10297 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10300 /* When using red zone we may start register saving before allocating
10301 the stack frame saving one cycle of the prologue. However, avoid
10302 doing this if we have to probe the stack; at least on x86_64 the
10303 stack probe can turn into a call that clobbers a red zone location. */
10304 else if (ix86_using_red_zone ()
10305 && (! TARGET_STACK_PROBE
10306 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10308 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10309 int_registers_saved = true;
10313 if (stack_realign_fp)
10315 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10316 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10318 /* The computation of the size of the re-aligned stack frame means
10319 that we must allocate the size of the register save area before
10320 performing the actual alignment. Otherwise we cannot guarantee
10321 that there's enough storage above the realignment point. */
10322 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10323 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10324 GEN_INT (m->fs.sp_offset
10325 - frame.sse_reg_save_offset),
10328 /* Align the stack. */
10329 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10331 GEN_INT (-align_bytes)));
10333 /* For the purposes of register save area addressing, the stack
10334 pointer is no longer valid. As for the value of sp_offset,
10335 see ix86_compute_frame_layout, which we need to match in order
10336 to pass verification of stack_pointer_offset at the end. */
10337 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10338 m->fs.sp_valid = false;
10341 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10343 if (flag_stack_usage)
10345 /* We start to count from ARG_POINTER. */
10346 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10348 /* If it was realigned, take into account the fake frame. */
10349 if (stack_realign_drap)
10351 if (ix86_static_chain_on_stack)
10352 stack_size += UNITS_PER_WORD;
10354 if (!call_used_regs[REGNO (crtl->drap_reg)])
10355 stack_size += UNITS_PER_WORD;
10357 /* This over-estimates by 1 minimal-stack-alignment-unit but
10358 mitigates that by counting in the new return address slot. */
10359 current_function_dynamic_stack_size
10360 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10363 current_function_static_stack_size = stack_size;
10366 /* The stack has already been decremented by the instruction calling us
10367 so we need to probe unconditionally to preserve the protection area. */
10368 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10370 /* We expect the registers to be saved when probes are used. */
10371 gcc_assert (int_registers_saved);
10373 if (STACK_CHECK_MOVING_SP)
10375 ix86_adjust_stack_and_probe (allocate);
10380 HOST_WIDE_INT size = allocate;
10382 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10383 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10385 if (TARGET_STACK_PROBE)
10386 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10388 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10394 else if (!ix86_target_stack_probe ()
10395 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10397 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10398 GEN_INT (-allocate), -1,
10399 m->fs.cfa_reg == stack_pointer_rtx);
10403 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10405 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10407 bool eax_live = false;
10408 bool r10_live = false;
10411 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10412 if (!TARGET_64BIT_MS_ABI)
10413 eax_live = ix86_eax_live_at_start_p ();
10417 emit_insn (gen_push (eax));
10418 allocate -= UNITS_PER_WORD;
10422 r10 = gen_rtx_REG (Pmode, R10_REG);
10423 emit_insn (gen_push (r10));
10424 allocate -= UNITS_PER_WORD;
10427 emit_move_insn (eax, GEN_INT (allocate));
10428 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10430 /* Use the fact that AX still contains ALLOCATE. */
10431 adjust_stack_insn = (TARGET_64BIT
10432 ? gen_pro_epilogue_adjust_stack_di_sub
10433 : gen_pro_epilogue_adjust_stack_si_sub);
10435 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10436 stack_pointer_rtx, eax));
10438 /* Note that SEH directives need to continue tracking the stack
10439 pointer even after the frame pointer has been set up. */
10440 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10442 if (m->fs.cfa_reg == stack_pointer_rtx)
10443 m->fs.cfa_offset += allocate;
10445 RTX_FRAME_RELATED_P (insn) = 1;
10446 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10447 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10448 plus_constant (stack_pointer_rtx,
10451 m->fs.sp_offset += allocate;
10453 if (r10_live && eax_live)
10455 t = choose_baseaddr (m->fs.sp_offset - allocate);
10456 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10457 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10458 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10460 else if (eax_live || r10_live)
10462 t = choose_baseaddr (m->fs.sp_offset - allocate);
10463 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10466 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10468 /* If we haven't already set up the frame pointer, do so now. */
10469 if (frame_pointer_needed && !m->fs.fp_valid)
10471 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10472 GEN_INT (frame.stack_pointer_offset
10473 - frame.hard_frame_pointer_offset));
10474 insn = emit_insn (insn);
10475 RTX_FRAME_RELATED_P (insn) = 1;
10476 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10478 if (m->fs.cfa_reg == stack_pointer_rtx)
10479 m->fs.cfa_reg = hard_frame_pointer_rtx;
10480 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10481 m->fs.fp_valid = true;
10484 if (!int_registers_saved)
10485 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10486 if (frame.nsseregs)
10487 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10489 pic_reg_used = false;
10490 if (pic_offset_table_rtx
10491 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10494 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10496 if (alt_pic_reg_used != INVALID_REGNUM)
10497 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10499 pic_reg_used = true;
10506 if (ix86_cmodel == CM_LARGE_PIC)
10508 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10509 rtx label = gen_label_rtx ();
10510 emit_label (label);
10511 LABEL_PRESERVE_P (label) = 1;
10512 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10513 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10514 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10515 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10516 pic_offset_table_rtx, tmp_reg));
10519 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10522 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10525 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
10526 when mcount needs it.  A blockage to avoid call movement across the mcount
10527 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END note.  */
10529 if (crtl->profile && !flag_fentry && pic_reg_used)
10530 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10532 if (crtl->drap_reg && !crtl->stack_realign_needed)
10534 /* vDRAP is set up, but after reload it turns out stack realignment
10535 isn't necessary; here we emit the prologue to set up DRAP
10536 without the stack realignment adjustment.  */
10537 t = choose_baseaddr (0);
10538 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10541 /* Prevent instructions from being scheduled into register save push
10542 sequence when access to the redzone area is done through frame pointer.
10543 The offset between the frame pointer and the stack pointer is calculated
10544 relative to the value of the stack pointer at the end of the function
10545 prologue, and moving instructions that access redzone area via frame
10546 pointer inside push sequence violates this assumption. */
10547 if (frame_pointer_needed && frame.red_zone_size)
10548 emit_insn (gen_memory_blockage ());
10550 /* Emit cld instruction if stringops are used in the function. */
10551 if (TARGET_CLD && ix86_current_function_needs_cld)
10552 emit_insn (gen_cld ());
10554 /* SEH requires that the prologue end within 256 bytes of the start of
10555 the function. Prevent instruction schedules that would extend that. */
10557 emit_insn (gen_blockage ());
10560 /* Emit code to restore REG using a POP insn. */
10563 ix86_emit_restore_reg_using_pop (rtx reg)
10565 struct machine_function *m = cfun->machine;
10566 rtx insn = emit_insn (gen_pop (reg));
10568 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10569 m->fs.sp_offset -= UNITS_PER_WORD;
10571 if (m->fs.cfa_reg == crtl->drap_reg
10572 && REGNO (reg) == REGNO (crtl->drap_reg))
10574 /* Previously we'd represented the CFA as an expression
10575 like *(%ebp - 8). We've just popped that value from
10576 the stack, which means we need to reset the CFA to
10577 the drap register. This will remain until we restore
10578 the stack pointer. */
10579 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10580 RTX_FRAME_RELATED_P (insn) = 1;
10582 /* This means that the DRAP register is valid for addressing too. */
10583 m->fs.drap_valid = true;
10587 if (m->fs.cfa_reg == stack_pointer_rtx)
10589 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10590 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10591 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10592 RTX_FRAME_RELATED_P (insn) = 1;
10594 m->fs.cfa_offset -= UNITS_PER_WORD;
10597 /* When the frame pointer is the CFA, and we pop it, we are
10598 swapping back to the stack pointer as the CFA. This happens
10599 for stack frames that don't allocate other data, so we assume
10600 the stack pointer is now pointing at the return address, i.e.
10601 the function entry state, which makes the offset be 1 word. */
10602 if (reg == hard_frame_pointer_rtx)
10604 m->fs.fp_valid = false;
10605 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10607 m->fs.cfa_reg = stack_pointer_rtx;
10608 m->fs.cfa_offset -= UNITS_PER_WORD;
10610 add_reg_note (insn, REG_CFA_DEF_CFA,
10611 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10612 GEN_INT (m->fs.cfa_offset)));
10613 RTX_FRAME_RELATED_P (insn) = 1;
10618 /* Emit code to restore saved registers using POP insns. */
10621 ix86_emit_restore_regs_using_pop (void)
10623 unsigned int regno;
10625 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10626 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10627 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10630 /* Emit code and notes for the LEAVE instruction. */
10633 ix86_emit_leave (void)
10635 struct machine_function *m = cfun->machine;
10636 rtx insn = emit_insn (ix86_gen_leave ());
10638 ix86_add_queued_cfa_restore_notes (insn);
10640 gcc_assert (m->fs.fp_valid);
10641 m->fs.sp_valid = true;
10642 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10643 m->fs.fp_valid = false;
10645 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10647 m->fs.cfa_reg = stack_pointer_rtx;
10648 m->fs.cfa_offset = m->fs.sp_offset;
10650 add_reg_note (insn, REG_CFA_DEF_CFA,
10651 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10652 RTX_FRAME_RELATED_P (insn) = 1;
10653 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10658 /* Emit code to restore saved registers using MOV insns.
10659 First register is restored from CFA - CFA_OFFSET. */
10661 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10662 int maybe_eh_return)
10664 struct machine_function *m = cfun->machine;
10665 unsigned int regno;
10667 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10668 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10670 rtx reg = gen_rtx_REG (Pmode, regno);
10673 mem = choose_baseaddr (cfa_offset);
10674 mem = gen_frame_mem (Pmode, mem);
10675 insn = emit_move_insn (reg, mem);
10677 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10679 /* Previously we'd represented the CFA as an expression
10680 like *(%ebp - 8). We've just popped that value from
10681 the stack, which means we need to reset the CFA to
10682 the drap register. This will remain until we restore
10683 the stack pointer. */
10684 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10685 RTX_FRAME_RELATED_P (insn) = 1;
10687 /* This means that the DRAP register is valid for addressing. */
10688 m->fs.drap_valid = true;
10691 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10693 cfa_offset -= UNITS_PER_WORD;
10697 /* Emit code to restore saved registers using MOV insns.
10698 First register is restored from CFA - CFA_OFFSET. */
10700 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10701 int maybe_eh_return)
10703 unsigned int regno;
10705 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10706 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10708 rtx reg = gen_rtx_REG (V4SFmode, regno);
10711 mem = choose_baseaddr (cfa_offset);
10712 mem = gen_rtx_MEM (V4SFmode, mem);
10713 set_mem_align (mem, 128);
10714 emit_move_insn (reg, mem);
10716 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10722 /* Restore function stack, frame, and registers. */
10725 ix86_expand_epilogue (int style)
10727 struct machine_function *m = cfun->machine;
10728 struct machine_frame_state frame_state_save = m->fs;
10729 struct ix86_frame frame;
10730 bool restore_regs_via_mov;
10733 ix86_finalize_stack_realign_flags ();
10734 ix86_compute_frame_layout (&frame);
10736 m->fs.sp_valid = (!frame_pointer_needed
10737 || (current_function_sp_is_unchanging
10738 && !stack_realign_fp));
10739 gcc_assert (!m->fs.sp_valid
10740 || m->fs.sp_offset == frame.stack_pointer_offset);
10742 /* The FP must be valid if the frame pointer is present. */
10743 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10744 gcc_assert (!m->fs.fp_valid
10745 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10747 /* We must have *some* valid pointer to the stack frame. */
10748 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10750 /* The DRAP is never valid at this point. */
10751 gcc_assert (!m->fs.drap_valid);
10753 /* See the comment about red zone and frame
10754 pointer usage in ix86_expand_prologue. */
10755 if (frame_pointer_needed && frame.red_zone_size)
10756 emit_insn (gen_memory_blockage ());
10758 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10759 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10761 /* Determine the CFA offset of the end of the red-zone. */
10762 m->fs.red_zone_offset = 0;
10763 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10765 /* The red-zone begins below the return address. */
10766 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10768 /* When the register save area is in the aligned portion of
10769 the stack, determine the maximum runtime displacement that
10770 matches up with the aligned frame. */
10771 if (stack_realign_drap)
10772 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10776 /* Special care must be taken for the normal return case of a function
10777 using eh_return: the eax and edx registers are marked as saved, but
10778 not restored along this path. Adjust the save location to match. */
10779 if (crtl->calls_eh_return && style != 2)
10780 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10782 /* EH_RETURN requires the use of moves to function properly. */
10783 if (crtl->calls_eh_return)
10784 restore_regs_via_mov = true;
10785 /* SEH requires the use of pops to identify the epilogue. */
10786 else if (TARGET_SEH)
10787 restore_regs_via_mov = false;
10788 /* If we're only restoring one register and sp is not valid then
10789 use a move instruction to restore the register, since it's
10790 less work than reloading sp and popping the register. */
10791 else if (!m->fs.sp_valid && frame.nregs <= 1)
10792 restore_regs_via_mov = true;
10793 else if (TARGET_EPILOGUE_USING_MOVE
10794 && cfun->machine->use_fast_prologue_epilogue
10795 && (frame.nregs > 1
10796 || m->fs.sp_offset != frame.reg_save_offset))
10797 restore_regs_via_mov = true;
10798 else if (frame_pointer_needed
10800 && m->fs.sp_offset != frame.reg_save_offset)
10801 restore_regs_via_mov = true;
10802 else if (frame_pointer_needed
10803 && TARGET_USE_LEAVE
10804 && cfun->machine->use_fast_prologue_epilogue
10805 && frame.nregs == 1)
10806 restore_regs_via_mov = true;
10808 restore_regs_via_mov = false;
10810 if (restore_regs_via_mov || frame.nsseregs)
10812 /* Ensure that the entire register save area is addressable via
10813 the stack pointer, if we will restore via sp. */
10815 && m->fs.sp_offset > 0x7fffffff
10816 && !(m->fs.fp_valid || m->fs.drap_valid)
10817 && (frame.nsseregs + frame.nregs) != 0)
10819 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10820 GEN_INT (m->fs.sp_offset
10821 - frame.sse_reg_save_offset),
10823 m->fs.cfa_reg == stack_pointer_rtx);
10827 /* If there are any SSE registers to restore, then we have to do it
10828 via moves, since there's obviously no pop for SSE regs. */
10829 if (frame.nsseregs)
10830 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10833 if (restore_regs_via_mov)
10838 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10840 /* eh_return epilogues need %ecx added to the stack pointer. */
10843 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10845 /* Stack align doesn't work with eh_return. */
10846 gcc_assert (!stack_realign_drap);
10847 /* Neither do regparm nested functions. */
10848 gcc_assert (!ix86_static_chain_on_stack);
10850 if (frame_pointer_needed)
10852 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10853 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10854 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10856 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10857 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10859 /* Note that we use SA as a temporary CFA, as the return
10860 address is at the proper place relative to it. We
10861 pretend this happens at the FP restore insn because
10862 prior to this insn the FP would be stored at the wrong
10863 offset relative to SA, and after this insn we have no
10864 other reasonable register to use for the CFA. We don't
10865 bother resetting the CFA to the SP for the duration of
10866 the return insn. */
10867 add_reg_note (insn, REG_CFA_DEF_CFA,
10868 plus_constant (sa, UNITS_PER_WORD));
10869 ix86_add_queued_cfa_restore_notes (insn);
10870 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10871 RTX_FRAME_RELATED_P (insn) = 1;
10873 m->fs.cfa_reg = sa;
10874 m->fs.cfa_offset = UNITS_PER_WORD;
10875 m->fs.fp_valid = false;
10877 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10878 const0_rtx, style, false);
10882 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10883 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10884 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10885 ix86_add_queued_cfa_restore_notes (insn);
10887 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10888 if (m->fs.cfa_offset != UNITS_PER_WORD)
10890 m->fs.cfa_offset = UNITS_PER_WORD;
10891 add_reg_note (insn, REG_CFA_DEF_CFA,
10892 plus_constant (stack_pointer_rtx,
10894 RTX_FRAME_RELATED_P (insn) = 1;
10897 m->fs.sp_offset = UNITS_PER_WORD;
10898 m->fs.sp_valid = true;
10903 /* SEH requires that the function end with (1) a stack adjustment
10904 if necessary, (2) a sequence of pops, and (3) a return or
10905 jump instruction. Prevent insns from the function body from
10906 being scheduled into this sequence. */
10909 /* Prevent a catch region from being adjacent to the standard
10910 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
10911 several other flags that would be interesting to test are set up yet.  */
10913 if (flag_non_call_exceptions)
10914 emit_insn (gen_nops (const1_rtx));
10916 emit_insn (gen_blockage ());
10919 /* First step is to deallocate the stack frame so that we can
10920 pop the registers. */
10921 if (!m->fs.sp_valid)
10923 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10924 GEN_INT (m->fs.fp_offset
10925 - frame.reg_save_offset),
10928 else if (m->fs.sp_offset != frame.reg_save_offset)
10930 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10931 GEN_INT (m->fs.sp_offset
10932 - frame.reg_save_offset),
10934 m->fs.cfa_reg == stack_pointer_rtx);
10937 ix86_emit_restore_regs_using_pop ();
10940 /* If we used a frame pointer and haven't already got rid of it, then do so now.  */
10942 if (m->fs.fp_valid)
10944 /* If the stack pointer is valid and pointing at the frame
10945 pointer store address, then we only need a pop. */
10946 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10947 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10948 /* Leave results in shorter dependency chains on CPUs that are
10949 able to grok it fast. */
10950 else if (TARGET_USE_LEAVE
10951 || optimize_function_for_size_p (cfun)
10952 || !cfun->machine->use_fast_prologue_epilogue)
10953 ix86_emit_leave ();
10956 pro_epilogue_adjust_stack (stack_pointer_rtx,
10957 hard_frame_pointer_rtx,
10958 const0_rtx, style, !using_drap);
10959 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10965 int param_ptr_offset = UNITS_PER_WORD;
10968 gcc_assert (stack_realign_drap);
10970 if (ix86_static_chain_on_stack)
10971 param_ptr_offset += UNITS_PER_WORD;
10972 if (!call_used_regs[REGNO (crtl->drap_reg)])
10973 param_ptr_offset += UNITS_PER_WORD;
10975 insn = emit_insn (gen_rtx_SET
10976 (VOIDmode, stack_pointer_rtx,
10977 gen_rtx_PLUS (Pmode,
10979 GEN_INT (-param_ptr_offset))));
10980 m->fs.cfa_reg = stack_pointer_rtx;
10981 m->fs.cfa_offset = param_ptr_offset;
10982 m->fs.sp_offset = param_ptr_offset;
10983 m->fs.realigned = false;
10985 add_reg_note (insn, REG_CFA_DEF_CFA,
10986 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10987 GEN_INT (param_ptr_offset)));
10988 RTX_FRAME_RELATED_P (insn) = 1;
10990 if (!call_used_regs[REGNO (crtl->drap_reg)])
10991 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10994 /* At this point the stack pointer must be valid, and we must have
10995 restored all of the registers. We may not have deallocated the
10996 entire stack frame. We've delayed this until now because it may
10997 be possible to merge the local stack deallocation with the
10998 deallocation forced by ix86_static_chain_on_stack. */
10999 gcc_assert (m->fs.sp_valid);
11000 gcc_assert (!m->fs.fp_valid);
11001 gcc_assert (!m->fs.realigned);
11002 if (m->fs.sp_offset != UNITS_PER_WORD)
11004 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11005 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11009 /* Sibcall epilogues don't want a return instruction. */
11012 m->fs = frame_state_save;
11016 /* Emit vzeroupper if needed. */
11017 if (TARGET_VZEROUPPER
11018 && !TREE_THIS_VOLATILE (cfun->decl)
11019 && !cfun->machine->caller_return_avx256_p)
11020 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11022 if (crtl->args.pops_args && crtl->args.size)
11024 rtx popc = GEN_INT (crtl->args.pops_args);
11026 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11027 address, do explicit add, and jump indirectly to the caller. */
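/* Editorial sketch: "ret $N" only has a 16-bit immediate, so for
   N >= 64K the epilogue instead ends with something like

	popl	%ecx			; pop the return address
	addl	$N, %esp		; pop the stack arguments
	jmp	*%ecx			; return to the caller  */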
11029 if (crtl->args.pops_args >= 65536)
11031 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11034 /* There is no "pascal" calling convention in any 64bit ABI. */
11035 gcc_assert (!TARGET_64BIT);
11037 insn = emit_insn (gen_pop (ecx));
11038 m->fs.cfa_offset -= UNITS_PER_WORD;
11039 m->fs.sp_offset -= UNITS_PER_WORD;
11041 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11042 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11043 add_reg_note (insn, REG_CFA_REGISTER,
11044 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11045 RTX_FRAME_RELATED_P (insn) = 1;
11047 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11049 emit_jump_insn (gen_return_indirect_internal (ecx));
11052 emit_jump_insn (gen_return_pop_internal (popc));
11055 emit_jump_insn (gen_return_internal ());
11057 /* Restore the state back to the state from the prologue,
11058 so that it's correct for the next epilogue. */
11059 m->fs = frame_state_save;
11062 /* Reset from the function's potential modifications. */
11065 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11066 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11068 if (pic_offset_table_rtx)
11069 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11071 /* Mach-O doesn't support labels at the end of objects, so if
11072 it looks like we might want one, insert a NOP. */
11074 rtx insn = get_last_insn ();
11077 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11078 insn = PREV_INSN (insn);
11082 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11083 fputs ("\tnop\n", file);
11089 /* Return a scratch register to use in the split stack prologue. The
11090 split stack prologue is used for -fsplit-stack. It is the first
11091 instructions in the function, even before the regular prologue.
11092 The scratch register can be any caller-saved register which is not
11093 used for parameters or for the static chain. */
11095 static unsigned int
11096 split_stack_prologue_scratch_regno (void)
11105 is_fastcall = (lookup_attribute ("fastcall",
11106 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11108 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11112 if (DECL_STATIC_CHAIN (cfun->decl))
11114 sorry ("-fsplit-stack does not support fastcall with "
11115 "nested function");
11116 return INVALID_REGNUM;
11120 else if (regparm < 3)
11122 if (!DECL_STATIC_CHAIN (cfun->decl))
11128 sorry ("-fsplit-stack does not support 2 register "
11129 "parameters for a nested function");
11130 return INVALID_REGNUM;
11137 /* FIXME: We could make this work by pushing a register
11138 around the addition and comparison. */
11139 sorry ("-fsplit-stack does not support 3 register parameters");
11140 return INVALID_REGNUM;
11145 /* A SYMBOL_REF for the function which allocates new stack space for
11146 -fsplit-stack.  */
11148 static GTY(()) rtx split_stack_fn;
11150 /* A SYMBOL_REF for the more stack function when using the large
11151 model.  */
11153 static GTY(()) rtx split_stack_fn_large;
11155 /* Handle -fsplit-stack. These are the first instructions in the
11156 function, even before the regular prologue. */
11159 ix86_expand_split_stack_prologue (void)
11161 struct ix86_frame frame;
11162 HOST_WIDE_INT allocate;
11163 unsigned HOST_WIDE_INT args_size;
11164 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11165 rtx scratch_reg = NULL_RTX;
11166 rtx varargs_label = NULL_RTX;
11169 gcc_assert (flag_split_stack && reload_completed);
11171 ix86_finalize_stack_realign_flags ();
11172 ix86_compute_frame_layout (&frame);
11173 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11175 /* This is the label we will branch to if we have enough stack
11176 space. We expect the basic block reordering pass to reverse this
11177 branch if optimizing, so that we branch in the unlikely case. */
11178 label = gen_label_rtx ();
11180 /* We need to compare the stack pointer minus the frame size with
11181 the stack boundary in the TCB. The stack boundary always gives
11182 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11183 can compare directly. Otherwise we need to do an addition. */
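/* Hedged example (editorial): on GNU/Linux the UNSPEC_STACK_CHECK
   address conventionally resolves to a TCB slot, %gs:0x30 for -m32
   and %fs:0x70 for -m64, so a small frame's check assembles to
   roughly

	cmpl	%gs:0x30, %esp
	jae	.Lenough_stack		; hypothetical label name

   with the exact segment register and offset depending on the OS's
   split-stack support.  */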
11185 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11186 UNSPEC_STACK_CHECK);
11187 limit = gen_rtx_CONST (Pmode, limit);
11188 limit = gen_rtx_MEM (Pmode, limit);
11189 if (allocate < SPLIT_STACK_AVAILABLE)
11190 current = stack_pointer_rtx;
11193 unsigned int scratch_regno;
11196 /* We need a scratch register to hold the stack pointer minus
11197 the required frame size. Since this is the very start of the
11198 function, the scratch register can be any caller-saved
11199 register which is not used for parameters. */
11200 offset = GEN_INT (- allocate);
11201 scratch_regno = split_stack_prologue_scratch_regno ();
11202 if (scratch_regno == INVALID_REGNUM)
11204 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11205 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11207 /* We don't use ix86_gen_add3 in this case because it will
11208 want to split to lea, but when not optimizing the insn
11209 will not be split after this point. */
11210 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11211 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11216 emit_move_insn (scratch_reg, offset);
11217 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11218 stack_pointer_rtx));
11220 current = scratch_reg;
11223 ix86_expand_branch (GEU, current, limit, label);
11224 jump_insn = get_last_insn ();
11225 JUMP_LABEL (jump_insn) = label;
11227 /* Mark the jump as very likely to be taken. */
11228 add_reg_note (jump_insn, REG_BR_PROB,
11229 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11231 if (split_stack_fn == NULL_RTX)
11232 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11233 fn = split_stack_fn;
11235 /* Get more stack space. We pass in the desired stack space and the
11236 size of the arguments to copy to the new stack. In 32-bit mode
11237 we push the parameters; __morestack will return on a new stack
11238 anyhow.  In 64-bit mode we pass the parameters in r10 and r11.  */
11240 allocate_rtx = GEN_INT (allocate);
11241 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11242 call_fusage = NULL_RTX;
11247 reg10 = gen_rtx_REG (Pmode, R10_REG);
11248 reg11 = gen_rtx_REG (Pmode, R11_REG);
11250 /* If this function uses a static chain, it will be in %r10.
11251 Preserve it across the call to __morestack. */
11252 if (DECL_STATIC_CHAIN (cfun->decl))
11256 rax = gen_rtx_REG (Pmode, AX_REG);
11257 emit_move_insn (rax, reg10);
11258 use_reg (&call_fusage, rax);
11261 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11263 HOST_WIDE_INT argval;
11265 /* When using the large model we need to load the address
11266 into a register, and we've run out of registers. So we
11267 switch to a different calling convention, and we call a
11268 different function: __morestack_large_model.  We pass the
11269 argument size in the upper 32 bits of r10 and pass the
11270 frame size in the lower 32 bits. */
11271 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11272 gcc_assert ((args_size & 0xffffffff) == args_size);
11274 if (split_stack_fn_large == NULL_RTX)
11275 split_stack_fn_large =
11276 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11278 if (ix86_cmodel == CM_LARGE_PIC)
11282 label = gen_label_rtx ();
11283 emit_label (label);
11284 LABEL_PRESERVE_P (label) = 1;
11285 emit_insn (gen_set_rip_rex64 (reg10, label));
11286 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11287 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11288 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11290 x = gen_rtx_CONST (Pmode, x);
11291 emit_move_insn (reg11, x);
11292 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11293 x = gen_const_mem (Pmode, x);
11294 emit_move_insn (reg11, x);
11297 emit_move_insn (reg11, split_stack_fn_large);
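/* Editorial note: the double shift below is just args_size << 32,
   written as two 16-bit shifts; it packs the argument size into the
   upper 32 bits of the value loaded into r10, with the frame size in
   the lower 32 bits, as described above.  */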
11301 argval = ((args_size << 16) << 16) + allocate;
11302 emit_move_insn (reg10, GEN_INT (argval));
11306 emit_move_insn (reg10, allocate_rtx);
11307 emit_move_insn (reg11, GEN_INT (args_size));
11308 use_reg (&call_fusage, reg11);
11311 use_reg (&call_fusage, reg10);
11315 emit_insn (gen_push (GEN_INT (args_size)));
11316 emit_insn (gen_push (allocate_rtx));
11318 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11319 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11321 add_function_usage_to (call_insn, call_fusage);
11323 /* In order to make call/return prediction work right, we now need
11324 to execute a return instruction. See
11325 libgcc/config/i386/morestack.S for the details on how this works.
11327 For flow purposes gcc must not see this as a return
11328 instruction--we need control flow to continue at the subsequent
11329 label. Therefore, we use an unspec. */
11330 gcc_assert (crtl->args.pops_args < 65536);
11331 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
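/* Editorial note: split_stack_return expands to a plain "ret" (or
   "ret $n" when the function pops its arguments) at the assembly
   level, but is represented here as an unspec so that, as the
   comment above says, GCC's CFG treats control as falling through to
   the code below.  */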
11333 /* If we are in 64-bit mode and this function uses a static chain,
11334 we saved %r10 in %rax before calling __morestack.  */
11335 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11336 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11337 gen_rtx_REG (Pmode, AX_REG));
11339 /* If this function calls va_start, we need to store a pointer to
11340 the arguments on the old stack, because they may not have been
11341 all copied to the new stack. At this point the old stack can be
11342 found at the frame pointer value used by __morestack, because
11343 __morestack has set that up before calling back to us. Here we
11344 store that pointer in a scratch register, and in
11345 ix86_expand_prologue we store the scratch register in a stack slot.  */
11347 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11349 unsigned int scratch_regno;
11353 scratch_regno = split_stack_prologue_scratch_regno ();
11354 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11355 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11357 /* 64-bit:
11358 fp -> old fp value
11359 return address within this function
11360 return address of caller of this function
11361 stack arguments
11362 So we add three words to get to the stack arguments.
11364 32-bit:
11365 fp -> old fp value
11366 return address within this function
11367 first argument to __morestack
11368 second argument to __morestack
11369 return address of caller of this function
11370 stack arguments
11371 So we add five words to get to the stack arguments.  */
11373 words = TARGET_64BIT ? 3 : 5;
11374 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11375 gen_rtx_PLUS (Pmode, frame_reg,
11376 GEN_INT (words * UNITS_PER_WORD))));
11378 varargs_label = gen_label_rtx ();
11379 emit_jump_insn (gen_jump (varargs_label));
11380 JUMP_LABEL (get_last_insn ()) = varargs_label;
11385 emit_label (label);
11386 LABEL_NUSES (label) = 1;
11388 /* If this function calls va_start, we now have to set the scratch
11389 register for the case where we do not call __morestack. In this
11390 case we need to set it based on the stack pointer. */
11391 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11393 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11394 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11395 GEN_INT (UNITS_PER_WORD))));
11397 emit_label (varargs_label);
11398 LABEL_NUSES (varargs_label) = 1;
11402 /* We may have to tell the dataflow pass that the split stack prologue
11403 is initializing a scratch register. */
11406 ix86_live_on_entry (bitmap regs)
11408 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11410 gcc_assert (flag_split_stack);
11411 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11415 /* Extract the parts of an RTL expression that is a valid memory address
11416 for an instruction. Return 0 if the structure of the address is
11417 grossly off. Return -1 if the address contains ASHIFT, so it is not
11418 strictly valid, but still used for computing length of lea instruction. */
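/* Editorial example: the Pmode address

	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   i.e. 12(%A,%B,4) in AT&T syntax, decomposes into base A, index B,
   scale 4 and displacement 12, with the segment left as SEG_DEFAULT.  */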
11421 ix86_decompose_address (rtx addr, struct ix86_address *out)
11423 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11424 rtx base_reg, index_reg;
11425 HOST_WIDE_INT scale = 1;
11426 rtx scale_rtx = NULL_RTX;
11429 enum ix86_address_seg seg = SEG_DEFAULT;
11431 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11433 else if (GET_CODE (addr) == PLUS)
11435 rtx addends[4], op;
11443 addends[n++] = XEXP (op, 1);
11446 while (GET_CODE (op) == PLUS);
11451 for (i = n; i >= 0; --i)
11454 switch (GET_CODE (op))
11459 index = XEXP (op, 0);
11460 scale_rtx = XEXP (op, 1);
11466 index = XEXP (op, 0);
11467 tmp = XEXP (op, 1);
11468 if (!CONST_INT_P (tmp))
11470 scale = INTVAL (tmp);
11471 if ((unsigned HOST_WIDE_INT) scale > 3)
11473 scale = 1 << scale;
11477 if (XINT (op, 1) == UNSPEC_TP
11478 && TARGET_TLS_DIRECT_SEG_REFS
11479 && seg == SEG_DEFAULT)
11480 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11509 else if (GET_CODE (addr) == MULT)
11511 index = XEXP (addr, 0); /* index*scale */
11512 scale_rtx = XEXP (addr, 1);
11514 else if (GET_CODE (addr) == ASHIFT)
11516 /* We're called for lea too, which implements ashift on occasion. */
11517 index = XEXP (addr, 0);
11518 tmp = XEXP (addr, 1);
11519 if (!CONST_INT_P (tmp))
11521 scale = INTVAL (tmp);
11522 if ((unsigned HOST_WIDE_INT) scale > 3)
11524 scale = 1 << scale;
11528 disp = addr; /* displacement */
11530 /* Extract the integral value of scale. */
11533 if (!CONST_INT_P (scale_rtx))
11535 scale = INTVAL (scale_rtx);
11538 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11539 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11541 /* Avoid useless 0 displacement. */
11542 if (disp == const0_rtx && (base || index))
11545 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11546 if (base_reg && index_reg && scale == 1
11547 && (index_reg == arg_pointer_rtx
11548 || index_reg == frame_pointer_rtx
11549 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11552 tmp = base, base = index, index = tmp;
11553 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11556 /* Special case: %ebp cannot be encoded as a base without a displacement.  Similarly %r13.
11560 && (base_reg == hard_frame_pointer_rtx
11561 || base_reg == frame_pointer_rtx
11562 || base_reg == arg_pointer_rtx
11563 || (REG_P (base_reg)
11564 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11565 || REGNO (base_reg) == R13_REG))))
11568 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11569 Avoid this by transforming to [%esi+0].
11570 Reload calls address legitimization without cfun defined, so we need
11571 to test cfun for being non-NULL. */
11572 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11573 && base_reg && !index_reg && !disp
11574 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11577 /* Special case: encode reg+reg instead of reg*2. */
11578 if (!base && index && scale == 2)
11579 base = index, base_reg = index_reg, scale = 1;
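/* Editorial note: reg+reg wins because an index with no base must be
   encoded with a SIB byte plus a 32-bit displacement, so e.g.
   (%eax,%eax) is four bytes shorter than 0(,%eax,2).  */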
11581 /* Special case: scaling cannot be encoded without base or displacement. */
11582 if (!base && !disp && index && scale != 1)
11586 out->index = index;
11588 out->scale = scale;
11594 /* Return cost of the memory address x.
11595 For i386, it is better to use a complex address than let gcc copy
11596 the address into a reg and make a new pseudo.  But not if the address
11597 requires two regs - that would mean more pseudos with longer
11598 lifetimes.  */
11600 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11602 struct ix86_address parts;
11604 int ok = ix86_decompose_address (x, &parts);
11608 if (parts.base && GET_CODE (parts.base) == SUBREG)
11609 parts.base = SUBREG_REG (parts.base);
11610 if (parts.index && GET_CODE (parts.index) == SUBREG)
11611 parts.index = SUBREG_REG (parts.index);
11613 /* Attempt to minimize number of registers in the address. */
11615 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11617 && (!REG_P (parts.index)
11618 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11622 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11624 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11625 && parts.base != parts.index)
11628 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11629 since its predecode logic can't detect the length of instructions
11630 and it degenerates to vector decoded.  Increase the cost of such
11631 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
11632 to split such addresses or even refuse such addresses at all.
11634 The following addressing modes are affected:
11635 [base+scale*index]
11636 [scale*index+disp]
11637 [base+index]
11639 The first and last cases may be avoidable by explicitly coding the zero into
11640 the memory address, but I don't have an AMD-K6 machine handy to check this
11641 theory.  */
11644 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11645 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11646 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11652 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11653 this is used to form addresses to local data when -fPIC is in
11654 effect.  */
11657 darwin_local_data_pic (rtx disp)
11659 return (GET_CODE (disp) == UNSPEC
11660 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11663 /* Determine if a given RTX is a valid constant. We already know this
11664 satisfies CONSTANT_P. */
11667 legitimate_constant_p (rtx x)
11669 switch (GET_CODE (x))
11674 if (GET_CODE (x) == PLUS)
11676 if (!CONST_INT_P (XEXP (x, 1)))
11681 if (TARGET_MACHO && darwin_local_data_pic (x))
11684 /* Only some unspecs are valid as "constants". */
11685 if (GET_CODE (x) == UNSPEC)
11686 switch (XINT (x, 1))
11689 case UNSPEC_GOTOFF:
11690 case UNSPEC_PLTOFF:
11691 return TARGET_64BIT;
11693 case UNSPEC_NTPOFF:
11694 x = XVECEXP (x, 0, 0);
11695 return (GET_CODE (x) == SYMBOL_REF
11696 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11697 case UNSPEC_DTPOFF:
11698 x = XVECEXP (x, 0, 0);
11699 return (GET_CODE (x) == SYMBOL_REF
11700 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11705 /* We must have drilled down to a symbol. */
11706 if (GET_CODE (x) == LABEL_REF)
11708 if (GET_CODE (x) != SYMBOL_REF)
11713 /* TLS symbols are never valid. */
11714 if (SYMBOL_REF_TLS_MODEL (x))
11717 /* DLLIMPORT symbols are never valid. */
11718 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11719 && SYMBOL_REF_DLLIMPORT_P (x))
11723 /* mdynamic-no-pic */
11724 if (MACHO_DYNAMIC_NO_PIC_P)
11725 return machopic_symbol_defined_p (x);
11730 if (GET_MODE (x) == TImode
11731 && x != CONST0_RTX (TImode)
11737 if (!standard_sse_constant_p (x))
11744 /* Otherwise we handle everything else in the move patterns. */
11748 /* Determine if it's legal to put X into the constant pool. This
11749 is not possible for the address of thread-local symbols, which
11750 is checked above. */
11753 ix86_cannot_force_const_mem (rtx x)
11755 /* We can always put integral constants and vectors in memory. */
11756 switch (GET_CODE (x))
11766 return !legitimate_constant_p (x);
11770 /* Nonzero if the constant value X is a legitimate general operand
11771 when generating PIC code. It is given that flag_pic is on and
11772 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11775 legitimate_pic_operand_p (rtx x)
11779 switch (GET_CODE (x))
11782 inner = XEXP (x, 0);
11783 if (GET_CODE (inner) == PLUS
11784 && CONST_INT_P (XEXP (inner, 1)))
11785 inner = XEXP (inner, 0);
11787 /* Only some unspecs are valid as "constants". */
11788 if (GET_CODE (inner) == UNSPEC)
11789 switch (XINT (inner, 1))
11792 case UNSPEC_GOTOFF:
11793 case UNSPEC_PLTOFF:
11794 return TARGET_64BIT;
11796 x = XVECEXP (inner, 0, 0);
11797 return (GET_CODE (x) == SYMBOL_REF
11798 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11799 case UNSPEC_MACHOPIC_OFFSET:
11800 return legitimate_pic_address_disp_p (x);
11808 return legitimate_pic_address_disp_p (x);
11815 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode.  */
11819 legitimate_pic_address_disp_p (rtx disp)
11823 /* In 64bit mode we can allow direct addresses of symbols and labels
11824 when they are not dynamic symbols. */
11827 rtx op0 = disp, op1;
11829 switch (GET_CODE (disp))
11835 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11837 op0 = XEXP (XEXP (disp, 0), 0);
11838 op1 = XEXP (XEXP (disp, 0), 1);
11839 if (!CONST_INT_P (op1)
11840 || INTVAL (op1) >= 16*1024*1024
11841 || INTVAL (op1) < -16*1024*1024)
11843 if (GET_CODE (op0) == LABEL_REF)
11845 if (GET_CODE (op0) != SYMBOL_REF)
11850 /* TLS references should always be enclosed in UNSPEC. */
11851 if (SYMBOL_REF_TLS_MODEL (op0))
11853 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11854 && ix86_cmodel != CM_LARGE_PIC)
11862 if (GET_CODE (disp) != CONST)
11864 disp = XEXP (disp, 0);
11868 /* It is unsafe to allow PLUS expressions here; this limits the allowed
11869 distance of GOT table references.  We should not need these anyway. */
11870 if (GET_CODE (disp) != UNSPEC
11871 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11872 && XINT (disp, 1) != UNSPEC_GOTOFF
11873 && XINT (disp, 1) != UNSPEC_PCREL
11874 && XINT (disp, 1) != UNSPEC_PLTOFF))
11877 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11878 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11884 if (GET_CODE (disp) == PLUS)
11886 if (!CONST_INT_P (XEXP (disp, 1)))
11888 disp = XEXP (disp, 0);
11892 if (TARGET_MACHO && darwin_local_data_pic (disp))
11895 if (GET_CODE (disp) != UNSPEC)
11898 switch (XINT (disp, 1))
11903 /* We need to check for both symbols and labels because VxWorks loads
11904 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for details.  */
11906 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11907 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11908 case UNSPEC_GOTOFF:
11909 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11910 While the ABI also specifies a 32bit relocation, we don't produce it in
11911 the small PIC model at all. */
11912 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11913 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11915 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11917 case UNSPEC_GOTTPOFF:
11918 case UNSPEC_GOTNTPOFF:
11919 case UNSPEC_INDNTPOFF:
11922 disp = XVECEXP (disp, 0, 0);
11923 return (GET_CODE (disp) == SYMBOL_REF
11924 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11925 case UNSPEC_NTPOFF:
11926 disp = XVECEXP (disp, 0, 0);
11927 return (GET_CODE (disp) == SYMBOL_REF
11928 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11929 case UNSPEC_DTPOFF:
11930 disp = XVECEXP (disp, 0, 0);
11931 return (GET_CODE (disp) == SYMBOL_REF
11932 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11938 /* Recognizes RTL expressions that are valid memory addresses for an
11939 instruction. The MODE argument is the machine mode for the MEM
11940 expression that wants to use this address.
11942 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
11943 convert common non-canonical forms to canonical form so that they will be
11944 recognized.  */
11947 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11948 rtx addr, bool strict)
11950 struct ix86_address parts;
11951 rtx base, index, disp;
11952 HOST_WIDE_INT scale;
11954 if (ix86_decompose_address (addr, &parts) <= 0)
11955 /* Decomposition failed. */
11959 index = parts.index;
11961 scale = parts.scale;
11963 /* Validate base register.
11965 Don't allow SUBREG's that span more than a word here. It can lead to spill
11966 failures when the base is one word out of a two word structure, which is
11967 represented internally as a DImode int. */
11975 else if (GET_CODE (base) == SUBREG
11976 && REG_P (SUBREG_REG (base))
11977 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11979 reg = SUBREG_REG (base);
11981 /* Base is not a register. */
11984 if (GET_MODE (base) != Pmode)
11985 /* Base is not in Pmode. */
11988 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11989 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11990 /* Base is not valid. */
11994 /* Validate index register.
11996 Don't allow SUBREG's that span more than a word here -- same as above. */
12004 else if (GET_CODE (index) == SUBREG
12005 && REG_P (SUBREG_REG (index))
12006 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12008 reg = SUBREG_REG (index);
12010 /* Index is not a register. */
12013 if (GET_MODE (index) != Pmode)
12014 /* Index is not in Pmode. */
12017 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12018 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12019 /* Index is not valid. */
12023 /* Validate scale factor. */
12027 /* Scale without index. */
12030 if (scale != 2 && scale != 4 && scale != 8)
12031 /* Scale is not a valid multiplier. */
12035 /* Validate displacement. */
12038 if (GET_CODE (disp) == CONST
12039 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12040 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12041 switch (XINT (XEXP (disp, 0), 1))
12043 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12044 used.  While the ABI also specifies 32bit relocations, we don't produce
12045 them at all and use IP relative addressing instead. */
12047 case UNSPEC_GOTOFF:
12048 gcc_assert (flag_pic);
12050 goto is_legitimate_pic;
12052 /* 64bit address unspec. */
12055 case UNSPEC_GOTPCREL:
12057 gcc_assert (flag_pic);
12058 goto is_legitimate_pic;
12060 case UNSPEC_GOTTPOFF:
12061 case UNSPEC_GOTNTPOFF:
12062 case UNSPEC_INDNTPOFF:
12063 case UNSPEC_NTPOFF:
12064 case UNSPEC_DTPOFF:
12067 case UNSPEC_STACK_CHECK:
12068 gcc_assert (flag_split_stack);
12072 /* Invalid address unspec. */
12076 else if (SYMBOLIC_CONST (disp)
12080 && MACHOPIC_INDIRECT
12081 && !machopic_operand_p (disp)
12087 if (TARGET_64BIT && (index || base))
12089 /* foo@dtpoff(%rX) is ok. */
12090 if (GET_CODE (disp) != CONST
12091 || GET_CODE (XEXP (disp, 0)) != PLUS
12092 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12093 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12094 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12095 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12096 /* Non-constant pic memory reference. */
12099 else if ((!TARGET_MACHO || flag_pic)
12100 && ! legitimate_pic_address_disp_p (disp))
12101 /* Displacement is an invalid pic construct. */
12104 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
12105 /* The displacement must be referenced via a non-lazy pointer.  */
12109 /* This code used to verify that a symbolic pic displacement
12110 includes the pic_offset_table_rtx register.
12112 While this is a good idea, unfortunately these constructs may
12113 be created by the "adds using lea" optimization for incorrect
12114 code like:
12116 int a;
12117 int foo (int i)
12118 {
12119 return *(&a + i);
12120 }
12122 That code is nonsensical, but it results in addressing the
12123 GOT table with a pic_offset_table_rtx base.  We can't
12124 just refuse it easily, since it gets matched by the
12125 "addsi3" pattern, which later gets split to lea in case the
12126 output register differs from the input.  While this
12127 could be handled by a separate addsi pattern for this case
12128 that never results in lea, disabling this test seems to be the
12129 easier and correct fix for the crash.  */
12131 else if (GET_CODE (disp) != LABEL_REF
12132 && !CONST_INT_P (disp)
12133 && (GET_CODE (disp) != CONST
12134 || !legitimate_constant_p (disp))
12135 && (GET_CODE (disp) != SYMBOL_REF
12136 || !legitimate_constant_p (disp)))
12137 /* Displacement is not constant. */
12139 else if (TARGET_64BIT
12140 && !x86_64_immediate_operand (disp, VOIDmode))
12141 /* Displacement is out of range. */
12145 /* Everything looks valid. */
12149 /* Determine if a given RTX is a valid constant address. */
12152 constant_address_p (rtx x)
12154 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12157 /* Return a unique alias set for the GOT. */
12159 static alias_set_type
12160 ix86_GOT_alias_set (void)
12162 static alias_set_type set = -1;
12164 set = new_alias_set ();
12168 /* Return a legitimate reference for ORIG (an address) using the
12169 register REG. If REG is 0, a new pseudo is generated.
12171 There are two types of references that must be handled:
12173 1. Global data references must load the address from the GOT, via
12174 the PIC reg. An insn is emitted to do this load, and the reg is
12177 2. Static data references, constant pool addresses, and code labels
12178 compute the address as an offset from the GOT, whose base is in
12179 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12180 differentiate them from global data objects. The returned
12181 address is the PIC reg + an unspec constant.
12183 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12184 reg also appears in the address. */
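/* Hedged illustration for -m32 PIC (editorial, assuming the PIC
   register has been set up in %ebx): a global symbol is loaded from
   the GOT,

	movl	sym@GOT(%ebx), %reg

   while a local symbol is computed as an offset from the GOT base,

	leal	sym@GOTOFF(%ebx), %reg

   matching cases 1 and 2 above.  */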
12187 legitimize_pic_address (rtx orig, rtx reg)
12190 rtx new_rtx = orig;
12194 if (TARGET_MACHO && !TARGET_64BIT)
12197 reg = gen_reg_rtx (Pmode);
12198 /* Use the generic Mach-O PIC machinery. */
12199 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12203 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12205 else if (TARGET_64BIT
12206 && ix86_cmodel != CM_SMALL_PIC
12207 && gotoff_operand (addr, Pmode))
12210 /* This symbol may be referenced via a displacement from the PIC
12211 base address (@GOTOFF). */
12213 if (reload_in_progress)
12214 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12215 if (GET_CODE (addr) == CONST)
12216 addr = XEXP (addr, 0);
12217 if (GET_CODE (addr) == PLUS)
12219 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12221 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12224 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12225 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12227 tmpreg = gen_reg_rtx (Pmode);
12230 emit_move_insn (tmpreg, new_rtx);
12234 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12235 tmpreg, 1, OPTAB_DIRECT);
12238 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12240 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12242 /* This symbol may be referenced via a displacement from the PIC
12243 base address (@GOTOFF). */
12245 if (reload_in_progress)
12246 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12247 if (GET_CODE (addr) == CONST)
12248 addr = XEXP (addr, 0);
12249 if (GET_CODE (addr) == PLUS)
12251 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12253 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12256 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12257 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12258 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12262 emit_move_insn (reg, new_rtx);
12266 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12267 /* We can't use @GOTOFF for text labels on VxWorks;
12268 see gotoff_operand. */
12269 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12271 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12273 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12274 return legitimize_dllimport_symbol (addr, true);
12275 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12276 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12277 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12279 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12280 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12284 /* For x64 PE-COFF there is no GOT table, so we use the address directly.  */
12286 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12288 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12289 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12292 reg = gen_reg_rtx (Pmode);
12293 emit_move_insn (reg, new_rtx);
12296 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12298 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12299 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12300 new_rtx = gen_const_mem (Pmode, new_rtx);
12301 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12304 reg = gen_reg_rtx (Pmode);
12305 /* Use gen_movsi directly; otherwise the address is loaded
12306 into a register for CSE.  We don't want to CSE these addresses;
12307 instead we CSE addresses from the GOT table, so skip this. */
12308 emit_insn (gen_movsi (reg, new_rtx));
12313 /* This symbol must be referenced via a load from the
12314 Global Offset Table (@GOT). */
12316 if (reload_in_progress)
12317 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12318 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12319 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12321 new_rtx = force_reg (Pmode, new_rtx);
12322 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12323 new_rtx = gen_const_mem (Pmode, new_rtx);
12324 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12327 reg = gen_reg_rtx (Pmode);
12328 emit_move_insn (reg, new_rtx);
12334 if (CONST_INT_P (addr)
12335 && !x86_64_immediate_operand (addr, VOIDmode))
12339 emit_move_insn (reg, addr);
12343 new_rtx = force_reg (Pmode, addr);
12345 else if (GET_CODE (addr) == CONST)
12347 addr = XEXP (addr, 0);
12349 /* We must match stuff we generate before. Assume the only
12350 unspecs that can get here are ours. Not that we could do
12351 anything with them anyway.... */
12352 if (GET_CODE (addr) == UNSPEC
12353 || (GET_CODE (addr) == PLUS
12354 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12356 gcc_assert (GET_CODE (addr) == PLUS);
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
					  new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0),
					      op1);
		    }
		}
	    }
	  else
	    {
	      base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new_rtx = legitimize_pic_address (XEXP (addr, 1),
						base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		new_rtx = plus_constant (base, INTVAL (new_rtx));
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }

		  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}
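/* Illustrative note only: UNSPEC_TP stands for the thread register, i.e.
   the %gs segment base on 32-bit GNU/Linux and %fs on x86-64, so a TLS
   slot at offset OFF ends up accessed as %gs:OFF or %fs:OFF.  */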
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic, tp;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

	  start_sequence ();
	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
	  insns = get_insns ();
	  end_sequence ();

	  RTL_CONST_CALL_P (insns) = 1;
	  emit_libcall_block (insns, dest, rax, x);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_global_dynamic_64 (dest, x));
      else
	emit_insn (gen_tls_global_dynamic_32 (dest, x));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}
      break;
    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	{
	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;

	  start_sequence ();
	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
	  insns = get_insns ();
	  end_sequence ();

	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
	  RTL_CONST_CALL_P (insns) = 1;
	  emit_libcall_block (insns, base, rax, note);
	}
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
	emit_insn (gen_tls_local_dynamic_base_64 (base));
      else
	emit_insn (gen_tls_local_dynamic_base_32 (base));

      if (TARGET_GNU2_TLS)
	{
	  rtx x = ix86_tls_module_base ();

	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
			       gen_rtx_MINUS (Pmode, x, tp));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
	}
      break;
    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
	off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (Pmode, off);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;
    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_subsi3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
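/* Illustrative sketch only (x86-64, GNU TLS; exact output depends on the
   assembler and relocations available):

     initial-exec:   movq  foo@gottpoff(%rip), %rax
		     movq  %fs:(%rax), %rdx

     local-exec:     movq  %fs:foo@tpoff, %rdx  */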
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;
static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
	   ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
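/* Illustrative note only: a dllimport reference to foo becomes a load
   through the import-table slot, e.g. "movl __imp__foo, %eax" followed
   by a use of (%eax), rather than a direct reference to foo itself.  */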
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
	return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
	}
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }
  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode,
						 XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode,
						 XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1),
						    0)),
				plus_constant (other, INTVAL (constant)));
	    }
	}
      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
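/* Illustrative sketch only: the shift-to-multiply canonicalization above
   turns

     (plus (ashift (reg) (const_int 2)) (reg))

   into (plus (mult (reg) (const_int 4)) (reg)), which matches the
   base + index*scale form of an x86 effective address, e.g.
   "(%eax,%ebx,4)" in AT&T syntax.  */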
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;
    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;
12952 /* We can't handle floating point constants;
12953 TARGET_PRINT_OPERAND must handle them. */
12954 output_operand_lossage ("floating constant misused");
12958 /* Some assemblers need integer constants to appear first. */
12959 if (CONST_INT_P (XEXP (x, 0)))
12961 output_pic_addr_const (file, XEXP (x, 0), code);
12963 output_pic_addr_const (file, XEXP (x, 1), code);
12967 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12968 output_pic_addr_const (file, XEXP (x, 1), code);
12970 output_pic_addr_const (file, XEXP (x, 0), code);
12976 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12977 output_pic_addr_const (file, XEXP (x, 0), code);
12979 output_pic_addr_const (file, XEXP (x, 1), code);
12981 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
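/* Illustrative note only: given (const (unspec [foo] UNSPEC_GOTOFF)) the
   routine above prints "foo@GOTOFF", and for UNSPEC_GOTPCREL in 64-bit
   AT&T syntax it prints "foo@GOTPCREL(%rip)".  */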
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
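/* Illustrative sketch only: a local-exec access such as

     (mem (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF))))

   addressed through %fs/%gs is turned back into a reference to "foo"
   (plus any base, index and constant offset) for debug output.  */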
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);
  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	      && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
	  || !MEM_P (orig_x))
	return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != Pmode)
	return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
      return x;
    }
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
						 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	   leal (%ebx, %ecx, 4), %ecx
	   ...
	   movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode,
			       gen_rtx_MINUS (Pmode, copy_rtx (addend),
					      pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (CONST_INT_P (XEXP (term, 1))
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
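/* Illustrative note only: for a 64-bit address such as
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) the base term is
   the symbol_ref "foo" itself, which is what alias analysis cares about.  */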
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
		    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;

    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	suffix = "b";
      else
	gcc_unreachable ();
      break;

    case LT:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "s";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;

    case GE:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "ns";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;

    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;

    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
	suffix = "be";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "ae";
      else
	gcc_unreachable ();
      break;

    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;

    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;

    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
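/* Illustrative note only: for (gtu ...) this prints "a" (or "nbe" for
   fcmov lossage), and for (ltu ...) it prints "b", so a conditional set
   comes out as e.g. "seta %al" or "setb %al" in AT&T syntax.  */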
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
	      || (REGNO (x) != ARG_POINTER_REGNUM
		  && REGNO (x) != FRAME_POINTER_REGNUM
		  && REGNO (x) != FLAGS_REG
		  && REGNO (x) != FPSR_REG
		  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }
  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));
  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 2:
	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 4:
	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	case 8:
	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  reg = "st(0)";
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('y', file);
	  fputs (hi_reg_name[REGNO (x)] + 1, file);
	  return;
	}
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
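/* Illustrative note only: for (reg:SI 0) plain printing yields "eax",
   while the size overrides handled above give %k0 -> "eax", %w0 -> "ax",
   %b0 -> "al" and %h0 -> "ah".  */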
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   @ -- print a segment register of thread base pointer load
 */
void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc ('[', file);
		  ix86_print_operand (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, 0);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;
	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc ('b', file);
		  return;

		case 2:
		  putc ('w', file);
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
		  putc ('q', file);
		  return;

		default:
		  output_operand_lossage
		    ("invalid operand size for operand code '%c'", code);
		  return;
		}
	    }

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    warning
	      (0, "non-integer operand used with operand code '%c'", code);
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc ('s', file);
#endif
		  return;

		case 4:
		  putc ('l', file);
		  return;

		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc ('q', file);
#else
		  fputs ("ll", file);
#endif
		  return;

		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc ('s', file);
		  return;

		case 8:
		  putc ('l', file);
		  return;

		case 12:
		case 16:
		  putc ('t', file);
		  return;

		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage
		("invalid operand type used with operand code '%c'", code);
	      return;
	    }

	  output_operand_lossage
	    ("invalid operand size for operand code '%c'", code);
	  return;
	case 'd':
	case 'b':
	case 'w':
	case 'k':
	case 'q':
	case 'h':
	case 't':
	case 'y':
	case 'x':
	case 'X':
	case 'P':
	  break;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);
	    }
	  return;
	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves.  */
	  if (TARGET_AVX)
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		  fputs ("eq", file);
		  break;
		case UNEQ:
		  fputs ("eq_us", file);
		  break;
		case LT:
		  fputs ("lt", file);
		  break;
		case UNLT:
		  fputs ("nge", file);
		  break;
		case LE:
		  fputs ("le", file);
		  break;
		case UNLE:
		  fputs ("ngt", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		  fputs ("neq", file);
		  break;
		case LTGT:
		  fputs ("neq_oq", file);
		  break;
		case GE:
		  fputs ("ge", file);
		  break;
		case UNGE:
		  fputs ("nlt", file);
		  break;
		case GT:
		  fputs ("gt", file);
		  break;
		case UNGT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  else
	    {
	      switch (GET_CODE (x))
		{
		case EQ:
		case UNEQ:
		  fputs ("eq", file);
		  break;
		case LT:
		case UNLT:
		  fputs ("lt", file);
		  break;
		case LE:
		case UNLE:
		  fputs ("le", file);
		  break;
		case UNORDERED:
		  fputs ("unord", file);
		  break;
		case NE:
		case LTGT:
		  fputs ("neq", file);
		  break;
		case UNGE:
		case GE:
		  fputs ("nlt", file);
		  break;
		case UNGT:
		case GT:
		  fputs ("nle", file);
		  break;
		case ORDERED:
		  fputs ("ord", file);
		  break;
		default:
		  output_operand_lossage ("operand is not a condition code, "
					  "invalid operand code 'D'");
		  return;
		}
	    }
	  return;
	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    {
	      switch (GET_MODE (x))
		{
		case HImode: putc ('w', file); break;
		case SImode:
		case SFmode: putc ('l', file); break;
		case DImode:
		case DFmode: putc ('q', file); break;
		default: gcc_unreachable ();
		}
	      putc ('.', file);
	    }
#endif
	  return;

	case 'C':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'C'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      0, 0, file);
	  return;

	case 'F':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand code "
				      "'F'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      0, 1, file);
	  return;
	  /* Like above, but reverse condition */
	case 'c':
	  /* Check to see if argument to %c is really a constant
	     and not a condition code which needs to be reversed.  */
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'c'");
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      1, 0, file);
	  return;

	case 'f':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is neither a constant nor a "
				      "condition code, invalid operand "
				      "code 'f'");
	      return;
	    }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      1, 1, file);
	  return;
	case 'H':
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    int taken = pred_val > REG_BR_PROB_BASE / 2;
		    int cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
	case 'Y':
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs ("neq", file);
	      break;
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      break;
	    case LE:
	    case LEU:
	      fputs ("le", file);
	      break;
	    case LT:
	    case LTU:
	      fputs ("lt", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    case UNEQ:
	      fputs ("ueq", file);
	      break;
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case UNLE:
	      fputs ("ule", file);
	      break;
	    case UNLT:
	      fputs ("ult", file);
	      break;
	    case LTGT:
	      fputs ("une", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;
	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '@':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('%', file);

	  /* The kernel uses a different segment register for performance
	     reasons; a system call would not have to trash the userspace
	     segment register, which would be expensive.  */
	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
	    fputs ("fs", file);
	  else
	    fputs ("gs", file);
	  return;
14122 print_reg (x, code, file);
14124 else if (MEM_P (x))
14126 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14127 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14128 && GET_MODE (x) != BLKmode)
14131 switch (GET_MODE_SIZE (GET_MODE (x)))
14133 case 1: size = "BYTE"; break;
14134 case 2: size = "WORD"; break;
14135 case 4: size = "DWORD"; break;
14136 case 8: size = "QWORD"; break;
14137 case 12: size = "TBYTE"; break;
14139 if (GET_MODE (x) == XFmode)
14144 case 32: size = "YMMWORD"; break;
14146 gcc_unreachable ();
14149 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14152 else if (code == 'w')
14154 else if (code == 'k')
14157 fputs (size, file);
14158 fputs (" PTR ", file);
14162 /* Avoid (%rip) for call operands. */
14163 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14164 && !CONST_INT_P (x))
14165 output_addr_const (file, x);
14166 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14167 output_operand_lossage ("invalid constraints for operand");
14169 output_address (x);
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
	   && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }
  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P')
	{
	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+'
	  || code == '&' || code == ';');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }
  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }
  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }
	  putc ('(', file);
	  if (base)
	    print_reg (base, 0, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, 0, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, 0, file);
	      if (scale != 1)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
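/* Illustrative note only: the same address prints as "-4(%ebp,%ecx,4)"
   in AT&T syntax and as "[ebp+ecx*4-4]" in Intel syntax.  */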
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif
    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
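/* Illustrative note only: splitting (reg:DI 0) on a 32-bit target yields
   lo_half = (subreg:SI (reg:DI 0) 0) and hi_half = (subreg:SI (reg:DI 0) 4),
   i.e. the low and high 32-bit words at byte offsets 0 and 4.  */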
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
	       || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	  break;
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	  break;
	}

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
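/* Illustrative note only: bits 10-11 of the x87 control word select the
   rounding mode (00 = nearest, 01 = down, 10 = up, 11 = toward zero), so
   or'ing in 0x0c00 above selects truncation while 0x0400 and 0x0800
   select floor and ceil respectively.  */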
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
      output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
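/* Illustrative sketch only: without SSE3's fisttp, a truncating
   conversion comes out roughly as

	fldcw	new_cw		# switch to round-toward-zero
	fistpl	dest		# store and pop
	fldcw	saved_cw	# restore the previous control word  */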
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
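/* Illustrative note only: ffreep st(1) encodes as the bytes 0xdf 0xc1,
   which is what the ASM_SHORT fallback above emits (the little-endian
   word 0xc1df) when the assembler doesn't know the mnemonic.  */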
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
      static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
      static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
      static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";

      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return &ucomiss[TARGET_AVX ? 0 : 1];
	else
	  return &comiss[TARGET_AVX ? 0 : 1];
      else
	if (unordered_p)
	  return &ucomisd[TARGET_AVX ? 0 : 1];
	else
	  return &comisd[TARGET_AVX ? 0 : 1];
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
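/* Illustrative note only: "xorl %eax, %eax" is shorter than
   "movl $0, %eax" and breaks dependence chains, but it clobbers the
   flags, hence the CLOBBER of FLAGS_REG attached above.  */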
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (Pmode, op1);
	  else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since the attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
	 xorps  reg, reg
	 movlps mem, reg
	 movhps mem+8, reg
       }
     else
       {
	 movlps mem, reg
	 movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
	 movlpd mem, reg
	 movhpd mem+8, reg
       }
     else
       {
	 movsd  mem, reg
	 unpcklpd reg, reg
       }  */
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  switch (GET_MODE_SIZE (mode))
	    {
	    case 16:
	      /* If we're optimizing for size, movups is the smallest.  */
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_avx_movups (op0, op1));
		  return;
		}
	      op0 = gen_lowpart (V16QImode, op0);
	      op1 = gen_lowpart (V16QImode, op1);
	      emit_insn (gen_avx_movdqu (op0, op1));
	      break;
	    case 32:
	      op0 = gen_lowpart (V32QImode, op0);
	      op1 = gen_lowpart (V32QImode, op1);
	      emit_insn (gen_avx_movdqu256 (op0, op1));
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;
	case MODE_VECTOR_FLOAT:
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);

	  switch (mode)
	    {
	    case V4SFmode:
	      emit_insn (gen_avx_movups (op0, op1));
	      break;
	    case V8SFmode:
	      emit_insn (gen_avx_movups256 (op0, op1));
	      break;
	    case V2DFmode:
	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
		{
		  op0 = gen_lowpart (V4SFmode, op0);
		  op1 = gen_lowpart (V4SFmode, op1);
		  emit_insn (gen_avx_movups (op0, op1));
		  return;
		}
	      emit_insn (gen_avx_movupd (op0, op1));
	      break;
	    case V4DFmode:
	      emit_insn (gen_avx_movupd256 (op0, op1));
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }
  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);
	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
	{
	  op0 = gen_lowpart (V4SFmode, op0);
	  op1 = gen_lowpart (V4SFmode, op1);
	  emit_insn (gen_sse_movups (op0, op1));
	  return;
	}

      /* ??? Similar to above, only less clear because of the lore
	 about "typeless stores".  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  emit_insn (gen_sse2_movdqu (op0, op1));
	  return;
	}

      if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V2DFmode, op0);
	      op1 = gen_lowpart (V2DFmode, op1);
	      emit_insn (gen_sse2_movupd (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto the stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
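/* ix86_expand_push above thus emits, for an N-byte mode, roughly

	sub	$N, %esp
	mov	x, (%esp)

   i.e. an explicit stack-pointer adjustment followed by a plain store,
   instead of a real push instruction (an illustrative sketch).  */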
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && CONST_INT_P (src2)
	    && (INTVAL (src2) == 0xff
		|| INTVAL (src2) == 0xffff));
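  /* In other words (an illustrative reading of the special case above):
     something like "andl $0xff, %eax" behaves as a zero-extending byte
     move, so this one operand combination is accepted even though a
     non-matching memory source is otherwise rejected.  */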
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */
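/* The generated code looks roughly like this (an illustrative sketch of
   the control flow, not literal assembler output):

	mov	dividend, scratch
	or	divisor, scratch
	test	$-0x100, scratch	; any bit above the low 8 set?
	je	.Lqimode
	<full 32bit/64bit divmod>
	jmp	.Lend
   .Lqimode:
	<8bit unsigned divide; quotient in AL, remainder in AH>
   .Lend:  */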
void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate the original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
#define LEA_SEARCH_THRESHOLD 12

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach BB boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */
static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  df_ref *def_rec;
  enum attr_type insn_type;

  if (insn != BB_HEAD (bb))
    {
      rtx prev = PREV_INSN (insn);
      while (prev && distance < LEA_SEARCH_THRESHOLD)
	{
	  if (NONDEBUG_INSN_P (prev))
	    {
	      distance++;
	      for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
		if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
		    && !DF_REF_IS_ARTIFICIAL (*def_rec)
		    && (regno1 == DF_REF_REGNO (*def_rec)
			|| regno2 == DF_REF_REGNO (*def_rec)))
		  {
		    insn_type = get_attr_type (prev);
		    if (insn_type != TYPE_LEA)
		      goto done;
		  }
	    }
	  if (prev == BB_HEAD (bb))
	    break;
	  prev = PREV_INSN (prev);
	}
    }

  if (distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	{
	  rtx prev = BB_END (bb);
	  while (prev
		 && prev != insn
		 && distance < LEA_SEARCH_THRESHOLD)
	    {
	      if (NONDEBUG_INSN_P (prev))
		{
		  distance++;
		  for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
			&& (regno1 == DF_REF_REGNO (*def_rec)
			    || regno2 == DF_REF_REGNO (*def_rec)))
		      {
			insn_type = get_attr_type (prev);
			if (insn_type != TYPE_LEA)
			  goto done;
		      }
		}
	      prev = PREV_INSN (prev);
	    }
	}
    }

  distance = -1;

done:
  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  df_ref *def_rec;
  df_ref *use_rec;

  if (insn != BB_END (bb))
    {
      rtx next = NEXT_INSN (insn);
      while (next && distance < LEA_SEARCH_THRESHOLD)
	{
	  if (NONDEBUG_INSN_P (next))
	    {
	      distance++;

	      for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
		if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
		     || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
		    && regno0 == DF_REF_REGNO (*use_rec))
		  {
		    /* Return DISTANCE if OP0 is used in memory
		       address in NEXT.  */
		    return distance;
		  }

	      for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
		if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
		    && !DF_REF_IS_ARTIFICIAL (*def_rec)
		    && regno0 == DF_REF_REGNO (*def_rec))
		  {
		    /* Return -1 if OP0 is set in NEXT.  */
		    return -1;
		  }
	    }
	  if (next == BB_END (bb))
	    break;
	  next = NEXT_INSN (next);
	}
    }

  if (distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	{
	  rtx next = BB_HEAD (bb);
	  while (next
		 && next != insn
		 && distance < LEA_SEARCH_THRESHOLD)
	    {
	      if (NONDEBUG_INSN_P (next))
		{
		  distance++;

		  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
		    if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
			 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
			&& regno0 == DF_REF_REGNO (*use_rec))
		      {
			/* Return DISTANCE if OP0 is used in memory
			   address in NEXT.  */
			return distance;
		      }

		  for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
			&& regno0 == DF_REF_REGNO (*def_rec))
		      {
			/* Return -1 if OP0 is set in NEXT.  */
			return -1;
		      }
		}
	      next = NEXT_INSN (next);
	    }
	}
    }

  return -1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA.
   Zero: ADD and LEA are equally preferred.
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 2

/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */
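/* Illustrative contrast (notional instructions, not literal output):

	addl	%ebx, %eax		; ALU op, clobbers the flags
	leal	(%eax,%ebx), %eax	; runs on Atom's AGU, flags untouched

   When %eax is consumed shortly afterwards as part of a memory address,
   the lea form can avoid an ALU-to-AGU forwarding stall on Atom.  */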
bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a != b && a != c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;
  else
    {
      int dist_define, dist_use;

      /* Return false if REGNO0 isn't used in memory address.  */
      dist_use = distance_agu_use (regno0, insn);
      if (dist_use <= 0)
	return false;

      dist_define = distance_non_agu_define (regno1, regno2, insn);
      if (dist_define <= 0)
	return true;

      /* If this insn has both backward non-agu dependence and forward
	 agu dependence, the one with the shorter distance takes effect.  */
      if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
	return false;

      return true;
    }
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
	  && true_regnum (set_dest) == true_regnum (shift_count))
	return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_sse2_cvttps2dq (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
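/* Net effect of the splitter above (an illustrative summary): for an
   input x >= 2^31 the sequence computes

     (unsigned) x == ((int) (x - 0x1.0p31)) ^ 0x80000000

   The compare builds an all-ones mask, the AND selects 2^31 (or 0) to
   subtract, and the final XOR with the mask shifted into bit 31 adds
   the 2^31 back in integer arithmetic.  For x < 2^31 the mask is zero
   and the truncating conversion applies directly.  */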
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */
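  /* Spelled out (illustrative arithmetic): with input = hi * 2**32 + lo,

       (0x1.0p52 + (double) lo) - 0x1.0p52	      == (double) lo
       (0x1.0p84 + (double) hi * 0x1.0p32) - 0x1.0p84 == (double) hi * 2**32

     so after the bias subtraction below, summing the two lanes yields
     exactly (double) input.  */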
  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
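/* An illustrative trace of the function above: for input 0xffffffff the
   SImode PLUS wraps to 0x7fffffff, floatsidf gives 2147483647.0, and
   adding back 0x1.0p31 yields 4294967295.0 -- the unsigned value.  */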
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
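/* I.e. (illustrative): (double) input
     == (double) (signed) hi * 0x1.0p32 + (double) (unsigned) lo,
   converting the high word as signed and the low word as unsigned.  */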
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
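/* E.g. input 0x12345678 becomes (illustrative)

     float (0x1234) * 0x1.0p16 + float (0x5678)

   Both 16-bit halves are exactly representable in SFmode, so the single
   rounding in the final addition produces the correctly rounded result.  */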
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  rtvec v;

  switch (mode)
    {
    case V4SImode:
      gcc_assert (vect);
      v = gen_rtvec (4, value, value, value, value);
      return gen_rtx_CONST_VECTOR (V4SImode, v);

    case V2DImode:
      gcc_assert (vect);
      v = gen_rtvec (2, value, value);
      return gen_rtx_CONST_VECTOR (V2DImode, v);

    case V8SFmode:
      if (vect)
	v = gen_rtvec (8, value, value, value, value,
		       value, value, value, value);
      else
	v = gen_rtvec (8, value, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V8SFmode, v);

    case V4SFmode:
      if (vect)
	v = gen_rtvec (4, value, value, value, value);
      else
	v = gen_rtvec (4, value, CONST0_RTX (SFmode),
		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V4SFmode, v);

    case V4DFmode:
      if (vect)
	v = gen_rtvec (4, value, value, value, value);
      else
	v = gen_rtvec (4, value, CONST0_RTX (DFmode),
		       CONST0_RTX (DFmode), CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V4DFmode, v);

    case V2DFmode:
      if (vect)
	v = gen_rtvec (2, value, value);
      else
	v = gen_rtvec (2, value, CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V2DFmode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */
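/* For example (illustrative): for V2DFmode with VECT set this builds the
   vector constant { -0.0, -0.0 } (bit pattern 0x8000000000000000 in each
   element); with INVERT set it builds the complementary mask with every
   bit except the sign bit, suitable for implementing fabs via AND.  */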
rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGOCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;
    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }
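	/* E.g. a DImode "a == b" on ia32 becomes, roughly:

	     xorl hi(b), hi(a); xorl lo(b), lo(a);
	     orl  lo(a), hi(a); jz label

	   (an illustrative sketch; registers are notional).  */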
/* Otherwise, if we are doing a less-than or greater-or-equal-than
   comparison, op1 is a constant and the low word is zero, then we
   can just examine the high word.  Similarly for low word -1 and
   less-or-equal-than or greater-than.  */
17618 if (CONST_INT_P (hi[1]))
17621 case LT: case LTU: case GE: case GEU:
17622 if (lo[1] == const0_rtx)
17624 ix86_expand_branch (code, hi[0], hi[1], label);
17628 case LE: case LEU: case GT: case GTU:
17629 if (lo[1] == constm1_rtx)
17631 ix86_expand_branch (code, hi[0], hi[1], label);
17639 /* Otherwise, we need two or three jumps. */
17641 label2 = gen_label_rtx ();
17644 code2 = swap_condition (code);
17645 code3 = unsigned_condition (code);
17649 case LT: case GT: case LTU: case GTU:
17652 case LE: code1 = LT; code2 = GT; break;
17653 case GE: code1 = GT; code2 = LT; break;
17654 case LEU: code1 = LTU; code2 = GTU; break;
17655 case GEU: code1 = GTU; code2 = LTU; break;
17657 case EQ: code1 = UNKNOWN; code2 = NE; break;
17658 case NE: code2 = UNKNOWN; break;
17661 gcc_unreachable ();
* a < b =>
*   if (hi(a) < hi(b)) goto true;
*   if (hi(a) > hi(b)) goto false;
*   if (lo(a) < lo(b)) goto true;
* false:
17672 if (code1 != UNKNOWN)
17673 ix86_expand_branch (code1, hi[0], hi[1], label);
17674 if (code2 != UNKNOWN)
17675 ix86_expand_branch (code2, hi[0], hi[1], label2);
17677 ix86_expand_branch (code3, lo[0], lo[1], label);
17679 if (code2 != UNKNOWN)
17680 emit_label (label2);
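/* A minimal C sketch (editor's aside, not part of the original source) of
   the two-or-three jump cascade emitted above, shown for a signed
   double-word a < b; the helper name is hypothetical.  code1/code2 test
   the high words, code3 finishes with an unsigned test of the low words.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static int
dword_lt (uint32_t lo0, int32_t hi0, uint32_t lo1, int32_t hi1)
{
  if (hi0 < hi1)        /* code1: signed compare of the high words */
    return 1;
  if (hi0 > hi1)        /* code2: branches to label2 past the low test */
    return 0;
  return lo0 < lo1;     /* code3: unsigned compare of the low words */
}
#endif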
17685 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17690 /* Split branch based on floating point condition. */
17692 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17693 rtx target1, rtx target2, rtx tmp, rtx pushed)
17698 if (target2 != pc_rtx)
17701 code = reverse_condition_maybe_unordered (code);
17706 condition = ix86_expand_fp_compare (code, op1, op2,
/* Remove the pushed operand from the stack.  */
17711 ix86_free_from_memory (GET_MODE (pushed));
17713 i = emit_jump_insn (gen_rtx_SET
17715 gen_rtx_IF_THEN_ELSE (VOIDmode,
17716 condition, target1, target2)));
17717 if (split_branch_probability >= 0)
17718 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17722 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17726 gcc_assert (GET_MODE (dest) == QImode);
17728 ret = ix86_expand_compare (code, op0, op1);
17729 PUT_MODE (ret, QImode);
17730 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17733 /* Expand comparison setting or clearing carry flag. Return true when
17734 successful and set pop for the operation. */
17736 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17738 enum machine_mode mode =
17739 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
/* Do not handle double-mode compares that go through a special path.  */
17742 if (mode == (TARGET_64BIT ? TImode : DImode))
17745 if (SCALAR_FLOAT_MODE_P (mode))
17747 rtx compare_op, compare_seq;
17749 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
/* Shortcut: the following common codes never translate
   into carry flag compares.  */
17753 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17754 || code == ORDERED || code == UNORDERED)
/* These comparisons require the zero flag; swap operands so they won't.  */
17758 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17759 && !TARGET_IEEE_FP)
17764 code = swap_condition (code);
/* Try to expand the comparison and verify that we end up with
   a carry flag based comparison.  This fails only when we decide
   to expand the comparison using arithmetic, which is not a very
   common scenario.  */
17772 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17773 compare_seq = get_insns ();
17776 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17777 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17778 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17780 code = GET_CODE (compare_op);
17782 if (code != LTU && code != GEU)
17785 emit_insn (compare_seq);
17790 if (!INTEGRAL_MODE_P (mode))
17799 /* Convert a==0 into (unsigned)a<1. */
17802 if (op1 != const0_rtx)
17805 code = (code == EQ ? LTU : GEU);
/* Convert a>b into b<a or a>=b+1.  */
17811 if (CONST_INT_P (op1))
17813 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
/* Bail out on overflow.  We still can swap operands but that
   would force loading of the constant into a register.  */
17816 if (op1 == const0_rtx
17817 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17819 code = (code == GTU ? GEU : LTU);
17826 code = (code == GTU ? LTU : GEU);
17830 /* Convert a>=0 into (unsigned)a<0x80000000. */
17833 if (mode == DImode || op1 != const0_rtx)
17835 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17836 code = (code == LT ? GEU : LTU);
17840 if (mode == DImode || op1 != constm1_rtx)
17842 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17843 code = (code == LE ? GEU : LTU);
/* Swapping operands may cause a constant to appear as the first operand.  */
17850 if (!nonimmediate_operand (op0, VOIDmode))
17852 if (!can_create_pseudo_p ())
17854 op0 = force_reg (mode, op0);
17856 *pop = ix86_expand_compare (code, op0, op1);
17857 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
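/* A minimal C sketch (editor's aside, not part of the original source) of
   the integer rewrites performed above; the helper name is hypothetical.
   Each right-hand side is a form the carry flag can test directly as LTU
   or GEU.  */
#if 0 /* illustrative sketch only */
#include <assert.h>
#include <stdint.h>
static void
carry_flag_identities (uint32_t a, uint32_t b)
{
  assert ((a == 0) == (a < 1));                      /* EQ  -> LTU */
  if (b != UINT32_MAX)                               /* overflow bail-out */
    assert ((a > b) == (a >= b + 1));                /* GTU -> GEU */
  assert (((int32_t) a >= 0) == (a < 0x80000000u));  /* GE  -> LTU */
}
#endif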
17862 ix86_expand_int_movcc (rtx operands[])
17864 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17865 rtx compare_seq, compare_op;
17866 enum machine_mode mode = GET_MODE (operands[0]);
17867 bool sign_bit_compare_p = false;
17868 rtx op0 = XEXP (operands[1], 0);
17869 rtx op1 = XEXP (operands[1], 1);
17872 compare_op = ix86_expand_compare (code, op0, op1);
17873 compare_seq = get_insns ();
17876 compare_code = GET_CODE (compare_op);
17878 if ((op1 == const0_rtx && (code == GE || code == LT))
17879 || (op1 == constm1_rtx && (code == GT || code == LE)))
17880 sign_bit_compare_p = true;
17882 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17883 HImode insns, we'd be swallowed in word prefix ops. */
17885 if ((mode != HImode || TARGET_FAST_PREFIX)
17886 && (mode != (TARGET_64BIT ? TImode : DImode))
17887 && CONST_INT_P (operands[2])
17888 && CONST_INT_P (operands[3]))
17890 rtx out = operands[0];
17891 HOST_WIDE_INT ct = INTVAL (operands[2]);
17892 HOST_WIDE_INT cf = INTVAL (operands[3]);
17893 HOST_WIDE_INT diff;
/* Sign bit compares are better done using shifts than using the sbb insn.  */
17898 if (sign_bit_compare_p
17899 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17901 /* Detect overlap between destination and compare sources. */
17904 if (!sign_bit_compare_p)
17907 bool fpcmp = false;
17909 compare_code = GET_CODE (compare_op);
17911 flags = XEXP (compare_op, 0);
17913 if (GET_MODE (flags) == CCFPmode
17914 || GET_MODE (flags) == CCFPUmode)
17918 = ix86_fp_compare_code_to_integer (compare_code);
/* To simplify the rest of the code, restrict to the GEU case.  */
17922 if (compare_code == LTU)
17924 HOST_WIDE_INT tmp = ct;
17927 compare_code = reverse_condition (compare_code);
17928 code = reverse_condition (code);
17933 PUT_CODE (compare_op,
17934 reverse_condition_maybe_unordered
17935 (GET_CODE (compare_op)));
17937 PUT_CODE (compare_op,
17938 reverse_condition (GET_CODE (compare_op)));
17942 if (reg_overlap_mentioned_p (out, op0)
17943 || reg_overlap_mentioned_p (out, op1))
17944 tmp = gen_reg_rtx (mode);
17946 if (mode == DImode)
17947 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17949 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17950 flags, compare_op));
17954 if (code == GT || code == GE)
17955 code = reverse_condition (code);
17958 HOST_WIDE_INT tmp = ct;
17963 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17976 tmp = expand_simple_binop (mode, PLUS,
17978 copy_rtx (tmp), 1, OPTAB_DIRECT);
17989 tmp = expand_simple_binop (mode, IOR,
17991 copy_rtx (tmp), 1, OPTAB_DIRECT);
17993 else if (diff == -1 && ct)
18003 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18005 tmp = expand_simple_binop (mode, PLUS,
18006 copy_rtx (tmp), GEN_INT (cf),
18007 copy_rtx (tmp), 1, OPTAB_DIRECT);
18015 * andl cf - ct, dest
18025 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18028 tmp = expand_simple_binop (mode, AND,
18030 gen_int_mode (cf - ct, mode),
18031 copy_rtx (tmp), 1, OPTAB_DIRECT);
18033 tmp = expand_simple_binop (mode, PLUS,
18034 copy_rtx (tmp), GEN_INT (ct),
18035 copy_rtx (tmp), 1, OPTAB_DIRECT);
18038 if (!rtx_equal_p (tmp, out))
18039 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
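/* A minimal C sketch (editor's aside, not part of the original source) of
   the branchless select built above; the helper name is hypothetical.
   The sbb materializes the carry as an all-ones or all-zero mask, which
   is then folded with the two constants.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static uint32_t
select_via_mask (int carry, uint32_t ct, uint32_t cf)
{
  uint32_t mask = carry ? ~0u : 0u;   /* what the sbb leaves behind */
  /* ct when the mask is all ones, cf when it is zero.  */
  return (mask & (ct - cf)) + cf;
}
#endif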
18046 enum machine_mode cmp_mode = GET_MODE (op0);
18049 tmp = ct, ct = cf, cf = tmp;
18052 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18054 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare, which
   is not valid in general (we may convert a non-trapping condition
   into a trapping one); however, on i386 we currently emit all
   comparisons unordered.  */
18060 compare_code = reverse_condition_maybe_unordered (compare_code);
18061 code = reverse_condition_maybe_unordered (code);
18065 compare_code = reverse_condition (compare_code);
18066 code = reverse_condition (code);
18070 compare_code = UNKNOWN;
18071 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18072 && CONST_INT_P (op1))
18074 if (op1 == const0_rtx
18075 && (code == LT || code == GE))
18076 compare_code = code;
18077 else if (op1 == constm1_rtx)
18081 else if (code == GT)
18086 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18087 if (compare_code != UNKNOWN
18088 && GET_MODE (op0) == GET_MODE (out)
18089 && (cf == -1 || ct == -1))
18091 /* If lea code below could be used, only optimize
18092 if it results in a 2 insn sequence. */
18094 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18095 || diff == 3 || diff == 5 || diff == 9)
18096 || (compare_code == LT && ct == -1)
18097 || (compare_code == GE && cf == -1))
18100 * notl op1 (if necessary)
18108 code = reverse_condition (code);
18111 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18113 out = expand_simple_binop (mode, IOR,
18115 out, 1, OPTAB_DIRECT);
18116 if (out != operands[0])
18117 emit_move_insn (operands[0], out);
18124 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18125 || diff == 3 || diff == 5 || diff == 9)
18126 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18128 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18134 * lea cf(dest*(ct-cf)),dest
18138 * This also catches the degenerate setcc-only case.
18144 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get the arithmetic done in the proper mode to match.  */
18150 tmp = copy_rtx (out);
18154 out1 = copy_rtx (out);
18155 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18159 tmp = gen_rtx_PLUS (mode, tmp, out1);
18165 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18168 if (!rtx_equal_p (tmp, out))
18171 out = force_operand (tmp, copy_rtx (out));
18173 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18175 if (!rtx_equal_p (out, operands[0]))
18176 emit_move_insn (operands[0], copy_rtx (out));
* General case:                  Jumpful:
*   xorl dest,dest               cmpl op1, op2
*   cmpl op1, op2                movl ct, dest
*   setcc dest                   jcc 1f
*   decl dest                    movl cf, dest
*   andl (cf-ct),dest            1:
*   addl ct,dest
*
* Size 20.                       Size 14.
*
* This is reasonably steep, but branch mispredict costs are
* high on modern cpus, so consider failing only if optimizing
* for space.  */
18197 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18198 && BRANCH_COST (optimize_insn_for_speed_p (),
18203 enum machine_mode cmp_mode = GET_MODE (op0);
18208 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18210 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare,
   which is not valid in general (we may convert a non-trapping
   condition into a trapping one); however, on i386 we currently
   emit all comparisons unordered.  */
18216 code = reverse_condition_maybe_unordered (code);
18220 code = reverse_condition (code);
18221 if (compare_code != UNKNOWN)
18222 compare_code = reverse_condition (compare_code);
18226 if (compare_code != UNKNOWN)
/* notl op1 (if needed)
   sarl $31, op1
   andl (cf-ct), op1
   addl ct, op1

   For x < 0 (resp. x <= -1) there will be no notl,
   so if possible swap the constants to get rid of the
   complement.
   True/false will be -1/0 while the code below (store flag
   followed by decrement) is 0/-1, so the constants need
   to be exchanged once more.  */
18240 if (compare_code == GE || !cf)
18242 code = reverse_condition (code);
18247 HOST_WIDE_INT tmp = cf;
18252 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18256 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18258 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18260 copy_rtx (out), 1, OPTAB_DIRECT);
18263 out = expand_simple_binop (mode, AND, copy_rtx (out),
18264 gen_int_mode (cf - ct, mode),
18265 copy_rtx (out), 1, OPTAB_DIRECT);
18267 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18268 copy_rtx (out), 1, OPTAB_DIRECT);
18269 if (!rtx_equal_p (out, operands[0]))
18270 emit_move_insn (operands[0], copy_rtx (out));
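/* A minimal C sketch (editor's aside, not part of the original source) of
   the jumpless sequence costed above (xorl; setcc; decl; andl; addl); the
   helper name is hypothetical.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static uint32_t
setcc_dec_and_add (int cond, uint32_t ct, uint32_t cf)
{
  uint32_t t = cond ? 1u : 0u;   /* setcc dest */
  t -= 1u;                       /* decl: 0 when cond holds, else all ones */
  t &= cf - ct;                  /* andl (cf-ct),dest */
  return t + ct;                 /* addl ct,dest: ct when cond, else cf */
}
#endif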
18276 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18278 /* Try a few things more with specific constants and a variable. */
18281 rtx var, orig_out, out, tmp;
18283 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18286 /* If one of the two operands is an interesting constant, load a
18287 constant with the above and mask it in with a logical operation. */
18289 if (CONST_INT_P (operands[2]))
18292 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18293 operands[3] = constm1_rtx, op = and_optab;
18294 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18295 operands[3] = const0_rtx, op = ior_optab;
18299 else if (CONST_INT_P (operands[3]))
18302 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18303 operands[2] = constm1_rtx, op = and_optab;
else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18305 operands[2] = const0_rtx, op = ior_optab;
18312 orig_out = operands[0];
18313 tmp = gen_reg_rtx (mode);
18316 /* Recurse to get the constant loaded. */
18317 if (ix86_expand_int_movcc (operands) == 0)
18320 /* Mask in the interesting variable. */
18321 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18323 if (!rtx_equal_p (out, orig_out))
18324 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
* For comparison with above,
*
*   movl cf,dest
*   movl ct,tmp
*   cmpl op1,op2
*   cmovcc tmp,dest
*
* Size 15.
*/
18340 if (! nonimmediate_operand (operands[2], mode))
18341 operands[2] = force_reg (mode, operands[2]);
18342 if (! nonimmediate_operand (operands[3], mode))
18343 operands[3] = force_reg (mode, operands[3]);
18345 if (! register_operand (operands[2], VOIDmode)
18347 || ! register_operand (operands[3], VOIDmode)))
18348 operands[2] = force_reg (mode, operands[2]);
18351 && ! register_operand (operands[3], VOIDmode))
18352 operands[3] = force_reg (mode, operands[3]);
18354 emit_insn (compare_seq);
18355 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18356 gen_rtx_IF_THEN_ELSE (mode,
18357 compare_op, operands[2],
18362 /* Swap, force into registers, or otherwise massage the two operands
18363 to an sse comparison with a mask result. Thus we differ a bit from
18364 ix86_prepare_fp_compare_args which expects to produce a flags result.
18366 The DEST operand exists to help determine whether to commute commutative
18367 operators. The POP0/POP1 operands are updated in place. The new
18368 comparison code is returned, or UNKNOWN if not implementable. */
18370 static enum rtx_code
18371 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18372 rtx *pop0, rtx *pop1)
18380 /* We have no LTGT as an operator. We could implement it with
18381 NE & ORDERED, but this requires an extra temporary. It's
18382 not clear that it's worth it. */
18389 /* These are supported directly. */
18396 /* For commutative operators, try to canonicalize the destination
18397 operand to be first in the comparison - this helps reload to
18398 avoid extra moves. */
18399 if (!dest || !rtx_equal_p (dest, *pop1))
18407 /* These are not supported directly. Swap the comparison operands
18408 to transform into something that is supported. */
18412 code = swap_condition (code);
18416 gcc_unreachable ();
18422 /* Detect conditional moves that exactly match min/max operational
18423 semantics. Note that this is IEEE safe, as long as we don't
18424 interchange the operands.
18426 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18427 and TRUE if the operation is successful and instructions are emitted. */
18430 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18431 rtx cmp_op1, rtx if_true, rtx if_false)
18433 enum machine_mode mode;
18439 else if (code == UNGE)
18442 if_true = if_false;
18448 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18450 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18455 mode = GET_MODE (dest);
18457 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18458 but MODE may be a vector mode and thus not appropriate. */
18459 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18461 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18464 if_true = force_reg (mode, if_true);
18465 v = gen_rtvec (2, if_true, if_false);
18466 tmp = gen_rtx_UNSPEC (mode, v, u);
18470 code = is_min ? SMIN : SMAX;
18471 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18474 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18478 /* Expand an sse vector comparison. Return the register with the result. */
18481 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18482 rtx op_true, rtx op_false)
18484 enum machine_mode mode = GET_MODE (dest);
18487 cmp_op0 = force_reg (mode, cmp_op0);
18488 if (!nonimmediate_operand (cmp_op1, mode))
18489 cmp_op1 = force_reg (mode, cmp_op1);
18492 || reg_overlap_mentioned_p (dest, op_true)
18493 || reg_overlap_mentioned_p (dest, op_false))
18494 dest = gen_reg_rtx (mode);
18496 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18497 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18502 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18503 operations. This is used for both scalar and vector conditional moves. */
18506 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18508 enum machine_mode mode = GET_MODE (dest);
18511 if (op_false == CONST0_RTX (mode))
18513 op_true = force_reg (mode, op_true);
18514 x = gen_rtx_AND (mode, cmp, op_true);
18515 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18517 else if (op_true == CONST0_RTX (mode))
18519 op_false = force_reg (mode, op_false);
18520 x = gen_rtx_NOT (mode, cmp);
18521 x = gen_rtx_AND (mode, x, op_false);
18522 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18524 else if (TARGET_XOP)
18526 rtx pcmov = gen_rtx_SET (mode, dest,
18527 gen_rtx_IF_THEN_ELSE (mode, cmp,
18534 op_true = force_reg (mode, op_true);
18535 op_false = force_reg (mode, op_false);
18537 t2 = gen_reg_rtx (mode);
18539 t3 = gen_reg_rtx (mode);
18543 x = gen_rtx_AND (mode, op_true, cmp);
18544 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18546 x = gen_rtx_NOT (mode, cmp);
18547 x = gen_rtx_AND (mode, x, op_false);
18548 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18550 x = gen_rtx_IOR (mode, t3, t2);
18551 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
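/* A minimal C sketch (editor's aside, not part of the original source) of
   the and/andn/ior blend emitted above, written per 32-bit lane; the
   helper name is hypothetical.  CMP is assumed to be all ones or all
   zeros in each lane, as the SSE compare instructions produce.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static uint32_t
mask_blend (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  /* Per-bit select: op_true where the mask is set, op_false elsewhere.  */
  return (cmp & op_true) | (~cmp & op_false);
}
#endif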
18555 /* Expand a floating-point conditional move. Return true if successful. */
18558 ix86_expand_fp_movcc (rtx operands[])
18560 enum machine_mode mode = GET_MODE (operands[0]);
18561 enum rtx_code code = GET_CODE (operands[1]);
18562 rtx tmp, compare_op;
18563 rtx op0 = XEXP (operands[1], 0);
18564 rtx op1 = XEXP (operands[1], 1);
18566 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18568 enum machine_mode cmode;
/* Since we have no cmove for sse registers, don't force bad register
   allocation just to gain access to it.  Deny movcc when the
   comparison mode doesn't match the move mode.  */
18573 cmode = GET_MODE (op0);
18574 if (cmode == VOIDmode)
18575 cmode = GET_MODE (op1);
18579 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18580 if (code == UNKNOWN)
18583 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18584 operands[2], operands[3]))
18587 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18588 operands[2], operands[3]);
18589 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18593 /* The floating point conditional move instructions don't directly
18594 support conditions resulting from a signed integer comparison. */
18596 compare_op = ix86_expand_compare (code, op0, op1);
18597 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18599 tmp = gen_reg_rtx (QImode);
18600 ix86_expand_setcc (tmp, code, op0, op1);
18602 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18605 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18606 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18607 operands[2], operands[3])));
18612 /* Expand a floating-point vector conditional move; a vcond operation
18613 rather than a movcc operation. */
18616 ix86_expand_fp_vcond (rtx operands[])
18618 enum rtx_code code = GET_CODE (operands[3]);
18621 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18622 &operands[4], &operands[5]);
18623 if (code == UNKNOWN)
18626 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18627 operands[5], operands[1], operands[2]))
18630 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18631 operands[1], operands[2]);
18632 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18636 /* Expand a signed/unsigned integral vector conditional move. */
18639 ix86_expand_int_vcond (rtx operands[])
18641 enum machine_mode mode = GET_MODE (operands[0]);
18642 enum rtx_code code = GET_CODE (operands[3]);
18643 bool negate = false;
18646 cop0 = operands[4];
18647 cop1 = operands[5];
18649 /* XOP supports all of the comparisons on all vector int types. */
18652 /* Canonicalize the comparison to EQ, GT, GTU. */
18663 code = reverse_condition (code);
18669 code = reverse_condition (code);
18675 code = swap_condition (code);
18676 x = cop0, cop0 = cop1, cop1 = x;
18680 gcc_unreachable ();
18683 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18684 if (mode == V2DImode)
18689 /* SSE4.1 supports EQ. */
18690 if (!TARGET_SSE4_1)
18696 /* SSE4.2 supports GT/GTU. */
18697 if (!TARGET_SSE4_2)
18702 gcc_unreachable ();
/* Unsigned parallel compare is not supported by the hardware.
   Play some tricks to turn this into a signed comparison
   against 0.  */
18711 cop0 = force_reg (mode, cop0);
18719 rtx (*gen_sub3) (rtx, rtx, rtx);
/* Subtract (-(INT MAX) - 1) from both operands to make
   them signed.  */
18723 mask = ix86_build_signbit_mask (mode, true, false);
18724 gen_sub3 = (mode == V4SImode
18725 ? gen_subv4si3 : gen_subv2di3);
18726 t1 = gen_reg_rtx (mode);
18727 emit_insn (gen_sub3 (t1, cop0, mask));
18729 t2 = gen_reg_rtx (mode);
18730 emit_insn (gen_sub3 (t2, cop1, mask));
18740 /* Perform a parallel unsigned saturating subtraction. */
18741 x = gen_reg_rtx (mode);
18742 emit_insn (gen_rtx_SET (VOIDmode, x,
18743 gen_rtx_US_MINUS (mode, cop0, cop1)));
18746 cop1 = CONST0_RTX (mode);
18752 gcc_unreachable ();
18757 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18758 operands[1+negate], operands[2-negate]);
18760 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18761 operands[2-negate]);
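/* A minimal C sketch (editor's aside, not part of the original source) of
   the signedness trick used above, as a scalar identity; the helper name
   is hypothetical.  Flipping the sign bit (equivalently, subtracting the
   sign-bit mask) turns the unsigned comparison into the signed one the
   hardware provides.  */
#if 0 /* illustrative sketch only */
#include <assert.h>
#include <stdint.h>
static void
unsigned_gt_via_signed (uint32_t a, uint32_t b)
{
  int32_t sa = (int32_t) (a ^ 0x80000000u);   /* bias both operands */
  int32_t sb = (int32_t) (b ^ 0x80000000u);
  assert ((a > b) == (sa > sb));              /* GTU becomes GT */
}
#endif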
18765 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18766 true if we should do zero extension, else sign extension. HIGH_P is
18767 true if we want the N/2 high elements, else the low elements. */
18770 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18772 enum machine_mode imode = GET_MODE (operands[1]);
18773 rtx (*unpack)(rtx, rtx, rtx);
18780 unpack = gen_vec_interleave_highv16qi;
18782 unpack = gen_vec_interleave_lowv16qi;
18786 unpack = gen_vec_interleave_highv8hi;
18788 unpack = gen_vec_interleave_lowv8hi;
18792 unpack = gen_vec_interleave_highv4si;
18794 unpack = gen_vec_interleave_lowv4si;
18797 gcc_unreachable ();
18800 dest = gen_lowpart (imode, operands[0]);
18803 se = force_reg (imode, CONST0_RTX (imode));
18805 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18806 operands[1], pc_rtx, pc_rtx);
18808 emit_insn (unpack (dest, operands[1], se));
18811 /* This function performs the same task as ix86_expand_sse_unpack,
18812 but with SSE4.1 instructions. */
18815 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18817 enum machine_mode imode = GET_MODE (operands[1]);
18818 rtx (*unpack)(rtx, rtx);
18825 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18827 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18831 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18833 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18837 unpack = gen_sse4_1_zero_extendv2siv2di2;
18839 unpack = gen_sse4_1_sign_extendv2siv2di2;
18842 gcc_unreachable ();
18845 dest = operands[0];
18848 /* Shift higher 8 bytes to lower 8 bytes. */
18849 src = gen_reg_rtx (imode);
18850 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18851 gen_lowpart (V1TImode, operands[1]),
18857 emit_insn (unpack (dest, src));
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
18864 ix86_expand_int_addcc (rtx operands[])
18866 enum rtx_code code = GET_CODE (operands[1]);
18868 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18870 rtx val = const0_rtx;
18871 bool fpcmp = false;
18872 enum machine_mode mode;
18873 rtx op0 = XEXP (operands[1], 0);
18874 rtx op1 = XEXP (operands[1], 1);
18876 if (operands[3] != const1_rtx
18877 && operands[3] != constm1_rtx)
18879 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18881 code = GET_CODE (compare_op);
18883 flags = XEXP (compare_op, 0);
18885 if (GET_MODE (flags) == CCFPmode
18886 || GET_MODE (flags) == CCFPUmode)
18889 code = ix86_fp_compare_code_to_integer (code);
18896 PUT_CODE (compare_op,
18897 reverse_condition_maybe_unordered
18898 (GET_CODE (compare_op)));
18900 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18903 mode = GET_MODE (operands[0]);
18905 /* Construct either adc or sbb insn. */
18906 if ((code == LTU) == (operands[3] == constm1_rtx))
18911 insn = gen_subqi3_carry;
18914 insn = gen_subhi3_carry;
18917 insn = gen_subsi3_carry;
18920 insn = gen_subdi3_carry;
18923 gcc_unreachable ();
18931 insn = gen_addqi3_carry;
18934 insn = gen_addhi3_carry;
18937 insn = gen_addsi3_carry;
18940 insn = gen_adddi3_carry;
18943 gcc_unreachable ();
18946 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
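/* A minimal C sketch (editor's aside, not part of the original source) of
   what the adc/sbb form above computes; the helper name is hypothetical.
   The comparison leaves its result in the carry flag, and the
   add/sub-with-carry folds the conditional +-1 into the arithmetic for
   free.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static uint32_t
cond_increment (uint32_t op, int carry)
{
  /* adc op, 0 adds the carry; sbb op, 0 subtracts it instead.  */
  return op + (carry ? 1u : 0u);
}
#endif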
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */
18958 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18963 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18965 size = (GET_MODE_SIZE (mode) + 4) / 8;
18967 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18968 gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool references to immediates.  This is used by fp
   moves that force all constants to memory to allow combining.  */
18972 if (MEM_P (operand) && MEM_READONLY_P (operand))
18974 rtx tmp = maybe_get_pool_constant (operand);
18979 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
18982 int ok = push_operand (operand, VOIDmode);
18986 operand = copy_rtx (operand);
18987 PUT_MODE (operand, Pmode);
18988 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18992 if (GET_CODE (operand) == CONST_VECTOR)
18994 enum machine_mode imode = int_mode_for_mode (mode);
18995 /* Caution: if we looked through a constant pool memory above,
18996 the operand may actually have a different mode now. That's
18997 ok, since we want to pun this all the way back to an integer. */
18998 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18999 gcc_assert (operand != NULL);
19005 if (mode == DImode)
19006 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19011 if (REG_P (operand))
19013 gcc_assert (reload_completed);
19014 for (i = 0; i < size; i++)
19015 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19017 else if (offsettable_memref_p (operand))
19019 operand = adjust_address (operand, SImode, 0);
19020 parts[0] = operand;
19021 for (i = 1; i < size; i++)
19022 parts[i] = adjust_address (operand, SImode, 4 * i);
19024 else if (GET_CODE (operand) == CONST_DOUBLE)
19029 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19033 real_to_target (l, &r, mode);
19034 parts[3] = gen_int_mode (l[3], SImode);
19035 parts[2] = gen_int_mode (l[2], SImode);
19038 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19039 parts[2] = gen_int_mode (l[2], SImode);
19042 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19045 gcc_unreachable ();
19047 parts[1] = gen_int_mode (l[1], SImode);
19048 parts[0] = gen_int_mode (l[0], SImode);
19051 gcc_unreachable ();
19056 if (mode == TImode)
19057 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19058 if (mode == XFmode || mode == TFmode)
19060 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19061 if (REG_P (operand))
19063 gcc_assert (reload_completed);
19064 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19065 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19067 else if (offsettable_memref_p (operand))
19069 operand = adjust_address (operand, DImode, 0);
19070 parts[0] = operand;
19071 parts[1] = adjust_address (operand, upper_mode, 8);
19073 else if (GET_CODE (operand) == CONST_DOUBLE)
19078 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19079 real_to_target (l, &r, mode);
19081 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19082 if (HOST_BITS_PER_WIDE_INT >= 64)
19085 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19086 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19089 parts[0] = immed_double_const (l[0], l[1], DImode);
19091 if (upper_mode == SImode)
19092 parts[1] = gen_int_mode (l[2], SImode);
19093 else if (HOST_BITS_PER_WIDE_INT >= 64)
19096 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19097 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19100 parts[1] = immed_double_const (l[2], l[3], DImode);
19103 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
19116 ix86_split_long_move (rtx operands[])
19121 int collisions = 0;
19122 enum machine_mode mode = GET_MODE (operands[0]);
19123 bool collisionparts[4];
/* The DFmode expanders may ask us to move a double.
   For a 64-bit target this is a single move.  By hiding the fact
   here we simplify the i386.md splitters.  */
19128 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19130 /* Optimize constant pool reference to immediates. This is used by
19131 fp moves, that force all constants to memory to allow combining. */
19133 if (MEM_P (operands[1])
19134 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19135 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19136 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19137 if (push_operand (operands[0], VOIDmode))
19139 operands[0] = copy_rtx (operands[0]);
19140 PUT_MODE (operands[0], Pmode);
19143 operands[0] = gen_lowpart (DImode, operands[0]);
19144 operands[1] = gen_lowpart (DImode, operands[1]);
19145 emit_move_insn (operands[0], operands[1]);
/* The only non-offsettable memory we handle is a push.  */
19150 if (push_operand (operands[0], VOIDmode))
19153 gcc_assert (!MEM_P (operands[0])
19154 || offsettable_memref_p (operands[0]));
19156 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19157 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care of source operands on the stack.  */
19160 if (push && MEM_P (operands[1])
19161 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19163 rtx src_base = XEXP (part[1][nparts - 1], 0);
19165 /* Compensate for the stack decrement by 4. */
19166 if (!TARGET_64BIT && nparts == 3
19167 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19168 src_base = plus_constant (src_base, 4);
/* src_base refers to the stack pointer and is
   automatically decreased by the emitted push.  */
19172 for (i = 0; i < nparts; i++)
19173 part[1][i] = change_address (part[1][i],
19174 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
19179 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19183 for (i = 0; i < nparts; i++)
19186 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19187 if (collisionparts[i])
19191 /* Collision in the middle part can be handled by reordering. */
19192 if (collisions == 1 && nparts == 3 && collisionparts [1])
19194 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19195 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19197 else if (collisions == 1
19199 && (collisionparts [1] || collisionparts [2]))
19201 if (collisionparts [1])
19203 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19204 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19208 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19209 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19213 /* If there are more collisions, we can't handle it by reordering.
19214 Do an lea to the last part and use only one colliding move. */
19215 else if (collisions > 1)
19221 base = part[0][nparts - 1];
19223 /* Handle the case when the last part isn't valid for lea.
19224 Happens in 64-bit mode storing the 12-byte XFmode. */
19225 if (GET_MODE (base) != Pmode)
19226 base = gen_rtx_REG (Pmode, REGNO (base));
19228 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19229 part[1][0] = replace_equiv_address (part[1][0], base);
19230 for (i = 1; i < nparts; i++)
19232 tmp = plus_constant (base, UNITS_PER_WORD * i);
19233 part[1][i] = replace_equiv_address (part[1][i], tmp);
19244 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19245 emit_insn (gen_addsi3 (stack_pointer_rtx,
19246 stack_pointer_rtx, GEN_INT (-4)));
19247 emit_move_insn (part[0][2], part[1][2]);
19249 else if (nparts == 4)
19251 emit_move_insn (part[0][3], part[1][3]);
19252 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  In case this is
   a register, it is OK - we will just use the larger counterpart.  We also
   retype the memory - this comes from an attempt to avoid the REX prefix
   on moving the second half of a TFmode value.  */
19261 if (GET_MODE (part[1][1]) == SImode)
19263 switch (GET_CODE (part[1][1]))
19266 part[1][1] = adjust_address (part[1][1], DImode, 0);
19270 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19274 gcc_unreachable ();
19277 if (GET_MODE (part[1][0]) == SImode)
19278 part[1][0] = part[1][1];
19281 emit_move_insn (part[0][1], part[1][1]);
19282 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
19287 if ((REG_P (part[0][0])
19288 && REG_P (part[1][1])
19289 && (REGNO (part[0][0]) == REGNO (part[1][1])
19291 && REGNO (part[0][0]) == REGNO (part[1][2]))
19293 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19295 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19297 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19299 operands[2 + i] = part[0][j];
19300 operands[6 + i] = part[1][j];
19305 for (i = 0; i < nparts; i++)
19307 operands[2 + i] = part[0][i];
19308 operands[6 + i] = part[1][i];
19312 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19313 if (optimize_insn_for_size_p ())
19315 for (j = 0; j < nparts - 1; j++)
19316 if (CONST_INT_P (operands[6 + j])
19317 && operands[6 + j] != const0_rtx
19318 && REG_P (operands[2 + j]))
19319 for (i = j; i < nparts - 1; i++)
19320 if (CONST_INT_P (operands[7 + i])
19321 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19322 operands[7 + i] = operands[2 + j];
19325 for (i = 0; i < nparts; i++)
19326 emit_move_insn (operands[2 + i], operands[6 + i]);
19331 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19332 left shift by a constant, either using a single shift or
19333 a sequence of add instructions. */
19336 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19338 rtx (*insn)(rtx, rtx, rtx);
19341 || (count * ix86_cost->add <= ix86_cost->shift_const
19342 && !optimize_insn_for_size_p ()))
19344 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19345 while (count-- > 0)
19346 emit_insn (insn (operand, operand, operand));
19350 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19351 emit_insn (insn (operand, operand, GEN_INT (count)));
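/* A minimal C sketch (editor's aside, not part of the original source) of
   the add-chain form chosen above when a few adds are cheaper than one
   shift; the helper name is hypothetical.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static uint32_t
ashl_via_adds (uint32_t x, int count)
{
  while (count-- > 0)
    x += x;   /* each self-add performs one left-shift step */
  return x;
}
#endif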
19356 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19358 rtx (*gen_ashl3)(rtx, rtx, rtx);
19359 rtx (*gen_shld)(rtx, rtx, rtx);
19360 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19362 rtx low[2], high[2];
19365 if (CONST_INT_P (operands[2]))
19367 split_double_mode (mode, operands, 2, low, high);
19368 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19370 if (count >= half_width)
19372 emit_move_insn (high[0], low[1]);
19373 emit_move_insn (low[0], const0_rtx);
19375 if (count > half_width)
19376 ix86_expand_ashl_const (high[0], count - half_width, mode);
19380 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19382 if (!rtx_equal_p (operands[0], operands[1]))
19383 emit_move_insn (operands[0], operands[1]);
19385 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19386 ix86_expand_ashl_const (low[0], count, mode);
19391 split_double_mode (mode, operands, 1, low, high);
19393 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19395 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, then 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
19399 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19401 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19403 ix86_expand_clear (low[0]);
19404 ix86_expand_clear (high[0]);
19405 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19407 d = gen_lowpart (QImode, low[0]);
19408 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19409 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19410 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19412 d = gen_lowpart (QImode, high[0]);
19413 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19414 s = gen_rtx_NE (QImode, flags, const0_rtx);
19415 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19418 /* Otherwise, we can get the same results by manually performing
19419 a bit extract operation on bit 5/6, and then performing the two
19420 shifts. The two methods of getting 0/1 into low/high are exactly
19421 the same size. Avoiding the shift in the bit extract case helps
19422 pentium4 a bit; no one else seems to care much either way. */
19425 enum machine_mode half_mode;
19426 rtx (*gen_lshr3)(rtx, rtx, rtx);
19427 rtx (*gen_and3)(rtx, rtx, rtx);
19428 rtx (*gen_xor3)(rtx, rtx, rtx);
19429 HOST_WIDE_INT bits;
19432 if (mode == DImode)
19434 half_mode = SImode;
19435 gen_lshr3 = gen_lshrsi3;
19436 gen_and3 = gen_andsi3;
19437 gen_xor3 = gen_xorsi3;
19442 half_mode = DImode;
19443 gen_lshr3 = gen_lshrdi3;
19444 gen_and3 = gen_anddi3;
19445 gen_xor3 = gen_xordi3;
19449 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19450 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19452 x = gen_lowpart (half_mode, operands[2]);
19453 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19455 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19456 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19457 emit_move_insn (low[0], high[0]);
19458 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19461 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19462 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
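/* A minimal C sketch (editor's aside, not part of the original source) of
   the branchless double-word 1 << N built above, for the 32-bit-halves
   case; the helper name is hypothetical.  Bit 5 of the count selects
   which half receives the 1, and both halves are then shifted by the
   count modulo 32.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static void
one_shl_n (unsigned n, uint32_t *lo, uint32_t *hi)
{
  *lo = (n & 32) == 0;   /* setcc into the low half  */
  *hi = (n & 32) != 0;   /* setcc into the high half */
  *lo <<= (n & 31);      /* hardware shifts already mask the count */
  *hi <<= (n & 31);
}
#endif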
19466 if (operands[1] == constm1_rtx)
19468 /* For -1 << N, we can avoid the shld instruction, because we
19469 know that we're shifting 0...31/63 ones into a -1. */
19470 emit_move_insn (low[0], constm1_rtx);
19471 if (optimize_insn_for_size_p ())
19472 emit_move_insn (high[0], low[0]);
19474 emit_move_insn (high[0], constm1_rtx);
19478 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19480 if (!rtx_equal_p (operands[0], operands[1]))
19481 emit_move_insn (operands[0], operands[1]);
19483 split_double_mode (mode, operands, 1, low, high);
19484 emit_insn (gen_shld (high[0], low[0], operands[2]));
19487 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19489 if (TARGET_CMOVE && scratch)
19491 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19492 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19494 ix86_expand_clear (scratch);
19495 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19499 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19500 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19502 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19507 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19509 rtx (*gen_ashr3)(rtx, rtx, rtx)
19510 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19511 rtx (*gen_shrd)(rtx, rtx, rtx);
19512 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19514 rtx low[2], high[2];
19517 if (CONST_INT_P (operands[2]))
19519 split_double_mode (mode, operands, 2, low, high);
19520 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19522 if (count == GET_MODE_BITSIZE (mode) - 1)
19524 emit_move_insn (high[0], high[1]);
19525 emit_insn (gen_ashr3 (high[0], high[0],
19526 GEN_INT (half_width - 1)));
19527 emit_move_insn (low[0], high[0]);
19530 else if (count >= half_width)
19532 emit_move_insn (low[0], high[1]);
19533 emit_move_insn (high[0], low[0]);
19534 emit_insn (gen_ashr3 (high[0], high[0],
19535 GEN_INT (half_width - 1)));
19537 if (count > half_width)
19538 emit_insn (gen_ashr3 (low[0], low[0],
19539 GEN_INT (count - half_width)));
19543 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19545 if (!rtx_equal_p (operands[0], operands[1]))
19546 emit_move_insn (operands[0], operands[1]);
19548 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19549 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19554 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19556 if (!rtx_equal_p (operands[0], operands[1]))
19557 emit_move_insn (operands[0], operands[1]);
19559 split_double_mode (mode, operands, 1, low, high);
19561 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19562 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19564 if (TARGET_CMOVE && scratch)
19566 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19567 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19569 emit_move_insn (scratch, high[0]);
19570 emit_insn (gen_ashr3 (scratch, scratch,
19571 GEN_INT (half_width - 1)));
19572 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19577 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19578 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19580 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
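/* A minimal C sketch (editor's aside, not part of the original source) of
   the split arithmetic right shift above, including the count >= 32
   adjustment the cmove/adj step performs; the helper name is hypothetical
   and N is assumed to satisfy 0 < N < 64.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static void
dword_ashr (uint32_t *lo, int32_t *hi, unsigned n)
{
  if (n < 32)
    {
      *lo = (*lo >> n) | ((uint32_t) *hi << (32 - n));  /* shrd */
      *hi >>= n;                                        /* sar  */
    }
  else
    {
      *lo = (uint32_t) (*hi >> (n - 32));  /* low half takes the high word */
      *hi >>= 31;                          /* high half keeps sign copies  */
    }
}
#endif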
19586 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19588 rtx (*gen_lshr3)(rtx, rtx, rtx)
19589 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19590 rtx (*gen_shrd)(rtx, rtx, rtx);
19591 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19593 rtx low[2], high[2];
19596 if (CONST_INT_P (operands[2]))
19598 split_double_mode (mode, operands, 2, low, high);
19599 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19601 if (count >= half_width)
19603 emit_move_insn (low[0], high[1]);
19604 ix86_expand_clear (high[0]);
19606 if (count > half_width)
19607 emit_insn (gen_lshr3 (low[0], low[0],
19608 GEN_INT (count - half_width)));
19612 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19614 if (!rtx_equal_p (operands[0], operands[1]))
19615 emit_move_insn (operands[0], operands[1]);
19617 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19618 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19623 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19625 if (!rtx_equal_p (operands[0], operands[1]))
19626 emit_move_insn (operands[0], operands[1]);
19628 split_double_mode (mode, operands, 1, low, high);
19630 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19631 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19633 if (TARGET_CMOVE && scratch)
19635 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19636 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19638 ix86_expand_clear (scratch);
19639 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19644 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19645 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19647 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
19654 predict_jump (int prob)
19656 rtx insn = get_last_insn ();
19657 gcc_assert (JUMP_P (insn));
19658 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If it is, jump to the label.  */
19664 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19666 rtx label = gen_label_rtx ();
19667 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19668 if (GET_MODE (variable) == DImode)
19669 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19671 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19672 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19675 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19677 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19681 /* Adjust COUNTER by the VALUE. */
19683 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19685 rtx (*gen_add)(rtx, rtx, rtx)
19686 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19688 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
/* Zero extend a possibly SImode EXP to a Pmode register.  */
19693 ix86_zero_extend_to_Pmode (rtx exp)
19696 if (GET_MODE (exp) == VOIDmode)
19697 return force_reg (Pmode, exp);
19698 if (GET_MODE (exp) == Pmode)
19699 return copy_to_mode_reg (Pmode, exp);
19700 r = gen_reg_rtx (Pmode);
19701 emit_insn (gen_zero_extendsidi2 (r, exp));
19705 /* Divide COUNTREG by SCALE. */
19707 scale_counter (rtx countreg, int scale)
19713 if (CONST_INT_P (countreg))
19714 return GEN_INT (INTVAL (countreg) / scale);
19715 gcc_assert (REG_P (countreg));
19717 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19718 GEN_INT (exact_log2 (scale)),
19719 NULL, 1, OPTAB_DIRECT);
19723 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19724 DImode for constant loop counts. */
19726 static enum machine_mode
19727 counter_mode (rtx count_exp)
19729 if (GET_MODE (count_exp) != VOIDmode)
19730 return GET_MODE (count_exp);
19731 if (!CONST_INT_P (count_exp))
19733 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop to move memory
   pointed to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
   the equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
19748 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19749 rtx destptr, rtx srcptr, rtx value,
19750 rtx count, enum machine_mode mode, int unroll,
19753 rtx out_label, top_label, iter, tmp;
19754 enum machine_mode iter_mode = counter_mode (count);
19755 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19756 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19762 top_label = gen_label_rtx ();
19763 out_label = gen_label_rtx ();
19764 iter = gen_reg_rtx (iter_mode);
19766 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19767 NULL, 1, OPTAB_DIRECT);
19768 /* Those two should combine. */
19769 if (piece_size == const1_rtx)
19771 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19773 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19775 emit_move_insn (iter, const0_rtx);
19777 emit_label (top_label);
19779 tmp = convert_modes (Pmode, iter_mode, iter, true);
19780 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19781 destmem = change_address (destmem, mode, x_addr);
19785 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19786 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
19791 if (!TARGET_64BIT && 0)
19793 for (i = 0; i < unroll; i++)
19798 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19800 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19802 emit_move_insn (destmem, srcmem);
19808 gcc_assert (unroll <= 4);
19809 for (i = 0; i < unroll; i++)
19811 tmpreg[i] = gen_reg_rtx (mode);
19815 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19817 emit_move_insn (tmpreg[i], srcmem);
19819 for (i = 0; i < unroll; i++)
19824 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19826 emit_move_insn (destmem, tmpreg[i]);
19831 for (i = 0; i < unroll; i++)
19835 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19836 emit_move_insn (destmem, value);
19839 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19840 true, OPTAB_LIB_WIDEN);
19842 emit_move_insn (iter, tmp);
19844 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19846 if (expected_size != -1)
19848 expected_size /= GET_MODE_SIZE (mode) * unroll;
19849 if (expected_size == 0)
19851 else if (expected_size > REG_BR_PROB_BASE)
19852 predict_jump (REG_BR_PROB_BASE - 1);
19854 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19857 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19858 iter = ix86_zero_extend_to_Pmode (iter);
19859 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19860 true, OPTAB_LIB_WIDEN);
19861 if (tmp != destptr)
19862 emit_move_insn (destptr, tmp);
19865 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19866 true, OPTAB_LIB_WIDEN);
19868 emit_move_insn (srcptr, tmp);
19870 emit_label (out_label);
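/* A minimal C sketch (editor's aside, not part of the original source) of
   the loop shape emitted above; names are hypothetical and CHUNK is
   assumed to be a power of two (GET_MODE_SIZE (mode) * unroll).  SIZE is
   rounded down to whole chunks first; the pointer updates happen once,
   after the loop.  */
#if 0 /* illustrative sketch only */
#include <stddef.h>
#include <string.h>
static void
copy_loop_shape (char *dest, const char *src, size_t count, size_t chunk)
{
  size_t size = count & ~(chunk - 1);   /* round down to whole chunks */
  size_t iter = 0;
  while (iter < size)
    {
      memcpy (dest + iter, src + iter, chunk);   /* the unrolled body */
      iter += chunk;
    }
  /* The epilogue handles the remaining count - size bytes.  */
}
#endif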
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
19876 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19877 rtx destptr, rtx srcptr,
19879 enum machine_mode mode)
19885 /* If the size is known, it is shorter to use rep movs. */
19886 if (mode == QImode && CONST_INT_P (count)
19887 && !(INTVAL (count) & 3))
19890 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19891 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19892 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19893 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19894 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19895 if (mode != QImode)
19897 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19898 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19899 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19900 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19901 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19902 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19906 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19907 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19909 if (CONST_INT_P (count))
19911 count = GEN_INT (INTVAL (count)
19912 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19913 destmem = shallow_copy_rtx (destmem);
19914 srcmem = shallow_copy_rtx (srcmem);
19915 set_mem_size (destmem, count);
19916 set_mem_size (srcmem, count);
19920 if (MEM_SIZE (destmem))
19921 set_mem_size (destmem, NULL_RTX);
19922 if (MEM_SIZE (srcmem))
19923 set_mem_size (srcmem, NULL_RTX);
19925 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
19932 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19933 rtx count, enum machine_mode mode,
19939 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19940 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19941 value = force_reg (mode, gen_lowpart (mode, value));
19942 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19943 if (mode != QImode)
19945 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19946 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19947 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19950 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19951 if (orig_value == const0_rtx && CONST_INT_P (count))
19953 count = GEN_INT (INTVAL (count)
19954 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19955 destmem = shallow_copy_rtx (destmem);
19956 set_mem_size (destmem, count);
19958 else if (MEM_SIZE (destmem))
19959 set_mem_size (destmem, NULL_RTX);
19960 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
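/* A minimal C sketch (editor's aside, not part of the original source) of
   what the emitted rep; stos does; the helper name is hypothetical.
   COUNTREG holds the element count (bytes divided by the element size),
   and the destination pointer ends just past the filled region, which is
   what the destexp expression above records.  */
#if 0 /* illustrative sketch only */
#include <stdint.h>
static uint32_t *
rep_stos_shape (uint32_t *dest, uint32_t value, unsigned countreg)
{
  while (countreg--)
    *dest++ = value;   /* one store per iteration; the pointer advances */
  return dest;         /* final pointer, as encoded by destexp */
}
#endif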
19964 emit_strmov (rtx destmem, rtx srcmem,
19965 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19967 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19968 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19969 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19972 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19974 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19975 rtx destptr, rtx srcptr, rtx count, int max_size)
19978 if (CONST_INT_P (count))
19980 HOST_WIDE_INT countval = INTVAL (count);
19983 if ((countval & 0x10) && max_size > 16)
19987 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19988 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19991 gcc_unreachable ();
19994 if ((countval & 0x08) && max_size > 8)
19997 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20000 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20001 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20005 if ((countval & 0x04) && max_size > 4)
20007 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20010 if ((countval & 0x02) && max_size > 2)
20012 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20015 if ((countval & 0x01) && max_size > 1)
20017 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
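/* A minimal C sketch (editor's aside, not part of the original source) of
   the constant-count epilogue above: it walks the low bits of COUNT from
   16 down to 1 and emits one move per set bit; names are hypothetical.  */
#if 0 /* illustrative sketch only */
#include <string.h>
static void
epilogue_shape (char *dest, const char *src, unsigned countval)
{
  unsigned offset = 0;
  if (countval & 0x10) { memcpy (dest + offset, src + offset, 16); offset += 16; }
  if (countval & 0x08) { memcpy (dest + offset, src + offset, 8); offset += 8; }
  if (countval & 0x04) { memcpy (dest + offset, src + offset, 4); offset += 4; }
  if (countval & 0x02) { memcpy (dest + offset, src + offset, 2); offset += 2; }
  if (countval & 0x01) memcpy (dest + offset, src + offset, 1);
}
#endif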
20024 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20025 count, 1, OPTAB_DIRECT);
20026 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20027 count, QImode, 1, 4);
/* When there are stringops, we can cheaply increase dest and src pointers.
   Otherwise we save code size by maintaining an offset (zero is readily
   available from the preceding rep operation) and using x86 addressing modes.
20035 if (TARGET_SINGLE_STRINGOP)
20039 rtx label = ix86_expand_aligntest (count, 4, true);
20040 src = change_address (srcmem, SImode, srcptr);
20041 dest = change_address (destmem, SImode, destptr);
20042 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20043 emit_label (label);
20044 LABEL_NUSES (label) = 1;
20048 rtx label = ix86_expand_aligntest (count, 2, true);
20049 src = change_address (srcmem, HImode, srcptr);
20050 dest = change_address (destmem, HImode, destptr);
20051 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20052 emit_label (label);
20053 LABEL_NUSES (label) = 1;
20057 rtx label = ix86_expand_aligntest (count, 1, true);
20058 src = change_address (srcmem, QImode, srcptr);
20059 dest = change_address (destmem, QImode, destptr);
20060 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20061 emit_label (label);
20062 LABEL_NUSES (label) = 1;
20067 rtx offset = force_reg (Pmode, const0_rtx);
20072 rtx label = ix86_expand_aligntest (count, 4, true);
20073 src = change_address (srcmem, SImode, srcptr);
20074 dest = change_address (destmem, SImode, destptr);
20075 emit_move_insn (dest, src);
20076 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20077 true, OPTAB_LIB_WIDEN);
20079 emit_move_insn (offset, tmp);
20080 emit_label (label);
20081 LABEL_NUSES (label) = 1;
20085 rtx label = ix86_expand_aligntest (count, 2, true);
20086 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20087 src = change_address (srcmem, HImode, tmp);
20088 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20089 dest = change_address (destmem, HImode, tmp);
20090 emit_move_insn (dest, src);
20091 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20092 true, OPTAB_LIB_WIDEN);
20094 emit_move_insn (offset, tmp);
20095 emit_label (label);
20096 LABEL_NUSES (label) = 1;
20100 rtx label = ix86_expand_aligntest (count, 1, true);
20101 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20102 src = change_address (srcmem, QImode, tmp);
20103 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20104 dest = change_address (destmem, QImode, tmp);
20105 emit_move_insn (dest, src);
20106 emit_label (label);
20107 LABEL_NUSES (label) = 1;
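/* Illustrative sketch (not GCC code): the constant-count branch above
   peels one move per set bit of COUNT & (MAX_SIZE - 1), shown here for
   the 8/4/2/1-byte steps.  Assumes <string.h>.  */
#if 0
static void
copy_tail (unsigned char *dst, const unsigned char *src,
           unsigned countval, unsigned max_size)
{
  unsigned off = 0;
  if ((countval & 8) && max_size > 8)
    { memcpy (dst + off, src + off, 8); off += 8; }
  if ((countval & 4) && max_size > 4)
    { memcpy (dst + off, src + off, 4); off += 4; }
  if ((countval & 2) && max_size > 2)
    { memcpy (dst + off, src + off, 2); off += 2; }
  if ((countval & 1) && max_size > 1)
    dst[off] = src[off];
}
#endif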
20112 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20114 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20115 rtx count, int max_size)
20118 expand_simple_binop (counter_mode (count), AND, count,
20119 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20120 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20121 gen_lowpart (QImode, value), count, QImode,
20125 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
20127 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20131 if (CONST_INT_P (count))
20133 HOST_WIDE_INT countval = INTVAL (count);
20136 if ((countval & 0x10) && max_size > 16)
20140 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20141 emit_insn (gen_strset (destptr, dest, value));
20142 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20143 emit_insn (gen_strset (destptr, dest, value));
20146 gcc_unreachable ();
20149 if ((countval & 0x08) && max_size > 8)
20153 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20154 emit_insn (gen_strset (destptr, dest, value));
20158 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20159 emit_insn (gen_strset (destptr, dest, value));
20160 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20161 emit_insn (gen_strset (destptr, dest, value));
20165 if ((countval & 0x04) && max_size > 4)
20167 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20168 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20171 if ((countval & 0x02) && max_size > 2)
20173 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20174 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20177 if ((countval & 0x01) && max_size > 1)
20179 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20180 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20187 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20192 rtx label = ix86_expand_aligntest (count, 16, true);
20195 dest = change_address (destmem, DImode, destptr);
20196 emit_insn (gen_strset (destptr, dest, value));
20197 emit_insn (gen_strset (destptr, dest, value));
20201 dest = change_address (destmem, SImode, destptr);
20202 emit_insn (gen_strset (destptr, dest, value));
20203 emit_insn (gen_strset (destptr, dest, value));
20204 emit_insn (gen_strset (destptr, dest, value));
20205 emit_insn (gen_strset (destptr, dest, value));
20207 emit_label (label);
20208 LABEL_NUSES (label) = 1;
20212 rtx label = ix86_expand_aligntest (count, 8, true);
20215 dest = change_address (destmem, DImode, destptr);
20216 emit_insn (gen_strset (destptr, dest, value));
20220 dest = change_address (destmem, SImode, destptr);
20221 emit_insn (gen_strset (destptr, dest, value));
20222 emit_insn (gen_strset (destptr, dest, value));
20224 emit_label (label);
20225 LABEL_NUSES (label) = 1;
20229 rtx label = ix86_expand_aligntest (count, 4, true);
20230 dest = change_address (destmem, SImode, destptr);
20231 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20232 emit_label (label);
20233 LABEL_NUSES (label) = 1;
20237 rtx label = ix86_expand_aligntest (count, 2, true);
20238 dest = change_address (destmem, HImode, destptr);
20239 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20240 emit_label (label);
20241 LABEL_NUSES (label) = 1;
20245 rtx label = ix86_expand_aligntest (count, 1, true);
20246 dest = change_address (destmem, QImode, destptr);
20247 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20248 emit_label (label);
20249 LABEL_NUSES (label) = 1;
20253 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20254    to DESIRED_ALIGNMENT.  */
20256 expand_movmem_prologue (rtx destmem, rtx srcmem,
20257 rtx destptr, rtx srcptr, rtx count,
20258 int align, int desired_alignment)
20260 if (align <= 1 && desired_alignment > 1)
20262 rtx label = ix86_expand_aligntest (destptr, 1, false);
20263 srcmem = change_address (srcmem, QImode, srcptr);
20264 destmem = change_address (destmem, QImode, destptr);
20265 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20266 ix86_adjust_counter (count, 1);
20267 emit_label (label);
20268 LABEL_NUSES (label) = 1;
20270 if (align <= 2 && desired_alignment > 2)
20272 rtx label = ix86_expand_aligntest (destptr, 2, false);
20273 srcmem = change_address (srcmem, HImode, srcptr);
20274 destmem = change_address (destmem, HImode, destptr);
20275 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20276 ix86_adjust_counter (count, 2);
20277 emit_label (label);
20278 LABEL_NUSES (label) = 1;
20280 if (align <= 4 && desired_alignment > 4)
20282 rtx label = ix86_expand_aligntest (destptr, 4, false);
20283 srcmem = change_address (srcmem, SImode, srcptr);
20284 destmem = change_address (destmem, SImode, destptr);
20285 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20286 ix86_adjust_counter (count, 4);
20287 emit_label (label);
20288 LABEL_NUSES (label) = 1;
20290 gcc_assert (desired_alignment <= 8);
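/* Illustrative sketch (not GCC code): the jump tree above peels a 1-,
   2-, then 4-byte copy, each guarded by a destination alignment test,
   one per doubling step up to DESIRED_ALIGNMENT.  Assumes
   <stdint.h>/<string.h>/<stddef.h>.  */
#if 0
static void
align_dst_by_copies (unsigned char **dst, const unsigned char **src,
                     size_t *count, int desired_align)
{
  if (desired_align > 1 && ((uintptr_t) *dst & 1))
    { **dst = **src; ++*dst; ++*src; --*count; }
  if (desired_align > 2 && ((uintptr_t) *dst & 2))
    { memcpy (*dst, *src, 2); *dst += 2; *src += 2; *count -= 2; }
  if (desired_align > 4 && ((uintptr_t) *dst & 4))
    { memcpy (*dst, *src, 4); *dst += 4; *src += 4; *count -= 4; }
}
#endif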
20293 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20294    ALIGN_BYTES is how many bytes need to be copied.  */
20296 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20297 int desired_align, int align_bytes)
20300 rtx src_size, dst_size;
20302 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20303 if (src_align_bytes >= 0)
20304 src_align_bytes = desired_align - src_align_bytes;
20305 src_size = MEM_SIZE (src);
20306 dst_size = MEM_SIZE (dst);
20307 if (align_bytes & 1)
20309 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20310 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20312 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20314 if (align_bytes & 2)
20316 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20317 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20318 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20319 set_mem_align (dst, 2 * BITS_PER_UNIT);
20320 if (src_align_bytes >= 0
20321 && (src_align_bytes & 1) == (align_bytes & 1)
20322 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20323 set_mem_align (src, 2 * BITS_PER_UNIT);
20325 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20327 if (align_bytes & 4)
20329 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20330 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20331 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20332 set_mem_align (dst, 4 * BITS_PER_UNIT);
20333 if (src_align_bytes >= 0)
20335 unsigned int src_align = 0;
20336 if ((src_align_bytes & 3) == (align_bytes & 3))
20338 else if ((src_align_bytes & 1) == (align_bytes & 1))
20340 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20341 set_mem_align (src, src_align * BITS_PER_UNIT);
20344 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20346 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20347 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20348 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20349 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20350 if (src_align_bytes >= 0)
20352 unsigned int src_align = 0;
20353 if ((src_align_bytes & 7) == (align_bytes & 7))
20355 else if ((src_align_bytes & 3) == (align_bytes & 3))
20357 else if ((src_align_bytes & 1) == (align_bytes & 1))
20359 if (src_align > (unsigned int) desired_align)
20360 src_align = desired_align;
20361 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20362 set_mem_align (src, src_align * BITS_PER_UNIT);
20365 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20367 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20372 /* Store enough to DEST to align DEST, known to be aligned by ALIGN,
20373    to DESIRED_ALIGNMENT.  */
20375 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20376 int align, int desired_alignment)
20378 if (align <= 1 && desired_alignment > 1)
20380 rtx label = ix86_expand_aligntest (destptr, 1, false);
20381 destmem = change_address (destmem, QImode, destptr);
20382 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20383 ix86_adjust_counter (count, 1);
20384 emit_label (label);
20385 LABEL_NUSES (label) = 1;
20387 if (align <= 2 && desired_alignment > 2)
20389 rtx label = ix86_expand_aligntest (destptr, 2, false);
20390 destmem = change_address (destmem, HImode, destptr);
20391 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20392 ix86_adjust_counter (count, 2);
20393 emit_label (label);
20394 LABEL_NUSES (label) = 1;
20396 if (align <= 4 && desired_alignment > 4)
20398 rtx label = ix86_expand_aligntest (destptr, 4, false);
20399 destmem = change_address (destmem, SImode, destptr);
20400 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20401 ix86_adjust_counter (count, 4);
20402 emit_label (label);
20403 LABEL_NUSES (label) = 1;
20405 gcc_assert (desired_alignment <= 8);
20408 /* Store enough to DST to align DST, known to be aligned by ALIGN, to
20409    DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
20411 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20412 int desired_align, int align_bytes)
20415 rtx dst_size = MEM_SIZE (dst);
20416 if (align_bytes & 1)
20418 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20420 emit_insn (gen_strset (destreg, dst,
20421 gen_lowpart (QImode, value)));
20423 if (align_bytes & 2)
20425 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20426 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20427 set_mem_align (dst, 2 * BITS_PER_UNIT);
20429 emit_insn (gen_strset (destreg, dst,
20430 gen_lowpart (HImode, value)));
20432 if (align_bytes & 4)
20434 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20435 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20436 set_mem_align (dst, 4 * BITS_PER_UNIT);
20438 emit_insn (gen_strset (destreg, dst,
20439 gen_lowpart (SImode, value)));
20441 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20442 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20443 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20445 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20449 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20450 static enum stringop_alg
20451 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20452 int *dynamic_check)
20454 const struct stringop_algs * algs;
20455 bool optimize_for_speed;
20456 /* Algorithms using the rep prefix want at least edi and ecx;
20457 additionally, memset wants eax and memcpy wants esi. Don't
20458 consider such algorithms if the user has appropriated those
20459 registers for their own purposes. */
20460 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20462 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20464 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20465 || (alg != rep_prefix_1_byte \
20466 && alg != rep_prefix_4_byte \
20467 && alg != rep_prefix_8_byte))
20468 const struct processor_costs *cost;
20470 /* Even if the string operation call is cold, we still might spend a lot
20471 of time processing large blocks. */
20472 if (optimize_function_for_size_p (cfun)
20473 || (optimize_insn_for_size_p ()
20474 && expected_size != -1 && expected_size < 256))
20475 optimize_for_speed = false;
20477 optimize_for_speed = true;
20479 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20481 *dynamic_check = -1;
20483 algs = &cost->memset[TARGET_64BIT != 0];
20485 algs = &cost->memcpy[TARGET_64BIT != 0];
20486 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20487 return stringop_alg;
20488 /* rep; movq or rep; movl is the smallest variant. */
20489 else if (!optimize_for_speed)
20491 if (!count || (count & 3))
20492 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20494 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20496 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
20498 else if (expected_size != -1 && expected_size < 4)
20499 return loop_1_byte;
20500 else if (expected_size != -1)
20503 enum stringop_alg alg = libcall;
20504 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20506 /* We get here if the algorithms that were not libcall-based
20507 were rep-prefix based and we are unable to use rep prefixes
20508 based on global register usage. Break out of the loop and
20509 use the heuristic below. */
20510 if (algs->size[i].max == 0)
20512 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20514 enum stringop_alg candidate = algs->size[i].alg;
20516 if (candidate != libcall && ALG_USABLE_P (candidate))
20518 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20519 last non-libcall inline algorithm. */
20520 if (TARGET_INLINE_ALL_STRINGOPS)
20522 /* When the current size is best to be copied by a libcall,
20523 but we are still forced to inline, run the heuristic below
20524 that will pick code for medium sized blocks. */
20525 if (alg != libcall)
20529 else if (ALG_USABLE_P (candidate))
20533 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20535 /* When asked to inline the call anyway, try to pick a meaningful choice.
20536    We look for the maximal size of block that is faster to copy by hand and
20537    take blocks of at most that size, guessing that the average size will
20538    be roughly half of the block.
20540    If this turns out to be bad, we might simply specify the preferred
20541    choice in ix86_costs.  */
20542 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20543 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20546 enum stringop_alg alg;
20548 bool any_alg_usable_p = true;
20550 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20552 enum stringop_alg candidate = algs->size[i].alg;
20553 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20555 if (candidate != libcall && candidate
20556 && ALG_USABLE_P (candidate))
20557 max = algs->size[i].max;
20559 /* If there aren't any usable algorithms, then recursing on
20560 smaller sizes isn't going to find anything. Just return the
20561 simple byte-at-a-time copy loop. */
20562 if (!any_alg_usable_p)
20564 /* Pick something reasonable. */
20565 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20566 *dynamic_check = 128;
20567 return loop_1_byte;
20571 alg = decide_alg (count, max / 2, memset, dynamic_check);
20572 gcc_assert (*dynamic_check == -1);
20573 gcc_assert (alg != libcall);
20574 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20575 *dynamic_check = max;
20578 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20579 #undef ALG_USABLE_P
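/* Illustrative sketch (not GCC code): the per-size table walk above in
   miniature.  Entries pair an upper size bound with an algorithm; -1
   means "no bound".  Names here are hypothetical.  */
#if 0
struct alg_entry { long max; int alg; };

static int
pick_from_table (const struct alg_entry *t, int n, long expected_size,
                 int libcall_alg)
{
  int i;
  for (i = 0; i < n; i++)
    if (t[i].max == -1 || t[i].max >= expected_size)
      return t[i].alg;
  return libcall_alg;  /* nothing inline fits; call the library */
}
#endif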
20582 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20583 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20585 decide_alignment (int align,
20586 enum stringop_alg alg,
20589 int desired_align = 0;
20593 gcc_unreachable ();
20595 case unrolled_loop:
20596 desired_align = GET_MODE_SIZE (Pmode);
20598 case rep_prefix_8_byte:
20601 case rep_prefix_4_byte:
20602 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20603    copying a whole cache line at once.  */
20604 if (TARGET_PENTIUMPRO)
20609 case rep_prefix_1_byte:
20610 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
20611    copying a whole cache line at once.  */
20612 if (TARGET_PENTIUMPRO)
20626 if (desired_align < align)
20627 desired_align = align;
20628 if (expected_size != -1 && expected_size < 4)
20629 desired_align = align;
20630 return desired_align;
20633 /* Return the smallest power of 2 greater than VAL. */
static int
20635 smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
20643 /* Expand string move (memcpy) operation.  Use i386 string operations when
20644    profitable.  expand_setmem contains similar code.  The code depends upon
20645    architecture, block size and alignment, but always has the same overall structure:
20648 1) Prologue guard: Conditional that jumps up to epilogues for small
20649    blocks that can be handled by epilogue alone.  This is faster but
20650    also needed for correctness, since the prologue assumes the block is larger
20651    than the desired alignment.
20653 Optional dynamic check for size and libcall for large
20654 blocks is emitted here too, with -minline-stringops-dynamically.
20656 2) Prologue: copy first few bytes in order to get destination aligned
20657 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20658 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20659 We emit either a jump tree on power of two sized blocks, or a byte loop.
20661 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20662 with specified algorithm.
20664 4) Epilogue: code copying tail of the block that is too small to be
20665 handled by main body (or up to size guarded by prologue guard). */
20668 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20669 rtx expected_align_exp, rtx expected_size_exp)
20675 rtx jump_around_label = NULL;
20676 HOST_WIDE_INT align = 1;
20677 unsigned HOST_WIDE_INT count = 0;
20678 HOST_WIDE_INT expected_size = -1;
20679 int size_needed = 0, epilogue_size_needed;
20680 int desired_align = 0, align_bytes = 0;
20681 enum stringop_alg alg;
20683 bool need_zero_guard = false;
20685 if (CONST_INT_P (align_exp))
20686 align = INTVAL (align_exp);
20687 /* i386 can do misaligned access at a reasonably increased cost.  */
20688 if (CONST_INT_P (expected_align_exp)
20689 && INTVAL (expected_align_exp) > align)
20690 align = INTVAL (expected_align_exp);
20691 /* ALIGN is the minimum of destination and source alignment, but we care here
20692 just about destination alignment. */
20693 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20694 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20696 if (CONST_INT_P (count_exp))
20697 count = expected_size = INTVAL (count_exp);
20698 if (CONST_INT_P (expected_size_exp) && count == 0)
20699 expected_size = INTVAL (expected_size_exp);
20701 /* Make sure we don't need to care about overflow later on. */
20702 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20705 /* Step 0: Decide on preferred algorithm, desired alignment and
20706 size of chunks to be copied by main loop. */
20708 alg = decide_alg (count, expected_size, false, &dynamic_check);
20709 desired_align = decide_alignment (align, alg, expected_size);
20711 if (!TARGET_ALIGN_STRINGOPS)
20712 align = desired_align;
20714 if (alg == libcall)
20716 gcc_assert (alg != no_stringop);
20718 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20719 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20720 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20725 gcc_unreachable ();
20727 need_zero_guard = true;
20728 size_needed = GET_MODE_SIZE (Pmode);
20730 case unrolled_loop:
20731 need_zero_guard = true;
20732 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20734 case rep_prefix_8_byte:
20737 case rep_prefix_4_byte:
20740 case rep_prefix_1_byte:
20744 need_zero_guard = true;
20749 epilogue_size_needed = size_needed;
20751 /* Step 1: Prologue guard. */
20753 /* Alignment code needs count to be in register. */
20754 if (CONST_INT_P (count_exp) && desired_align > align)
20756 if (INTVAL (count_exp) > desired_align
20757 && INTVAL (count_exp) > size_needed)
20760 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20761 if (align_bytes <= 0)
20764 align_bytes = desired_align - align_bytes;
20766 if (align_bytes == 0)
20767 count_exp = force_reg (counter_mode (count_exp), count_exp);
20769 gcc_assert (desired_align >= 1 && align >= 1);
20771 /* Ensure that alignment prologue won't copy past end of block. */
20772 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20774 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20775 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20776    Make sure it is a power of 2.  */
20777 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20781 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20783 /* If main algorithm works on QImode, no epilogue is needed.
20784 For small sizes just don't align anything. */
20785 if (size_needed == 1)
20786 desired_align = align;
20793 label = gen_label_rtx ();
20794 emit_cmp_and_jump_insns (count_exp,
20795 GEN_INT (epilogue_size_needed),
20796 LTU, 0, counter_mode (count_exp), 1, label);
20797 if (expected_size == -1 || expected_size < epilogue_size_needed)
20798 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20800 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20804 /* Emit code to decide at runtime whether library call or inline should be used.  */
20806 if (dynamic_check != -1)
20808 if (CONST_INT_P (count_exp))
20810 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20812 emit_block_move_via_libcall (dst, src, count_exp, false);
20813 count_exp = const0_rtx;
20819 rtx hot_label = gen_label_rtx ();
20820 jump_around_label = gen_label_rtx ();
20821 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20822 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20823 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20824 emit_block_move_via_libcall (dst, src, count_exp, false);
20825 emit_jump (jump_around_label);
20826 emit_label (hot_label);
20830 /* Step 2: Alignment prologue. */
20832 if (desired_align > align)
20834 if (align_bytes == 0)
20836 /* Except for the first move in the epilogue, we no longer know
20837    the constant offset in aliasing info.  It doesn't seem worth
20838    the pain to maintain it for the first move, so throw away the info early.  */
20840 src = change_address (src, BLKmode, srcreg);
20841 dst = change_address (dst, BLKmode, destreg);
20842 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20847 /* If we know how many bytes need to be stored before dst is
20848 sufficiently aligned, maintain aliasing info accurately. */
20849 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20850 desired_align, align_bytes);
20851 count_exp = plus_constant (count_exp, -align_bytes);
20852 count -= align_bytes;
20854 if (need_zero_guard
20855 && (count < (unsigned HOST_WIDE_INT) size_needed
20856 || (align_bytes == 0
20857 && count < ((unsigned HOST_WIDE_INT) size_needed
20858 + desired_align - align))))
20860 /* It is possible that we copied enough so the main loop will not execute.  */
20862 gcc_assert (size_needed > 1);
20863 if (label == NULL_RTX)
20864 label = gen_label_rtx ();
20865 emit_cmp_and_jump_insns (count_exp,
20866 GEN_INT (size_needed),
20867 LTU, 0, counter_mode (count_exp), 1, label);
20868 if (expected_size == -1
20869 || expected_size < (desired_align - align) / 2 + size_needed)
20870 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20872 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20875 if (label && size_needed == 1)
20877 emit_label (label);
20878 LABEL_NUSES (label) = 1;
20880 epilogue_size_needed = 1;
20882 else if (label == NULL_RTX)
20883 epilogue_size_needed = size_needed;
20885 /* Step 3: Main loop. */
20891 gcc_unreachable ();
20893 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20894 count_exp, QImode, 1, expected_size);
20897 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20898 count_exp, Pmode, 1, expected_size);
20900 case unrolled_loop:
20901 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20902 registers for 4 temporaries anyway. */
20903 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20904 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20907 case rep_prefix_8_byte:
20908 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20911 case rep_prefix_4_byte:
20912 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20915 case rep_prefix_1_byte:
20916 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20920 /* Properly adjust the offsets of the src and dest memory for aliasing.  */
20921 if (CONST_INT_P (count_exp))
20923 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20924 (count / size_needed) * size_needed);
20925 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20926 (count / size_needed) * size_needed);
20930 src = change_address (src, BLKmode, srcreg);
20931 dst = change_address (dst, BLKmode, destreg);
20934 /* Step 4: Epilogue to copy the remaining bytes. */
20938 /* When the main loop is done, COUNT_EXP might hold the original count,
20939    while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20940    Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20941    bytes.  Compensate if needed.  */
20943 if (size_needed < epilogue_size_needed)
20946 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20947 GEN_INT (size_needed - 1), count_exp, 1,
20949 if (tmp != count_exp)
20950 emit_move_insn (count_exp, tmp);
20952 emit_label (label);
20953 LABEL_NUSES (label) = 1;
20956 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20957 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20958 epilogue_size_needed);
20959 if (jump_around_label)
20960 emit_label (jump_around_label);
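/* Illustrative sketch (not GCC code): the code emitted by the function
   above has this runtime shape, shown here for 8-byte chunks.  Assumes
   <stdint.h>/<string.h>/<stddef.h>.  */
#if 0
static void
movmem_shape (unsigned char *dst, const unsigned char *src, size_t count)
{
  if (count >= 8)                        /* 1) prologue guard */
    {
      while ((uintptr_t) dst & 7)        /* 2) alignment prologue */
        { *dst++ = *src++; count--; }
      for (; count >= 8; count -= 8)     /* 3) main loop */
        { memcpy (dst, src, 8); dst += 8; src += 8; }
    }
  while (count--)                        /* 4) epilogue: tail bytes */
    *dst++ = *src++;
}
#endif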
20964 /* Helper function for memset.  For QImode value 0xXY produce
20965    0xXYXYXYXY of the width specified by MODE.  This is essentially
20966    a * 0x01010101, but we can do slightly better than
20967    synth_mult by unwinding the sequence by hand on CPUs with slow multiply.  */
20970 promote_duplicated_reg (enum machine_mode mode, rtx val)
20972 enum machine_mode valmode = GET_MODE (val);
20974 int nops = mode == DImode ? 3 : 2;
20976 gcc_assert (mode == SImode || mode == DImode);
20977 if (val == const0_rtx)
20978 return copy_to_mode_reg (mode, const0_rtx);
20979 if (CONST_INT_P (val))
20981 HOST_WIDE_INT v = INTVAL (val) & 255;
20985 if (mode == DImode)
20986 v |= (v << 16) << 16;
20987 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20990 if (valmode == VOIDmode)
20992 if (valmode != QImode)
20993 val = gen_lowpart (QImode, val);
20994 if (mode == QImode)
20996 if (!TARGET_PARTIAL_REG_STALL)
20998 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20999 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21000 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21001 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21003 rtx reg = convert_modes (mode, QImode, val, true);
21004 tmp = promote_duplicated_reg (mode, const1_rtx);
21005 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21010 rtx reg = convert_modes (mode, QImode, val, true);
21012 if (!TARGET_PARTIAL_REG_STALL)
21013 if (mode == SImode)
21014 emit_insn (gen_movsi_insv_1 (reg, reg));
21016 emit_insn (gen_movdi_insv_1 (reg, reg));
21019 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21020 NULL, 1, OPTAB_DIRECT);
21022 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21024 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21025 NULL, 1, OPTAB_DIRECT);
21026 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21027 if (mode == SImode)
21029 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21030 NULL, 1, OPTAB_DIRECT);
21031 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
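/* Illustrative sketch (not GCC code): the shift/or unwinding above, on
   plain integers.  Duplicating byte 0xXY across a 32-bit word takes two
   steps; a 64-bit word takes one more.  Assumes <stdint.h>.  */
#if 0
static uint32_t
dup_byte_u32 (uint8_t b)
{
  uint32_t v = b;
  v |= v << 8;    /* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;   /* 0x0000XYXY -> 0xXYXYXYXY */
  return v;       /* equals (uint32_t) b * 0x01010101 */
}
#endif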
21036 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
21037    will be needed by the main loop storing SIZE_NEEDED chunks and by the
21038    prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
21040 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21045 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21046 promoted_val = promote_duplicated_reg (DImode, val);
21047 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21048 promoted_val = promote_duplicated_reg (SImode, val);
21049 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21050 promoted_val = promote_duplicated_reg (HImode, val);
21052 promoted_val = val;
21054 return promoted_val;
21057 /* Expand string set operation (memset).  Use i386 string operations when
21058    profitable.  See the expand_movmem comment for an explanation of the
21059    individual steps performed.  */
21061 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21062 rtx expected_align_exp, rtx expected_size_exp)
21067 rtx jump_around_label = NULL;
21068 HOST_WIDE_INT align = 1;
21069 unsigned HOST_WIDE_INT count = 0;
21070 HOST_WIDE_INT expected_size = -1;
21071 int size_needed = 0, epilogue_size_needed;
21072 int desired_align = 0, align_bytes = 0;
21073 enum stringop_alg alg;
21074 rtx promoted_val = NULL;
21075 bool force_loopy_epilogue = false;
21077 bool need_zero_guard = false;
21079 if (CONST_INT_P (align_exp))
21080 align = INTVAL (align_exp);
21081 /* i386 can do misaligned access at a reasonably increased cost.  */
21082 if (CONST_INT_P (expected_align_exp)
21083 && INTVAL (expected_align_exp) > align)
21084 align = INTVAL (expected_align_exp);
21085 if (CONST_INT_P (count_exp))
21086 count = expected_size = INTVAL (count_exp);
21087 if (CONST_INT_P (expected_size_exp) && count == 0)
21088 expected_size = INTVAL (expected_size_exp);
21090 /* Make sure we don't need to care about overflow later on. */
21091 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21094 /* Step 0: Decide on preferred algorithm, desired alignment and
21095 size of chunks to be copied by main loop. */
21097 alg = decide_alg (count, expected_size, true, &dynamic_check);
21098 desired_align = decide_alignment (align, alg, expected_size);
21100 if (!TARGET_ALIGN_STRINGOPS)
21101 align = desired_align;
21103 if (alg == libcall)
21105 gcc_assert (alg != no_stringop);
21107 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21108 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21113 gcc_unreachable ();
21115 need_zero_guard = true;
21116 size_needed = GET_MODE_SIZE (Pmode);
21118 case unrolled_loop:
21119 need_zero_guard = true;
21120 size_needed = GET_MODE_SIZE (Pmode) * 4;
21122 case rep_prefix_8_byte:
21125 case rep_prefix_4_byte:
21128 case rep_prefix_1_byte:
21132 need_zero_guard = true;
21136 epilogue_size_needed = size_needed;
21138 /* Step 1: Prologue guard. */
21140 /* Alignment code needs count to be in register. */
21141 if (CONST_INT_P (count_exp) && desired_align > align)
21143 if (INTVAL (count_exp) > desired_align
21144 && INTVAL (count_exp) > size_needed)
21147 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21148 if (align_bytes <= 0)
21151 align_bytes = desired_align - align_bytes;
21153 if (align_bytes == 0)
21155 enum machine_mode mode = SImode;
21156 if (TARGET_64BIT && (count & ~0xffffffff))
21158 count_exp = force_reg (mode, count_exp);
21161 /* Do the cheap promotion to allow better CSE across the
21162    main loop and epilogue (i.e., one load of the big constant in
21163    front of all code).  */
21164 if (CONST_INT_P (val_exp))
21165 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21166 desired_align, align);
21167 /* Ensure that alignment prologue won't copy past end of block. */
21168 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21170 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
21171 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21172 Make sure it is a power of 2.  */
21173 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21175 /* To improve performance of small blocks, we jump around the VAL
21176    promoting code.  This means that if the promoted VAL is not constant,
21177    we might not use it in the epilogue and have to use a byte loop instead.  */
21179 if (epilogue_size_needed > 2 && !promoted_val)
21180 force_loopy_epilogue = true;
21183 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21185 /* If main algorithm works on QImode, no epilogue is needed.
21186 For small sizes just don't align anything. */
21187 if (size_needed == 1)
21188 desired_align = align;
21195 label = gen_label_rtx ();
21196 emit_cmp_and_jump_insns (count_exp,
21197 GEN_INT (epilogue_size_needed),
21198 LTU, 0, counter_mode (count_exp), 1, label);
21199 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21200 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21202 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21205 if (dynamic_check != -1)
21207 rtx hot_label = gen_label_rtx ();
21208 jump_around_label = gen_label_rtx ();
21209 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21210 LEU, 0, counter_mode (count_exp), 1, hot_label);
21211 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21212 set_storage_via_libcall (dst, count_exp, val_exp, false);
21213 emit_jump (jump_around_label);
21214 emit_label (hot_label);
21217 /* Step 2: Alignment prologue. */
21219 /* Do the expensive promotion once we branched off the small blocks. */
21221 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21222 desired_align, align);
21223 gcc_assert (desired_align >= 1 && align >= 1);
21225 if (desired_align > align)
21227 if (align_bytes == 0)
21229 /* Except for the first move in the epilogue, we no longer know
21230    the constant offset in aliasing info.  It doesn't seem worth
21231    the pain to maintain it for the first move, so throw away the info early.  */
21233 dst = change_address (dst, BLKmode, destreg);
21234 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21239 /* If we know how many bytes need to be stored before dst is
21240 sufficiently aligned, maintain aliasing info accurately. */
21241 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21242 desired_align, align_bytes);
21243 count_exp = plus_constant (count_exp, -align_bytes);
21244 count -= align_bytes;
21246 if (need_zero_guard
21247 && (count < (unsigned HOST_WIDE_INT) size_needed
21248 || (align_bytes == 0
21249 && count < ((unsigned HOST_WIDE_INT) size_needed
21250 + desired_align - align))))
21252 /* It is possible that we copied enough so the main loop will not execute.  */
21254 gcc_assert (size_needed > 1);
21255 if (label == NULL_RTX)
21256 label = gen_label_rtx ();
21257 emit_cmp_and_jump_insns (count_exp,
21258 GEN_INT (size_needed),
21259 LTU, 0, counter_mode (count_exp), 1, label);
21260 if (expected_size == -1
21261 || expected_size < (desired_align - align) / 2 + size_needed)
21262 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21264 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21267 if (label && size_needed == 1)
21269 emit_label (label);
21270 LABEL_NUSES (label) = 1;
21272 promoted_val = val_exp;
21273 epilogue_size_needed = 1;
21275 else if (label == NULL_RTX)
21276 epilogue_size_needed = size_needed;
21278 /* Step 3: Main loop. */
21284 gcc_unreachable ();
21286 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21287 count_exp, QImode, 1, expected_size);
21290 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21291 count_exp, Pmode, 1, expected_size);
21293 case unrolled_loop:
21294 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21295 count_exp, Pmode, 4, expected_size);
21297 case rep_prefix_8_byte:
21298 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21301 case rep_prefix_4_byte:
21302 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21305 case rep_prefix_1_byte:
21306 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21310 /* Properly adjust the offset of the dest memory for aliasing.  */
21311 if (CONST_INT_P (count_exp))
21312 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21313 (count / size_needed) * size_needed);
21315 dst = change_address (dst, BLKmode, destreg);
21317 /* Step 4: Epilogue to copy the remaining bytes. */
21321 /* When the main loop is done, COUNT_EXP might hold the original count,
21322    while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21323    Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21324    bytes.  Compensate if needed.  */
21326 if (size_needed < epilogue_size_needed)
21329 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21330 GEN_INT (size_needed - 1), count_exp, 1,
21332 if (tmp != count_exp)
21333 emit_move_insn (count_exp, tmp);
21335 emit_label (label);
21336 LABEL_NUSES (label) = 1;
21339 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21341 if (force_loopy_epilogue)
21342 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21343 epilogue_size_needed);
21345 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21346 epilogue_size_needed);
21348 if (jump_around_label)
21349 emit_label (jump_around_label);
21353 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb:
21356 out = result, initialized with the start address
21357 align_rtx = alignment of the address.
21358 scratch = scratch register, initialized with the start address when
21359 not aligned, otherwise undefined
21361 This is just the body. It needs the initializations mentioned above and
21362 some address computing at the end. These things are done in i386.md. */
21365 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21369 rtx align_2_label = NULL_RTX;
21370 rtx align_3_label = NULL_RTX;
21371 rtx align_4_label = gen_label_rtx ();
21372 rtx end_0_label = gen_label_rtx ();
21374 rtx tmpreg = gen_reg_rtx (SImode);
21375 rtx scratch = gen_reg_rtx (SImode);
21379 if (CONST_INT_P (align_rtx))
21380 align = INTVAL (align_rtx);
21382 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21384 /* Is there a known alignment and is it less than 4? */
21387 rtx scratch1 = gen_reg_rtx (Pmode);
21388 emit_move_insn (scratch1, out);
21389 /* Is there a known alignment and is it not 2? */
21392 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21393 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21395 /* Leave just the 3 lower bits. */
21396 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21397 NULL_RTX, 0, OPTAB_WIDEN);
21399 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21400 Pmode, 1, align_4_label);
21401 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21402 Pmode, 1, align_2_label);
21403 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21404 Pmode, 1, align_3_label);
21408 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21409    check whether it is aligned to a 4-byte boundary.  */
21411 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21412 NULL_RTX, 0, OPTAB_WIDEN);
21414 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21415 Pmode, 1, align_4_label);
21418 mem = change_address (src, QImode, out);
21420 /* Now compare the bytes. */
21422 /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
21423 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21424 QImode, 1, end_0_label);
21426 /* Increment the address. */
21427 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21429 /* Not needed with an alignment of 2 */
21432 emit_label (align_2_label);
21434 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21437 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21439 emit_label (align_3_label);
21442 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21445 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21448 /* Generate loop to check 4 bytes at a time.  It is not a good idea to
21449    align this loop.  It gives only huge programs, but does not help to converge.  */
21451 emit_label (align_4_label);
21453 mem = change_address (src, SImode, out);
21454 emit_move_insn (scratch, mem);
21455 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21457 /* This formula yields a nonzero result iff one of the bytes is zero.
21458    This saves three branches inside the loop and many cycles.  */
21460 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21461 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21462 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21463 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21464 gen_int_mode (0x80808080, SImode)));
21465 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21470 rtx reg = gen_reg_rtx (SImode);
21471 rtx reg2 = gen_reg_rtx (Pmode);
21472 emit_move_insn (reg, tmpreg);
21473 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21475 /* If zero is not in the first two bytes, move two bytes forward. */
21476 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21477 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21478 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21479 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21480 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21483 /* Emit lea manually to avoid clobbering of flags. */
21484 emit_insn (gen_rtx_SET (SImode, reg2,
21485 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21487 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21488 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21489 emit_insn (gen_rtx_SET (VOIDmode, out,
21490 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21496 rtx end_2_label = gen_label_rtx ();
21497 /* Is zero in the first two bytes? */
21499 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21500 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21501 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21502 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21503 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21505 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21506 JUMP_LABEL (tmp) = end_2_label;
21508 /* Not in the first two. Move two bytes forward. */
21509 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21510 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21512 emit_label (end_2_label);
21516 /* Avoid a branch in fixing up the byte.  */
21517 tmpreg = gen_lowpart (QImode, tmpreg);
21518 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21519 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21520 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21521 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21523 emit_label (end_0_label);
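/* Illustrative sketch (not GCC code): the zero-byte test emitted in the
   loop above, on plain integers.  (w - 0x01010101) & ~w & 0x80808080 is
   nonzero iff some byte of w is zero.  Assumes <stdint.h>.  */
#if 0
static int
has_zero_byte (uint32_t w)
{
  return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}
#endif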
21526 /* Expand strlen. */
21529 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21531 rtx addr, scratch1, scratch2, scratch3, scratch4;
21533 /* The generic case of the strlen expander is long.  Avoid expanding it
21534    unless TARGET_INLINE_ALL_STRINGOPS.  */
21536 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21537 && !TARGET_INLINE_ALL_STRINGOPS
21538 && !optimize_insn_for_size_p ()
21539 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21542 addr = force_reg (Pmode, XEXP (src, 0));
21543 scratch1 = gen_reg_rtx (Pmode);
21545 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21546 && !optimize_insn_for_size_p ())
21548 /* Well it seems that some optimizer does not combine a call like
21549 foo(strlen(bar), strlen(bar));
21550 when the move and the subtraction are done here.  It does calculate
21551 the length just once when these instructions are done inside of
21552 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21553 often used and I use one fewer register for the lifetime of
21554 output_strlen_unroll() this is better. */
21556 emit_move_insn (out, addr);
21558 ix86_expand_strlensi_unroll_1 (out, src, align);
21560 /* strlensi_unroll_1 returns the address of the zero at the end of
21561 the string, like memchr(), so compute the length by subtracting
21562 the start address. */
21563 emit_insn (ix86_gen_sub3 (out, out, addr));
21569 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21570 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21573 scratch2 = gen_reg_rtx (Pmode);
21574 scratch3 = gen_reg_rtx (Pmode);
21575 scratch4 = force_reg (Pmode, constm1_rtx);
21577 emit_move_insn (scratch3, addr);
21578 eoschar = force_reg (QImode, eoschar);
21580 src = replace_equiv_address_nv (src, scratch3);
21582 /* If .md starts supporting :P, this can be done in .md. */
21583 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21584 scratch4), UNSPEC_SCAS);
21585 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21586 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21587 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21592 /* For a given symbol (function), construct code to compute the address of its
21593    PLT entry in the large x86-64 PIC model.  */
21595 construct_plt_address (rtx symbol)
21597 rtx tmp = gen_reg_rtx (Pmode);
21598 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21600 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21601 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21603 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21604 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21609 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21611 rtx pop, int sibcall)
21613 rtx use = NULL, call;
21615 if (pop == const0_rtx)
21617 gcc_assert (!TARGET_64BIT || !pop);
21619 if (TARGET_MACHO && !TARGET_64BIT)
21622 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21623 fnaddr = machopic_indirect_call_target (fnaddr);
21628 /* Static functions and indirect calls don't need the pic register. */
21629 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21630 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21631 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21632 use_reg (&use, pic_offset_table_rtx);
21635 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21637 rtx al = gen_rtx_REG (QImode, AX_REG);
21638 emit_move_insn (al, callarg2);
21639 use_reg (&use, al);
21642 if (ix86_cmodel == CM_LARGE_PIC
21644 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21645 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21646 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21648 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21649 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21651 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21652 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21655 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21657 call = gen_rtx_SET (VOIDmode, retval, call);
21660 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21661 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21662 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21665 && ix86_cfun_abi () == MS_ABI
21666 && (!callarg2 || INTVAL (callarg2) != -2))
21668 /* We need to represent that SI and DI registers are clobbered by SYSV calls.  */
21670 static int clobbered_registers[] = {
21671 XMM6_REG, XMM7_REG, XMM8_REG,
21672 XMM9_REG, XMM10_REG, XMM11_REG,
21673 XMM12_REG, XMM13_REG, XMM14_REG,
21674 XMM15_REG, SI_REG, DI_REG
21677 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21678 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21679 UNSPEC_MS_TO_SYSV_CALL);
21683 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21684 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21687 (SSE_REGNO_P (clobbered_registers[i])
21689 clobbered_registers[i]));
21691 call = gen_rtx_PARALLEL (VOIDmode,
21692 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21696 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21697 if (TARGET_VZEROUPPER)
21702 if (cfun->machine->callee_pass_avx256_p)
21704 if (cfun->machine->callee_return_avx256_p)
21705 avx256 = callee_return_pass_avx256;
21707 avx256 = callee_pass_avx256;
21709 else if (cfun->machine->callee_return_avx256_p)
21710 avx256 = callee_return_avx256;
21712 avx256 = call_no_avx256;
21714 if (reload_completed)
21715 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21718 unspec = gen_rtx_UNSPEC (VOIDmode,
21719 gen_rtvec (1, GEN_INT (avx256)),
21720 UNSPEC_CALL_NEEDS_VZEROUPPER);
21721 call = gen_rtx_PARALLEL (VOIDmode,
21722 gen_rtvec (2, call, unspec));
21726 call = emit_call_insn (call);
21728 CALL_INSN_FUNCTION_USAGE (call) = use;
21734 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21736 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21737 emit_insn (gen_avx_vzeroupper (vzeroupper));
21738 emit_call_insn (call);
21741 /* Output the assembly for a call instruction. */
21744 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21746 bool direct_p = constant_call_address_operand (call_op, Pmode);
21747 bool seh_nop_p = false;
21749 gcc_assert (addr_op == 0 || addr_op == 1);
21751 if (SIBLING_CALL_P (insn))
21754 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21755 /* SEH epilogue detection requires the indirect branch case
21756 to include REX.W. */
21757 else if (TARGET_SEH)
21758 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21760 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21763 /* SEH unwinding can require an extra nop to be emitted in several
21764 circumstances. Determine if we have one of those. */
21769 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21771 /* If we get to another real insn, we don't need the nop. */
21775 /* If we get to the epilogue note, prevent a catch region from
21776    being adjacent to the standard epilogue sequence.  If non-call
21777    exceptions are in use, we'll have done this during epilogue emission.  */
21778 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21779 && !flag_non_call_exceptions
21780 && !can_throw_internal (insn))
21787 /* If we didn't find a real insn following the call, prevent the
21788 unwinder from looking into the next function. */
21796 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
21798 return addr_op ? "call\t%P1" : "call\t%P0";
21803 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
21805 return addr_op ? "call\t%A1" : "call\t%A0";
21809 /* Clear stack slot assignments remembered from previous functions.
21810 This is called from INIT_EXPANDERS once before RTL is emitted for each function.  */
21813 static struct machine_function *
21814 ix86_init_machine_status (void)
21816 struct machine_function *f;
21818 f = ggc_alloc_cleared_machine_function ();
21819 f->use_fast_prologue_epilogue_nregs = -1;
21820 f->tls_descriptor_call_expanded_p = 0;
21821 f->call_abi = ix86_abi;
21826 /* Return a MEM corresponding to a stack slot with mode MODE.
21827 Allocate a new slot if necessary.
21829 The RTL for a function can have several slots available: N is
21830 which slot to use. */
21833 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21835 struct stack_local_entry *s;
21837 gcc_assert (n < MAX_386_STACK_LOCALS);
21839 /* Virtual slot is valid only before vregs are instantiated. */
21840 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21842 for (s = ix86_stack_locals; s; s = s->next)
21843 if (s->mode == mode && s->n == n)
21844 return copy_rtx (s->rtl);
21846 s = ggc_alloc_stack_local_entry ();
21849 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21851 s->next = ix86_stack_locals;
21852 ix86_stack_locals = s;
21856 /* Construct the SYMBOL_REF for the tls_get_addr function. */
21858 static GTY(()) rtx ix86_tls_symbol;
21860 ix86_tls_get_addr (void)
21863 if (!ix86_tls_symbol)
21865 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
21866 (TARGET_ANY_GNU_TLS
21868 ? "___tls_get_addr"
21869 : "__tls_get_addr");
21872 return ix86_tls_symbol;
21875 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
21877 static GTY(()) rtx ix86_tls_module_base_symbol;
21879 ix86_tls_module_base (void)
21882 if (!ix86_tls_module_base_symbol)
21884 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
21885 "_TLS_MODULE_BASE_");
21886 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
21887 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
21890 return ix86_tls_module_base_symbol;
21893 /* Calculate the length of the memory address in the instruction
21894 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21897 memory_address_length (rtx addr)
21899 struct ix86_address parts;
21900 rtx base, index, disp;
21904 if (GET_CODE (addr) == PRE_DEC
21905 || GET_CODE (addr) == POST_INC
21906 || GET_CODE (addr) == PRE_MODIFY
21907 || GET_CODE (addr) == POST_MODIFY)
21910 ok = ix86_decompose_address (addr, &parts);
21913 if (parts.base && GET_CODE (parts.base) == SUBREG)
21914 parts.base = SUBREG_REG (parts.base);
21915 if (parts.index && GET_CODE (parts.index) == SUBREG)
21916 parts.index = SUBREG_REG (parts.index);
21919 index = parts.index;
/* Rule of thumb:
21924 - esp as the base always wants an index,
21925 - ebp as the base always wants a displacement,
21926 - r12 as the base always wants an index,
21927 - r13 as the base always wants a displacement. */
21929 /* Register Indirect. */
21930 if (base && !index && !disp)
21932 /* esp (for its index) and ebp (for its displacement) need
21933 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit mode.  */
21936 && (addr == arg_pointer_rtx
21937 || addr == frame_pointer_rtx
21938 || REGNO (addr) == SP_REG
21939 || REGNO (addr) == BP_REG
21940 || REGNO (addr) == R12_REG
21941 || REGNO (addr) == R13_REG))
21945 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21946 is not disp32, but disp32(%rip), so for disp32
21947 SIB byte is needed, unless print_operand_address
21948 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC.  */
21950 else if (disp && !base && !index)
21957 if (GET_CODE (disp) == CONST)
21958 symbol = XEXP (disp, 0);
21959 if (GET_CODE (symbol) == PLUS
21960 && CONST_INT_P (XEXP (symbol, 1)))
21961 symbol = XEXP (symbol, 0);
21963 if (GET_CODE (symbol) != LABEL_REF
21964 && (GET_CODE (symbol) != SYMBOL_REF
21965 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21966 && (GET_CODE (symbol) != UNSPEC
21967 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21968 && XINT (symbol, 1) != UNSPEC_PCREL
21969 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21976 /* Find the length of the displacement constant. */
21979 if (base && satisfies_constraint_K (disp))
21984 /* ebp always wants a displacement. Similarly r13. */
21985 else if (base && REG_P (base)
21986 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21989 /* An index requires the two-byte modrm form.... */
21991 /* ...like esp (or r12), which always wants an index. */
21992 || base == arg_pointer_rtx
21993 || base == frame_pointer_rtx
21994 || (base && REG_P (base)
21995 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
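/* Illustrative sketch (not GCC code): the SIB/displacement length rules
   above, condensed.  Returns the extra bytes beyond modrm/opcode/prefix;
   the flag parameters are hypothetical stand-ins for the register tests
   performed above.  */
#if 0
static int
addr_extra_bytes (int base_is_sp_or_r12, int base_is_bp_or_r13,
                  int has_index, long disp, int has_disp)
{
  int len = 0;
  if (has_index || base_is_sp_or_r12)
    len += 1;                                      /* SIB byte */
  if (has_disp || base_is_bp_or_r13)
    len += (disp >= -128 && disp <= 127) ? 1 : 4;  /* disp8 : disp32 */
  return len;
}
#endif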
22012 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
22013    is set, expect that the insn has an 8-bit immediate alternative.  */
22015 ix86_attr_length_immediate_default (rtx insn, int shortform)
22019 extract_insn_cached (insn);
22020 for (i = recog_data.n_operands - 1; i >= 0; --i)
22021 if (CONSTANT_P (recog_data.operand[i]))
22023 enum attr_mode mode = get_attr_mode (insn);
22026 if (shortform && CONST_INT_P (recog_data.operand[i]))
22028 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22035 ival = trunc_int_for_mode (ival, HImode);
22038 ival = trunc_int_for_mode (ival, SImode);
22043 if (IN_RANGE (ival, -128, 127))
22060 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
22065 fatal_insn ("unknown insn mode", insn);
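/* Illustrative sketch (not GCC code): immediate sizes follow directly
   from the operand size and the imm8 short form.  OPSIZE is in bytes;
   DImode passes 8 but encodes a sign-extended imm32.  */
#if 0
static int
imm_length (long ival, int opsize, int has_imm8_form)
{
  if (has_imm8_form && ival >= -128 && ival <= 127)
    return 1;                       /* sign-extended imm8 */
  return opsize == 8 ? 4 : opsize;  /* imm16/imm32; DImode uses imm32 */
}
#endif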
22070 /* Compute default value for "length_address" attribute. */
22072 ix86_attr_length_address_default (rtx insn)
22076 if (get_attr_type (insn) == TYPE_LEA)
22078 rtx set = PATTERN (insn), addr;
22080 if (GET_CODE (set) == PARALLEL)
22081 set = XVECEXP (set, 0, 0);
22083 gcc_assert (GET_CODE (set) == SET);
22085 addr = SET_SRC (set);
22086 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22088 if (GET_CODE (addr) == ZERO_EXTEND)
22089 addr = XEXP (addr, 0);
22090 if (GET_CODE (addr) == SUBREG)
22091 addr = SUBREG_REG (addr);
22094 return memory_address_length (addr);
22097 extract_insn_cached (insn);
22098 for (i = recog_data.n_operands - 1; i >= 0; --i)
22099 if (MEM_P (recog_data.operand[i]))
22101 constrain_operands_cached (reload_completed);
22102 if (which_alternative != -1)
22104 const char *constraints = recog_data.constraints[i];
22105 int alt = which_alternative;
22107 while (*constraints == '=' || *constraints == '+')
22110 while (*constraints++ != ',')
22112 /* Skip ignored operands. */
22113 if (*constraints == 'X')
22116 return memory_address_length (XEXP (recog_data.operand[i], 0));
22121 /* Compute default value for "length_vex" attribute. It includes
22122 2 or 3 byte VEX prefix and 1 opcode byte. */
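/* For illustration: "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte prefix
   (c5 e8 58 d9, length_vex 3), while an insn that needs REX.W, REX.X,
   REX.B or a non-0f opcode escape must use the 3-byte c4 form
   (length_vex 4).  */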
22125 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22130 /* Only the 0f opcode can use the 2-byte VEX prefix, and setting the VEX.W
22131 bit requires the 3-byte VEX prefix. */
22132 if (!has_0f_opcode || has_vex_w)
22135 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22139 extract_insn_cached (insn);
22141 for (i = recog_data.n_operands - 1; i >= 0; --i)
22142 if (REG_P (recog_data.operand[i]))
22144 /* REX.W bit uses 3 byte VEX prefix. */
22145 if (GET_MODE (recog_data.operand[i]) == DImode
22146 && GENERAL_REG_P (recog_data.operand[i]))
22151 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22152 if (MEM_P (recog_data.operand[i])
22153 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22160 /* Return the maximum number of instructions a CPU can issue. */
22163 ix86_issue_rate (void)
22167 case PROCESSOR_PENTIUM:
22168 case PROCESSOR_ATOM:
22172 case PROCESSOR_PENTIUMPRO:
22173 case PROCESSOR_PENTIUM4:
22174 case PROCESSOR_CORE2_32:
22175 case PROCESSOR_CORE2_64:
22176 case PROCESSOR_COREI7_32:
22177 case PROCESSOR_COREI7_64:
22178 case PROCESSOR_ATHLON:
22180 case PROCESSOR_AMDFAM10:
22181 case PROCESSOR_NOCONA:
22182 case PROCESSOR_GENERIC32:
22183 case PROCESSOR_GENERIC64:
22184 case PROCESSOR_BDVER1:
22192 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
22193 by DEP_INSN and nothing else set by DEP_INSN. */
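/* E.g. a "cmpl" followed by a conditional jump that reads only the flags
   qualifies; such compare/branch pairs are given zero extra latency
   below.  */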
22196 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22200 /* Simplify the test for uninteresting insns. */
22201 if (insn_type != TYPE_SETCC
22202 && insn_type != TYPE_ICMOV
22203 && insn_type != TYPE_FCMOV
22204 && insn_type != TYPE_IBR)
22207 if ((set = single_set (dep_insn)) != 0)
22209 set = SET_DEST (set);
22212 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22213 && XVECLEN (PATTERN (dep_insn), 0) == 2
22214 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22215 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22217 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22218 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22223 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22226 /* This test is true if the dependent insn reads the flags but
22227 not any other potentially set register. */
22228 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22231 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22237 /* Return true iff USE_INSN has a memory address with operands set by
22238 SET_INSN. */
22241 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22244 extract_insn_cached (use_insn);
22245 for (i = recog_data.n_operands - 1; i >= 0; --i)
22246 if (MEM_P (recog_data.operand[i]))
22248 rtx addr = XEXP (recog_data.operand[i], 0);
22249 return modified_in_p (addr, set_insn) != 0;
22255 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22257 enum attr_type insn_type, dep_insn_type;
22258 enum attr_memory memory;
22260 int dep_insn_code_number;
22262 /* Anti and output dependencies have zero cost on all CPUs. */
22263 if (REG_NOTE_KIND (link) != 0)
22266 dep_insn_code_number = recog_memoized (dep_insn);
22268 /* If we can't recognize the insns, we can't really do anything. */
22269 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22272 insn_type = get_attr_type (insn);
22273 dep_insn_type = get_attr_type (dep_insn);
22277 case PROCESSOR_PENTIUM:
22278 /* Address Generation Interlock adds a cycle of latency. */
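/* E.g. "addl $4, %eax" immediately followed by "movl (%eax), %ebx"
   pays the extra cycle, because the address of the load depends on a
   register written on the previous cycle.  */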
22279 if (insn_type == TYPE_LEA)
22281 rtx addr = PATTERN (insn);
22283 if (GET_CODE (addr) == PARALLEL)
22284 addr = XVECEXP (addr, 0, 0);
22286 gcc_assert (GET_CODE (addr) == SET);
22288 addr = SET_SRC (addr);
22289 if (modified_in_p (addr, dep_insn))
22292 else if (ix86_agi_dependent (dep_insn, insn))
22295 /* ??? Compares pair with jump/setcc. */
22296 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22297 cost = 0;
22299 /* Floating point stores require value to be ready one cycle earlier. */
22300 if (insn_type == TYPE_FMOV
22301 && get_attr_memory (insn) == MEMORY_STORE
22302 && !ix86_agi_dependent (dep_insn, insn))
22306 case PROCESSOR_PENTIUMPRO:
22307 memory = get_attr_memory (insn);
22309 /* INT->FP conversion is expensive. */
22310 if (get_attr_fp_int_src (dep_insn))
22313 /* There is one cycle of extra latency between an FP op and a store. */
22314 if (insn_type == TYPE_FMOV
22315 && (set = single_set (dep_insn)) != NULL_RTX
22316 && (set2 = single_set (insn)) != NULL_RTX
22317 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22318 && MEM_P (SET_DEST (set2)))
22321 /* Show the ability of the reorder buffer to hide the latency of a load
22322 by executing it in parallel with the previous instruction when the
22323 previous instruction is not needed to compute the address. */
22324 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22325 && !ix86_agi_dependent (dep_insn, insn))
22327 /* Claim moves to take one cycle, as the core can issue one load
22328 at a time and the next load can start a cycle later. */
22329 if (dep_insn_type == TYPE_IMOV
22330 || dep_insn_type == TYPE_FMOV)
22338 memory = get_attr_memory (insn);
22340 /* The esp dependency is resolved before the instruction is really
22341 finished. */
22342 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22343 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22346 /* INT->FP conversion is expensive. */
22347 if (get_attr_fp_int_src (dep_insn))
22350 /* Show the ability of the reorder buffer to hide the latency of a load
22351 by executing it in parallel with the previous instruction when the
22352 previous instruction is not needed to compute the address. */
22353 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22354 && !ix86_agi_dependent (dep_insn, insn))
22356 /* Claim moves to take one cycle, as the core can issue one load
22357 at a time and the next load can start a cycle later. */
22358 if (dep_insn_type == TYPE_IMOV
22359 || dep_insn_type == TYPE_FMOV)
22368 case PROCESSOR_ATHLON:
22370 case PROCESSOR_AMDFAM10:
22371 case PROCESSOR_BDVER1:
22372 case PROCESSOR_ATOM:
22373 case PROCESSOR_GENERIC32:
22374 case PROCESSOR_GENERIC64:
22375 memory = get_attr_memory (insn);
22377 /* Show the ability of the reorder buffer to hide the latency of a load
22378 by executing it in parallel with the previous instruction when the
22379 previous instruction is not needed to compute the address. */
22380 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22381 && !ix86_agi_dependent (dep_insn, insn))
22383 enum attr_unit unit = get_attr_unit (insn);
22386 /* Because of the difference between the length of the integer and
22387 floating-point unit pipeline preparation stages, the memory operands
22388 for floating point are cheaper.
22390 ??? For Athlon the difference is most probably 2. */
22391 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22392 loadcost = 3;
22393 else
22394 loadcost = TARGET_ATHLON ? 2 : 0;
22396 if (cost >= loadcost)
22397 cost -= loadcost;
22398 else
22399 cost = 0;
22409 /* How many alternative schedules to try. This should be as wide as the
22410 scheduling freedom in the DFA, but no wider. Making this value too
22411 large results in extra work for the scheduler. */
22414 ia32_multipass_dfa_lookahead (void)
22418 case PROCESSOR_PENTIUM:
22421 case PROCESSOR_PENTIUMPRO:
22425 case PROCESSOR_CORE2_32:
22426 case PROCESSOR_CORE2_64:
22427 case PROCESSOR_COREI7_32:
22428 case PROCESSOR_COREI7_64:
22429 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22430 as the number of instructions that can be executed in a cycle, i.e.,
22431 issue_rate. I wonder why tuning for many CPUs does not do this. */
22432 return ix86_issue_rate ();
22441 /* Model decoder of Core 2/i7.
22442 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22443 track the instruction fetch block boundaries and make sure that long
22444 (9+ bytes) instructions are assigned to D0. */
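/* E.g. with the parameters set below, one 16-byte fetch block supplies at
   most 6 instructions per cycle, and an instruction longer than 8 bytes
   can only be handled by the first decoder, D0.  */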
22446 /* Maximum length of an insn that can be handled by
22447 a secondary decoder unit. '8' for Core 2/i7. */
22448 static int core2i7_secondary_decoder_max_insn_size;
22450 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22451 '16' for Core 2/i7. */
22452 static int core2i7_ifetch_block_size;
22454 /* Maximum number of instructions decoder can handle per cycle.
22455 '6' for Core 2/i7. */
22456 static int core2i7_ifetch_block_max_insns;
22458 typedef struct ix86_first_cycle_multipass_data_ *
22459 ix86_first_cycle_multipass_data_t;
22460 typedef const struct ix86_first_cycle_multipass_data_ *
22461 const_ix86_first_cycle_multipass_data_t;
22463 /* A variable to store target state across calls to max_issue within
22465 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22466 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22468 /* Initialize DATA. */
22470 core2i7_first_cycle_multipass_init (void *_data)
22472 ix86_first_cycle_multipass_data_t data
22473 = (ix86_first_cycle_multipass_data_t) _data;
22475 data->ifetch_block_len = 0;
22476 data->ifetch_block_n_insns = 0;
22477 data->ready_try_change = NULL;
22478 data->ready_try_change_size = 0;
22481 /* Advancing the cycle; reset ifetch block counts. */
22483 core2i7_dfa_post_advance_cycle (void)
22485 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22487 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22489 data->ifetch_block_len = 0;
22490 data->ifetch_block_n_insns = 0;
22493 static int min_insn_size (rtx);
22495 /* Filter out insns from ready_try that the core will not be able to issue
22496 on the current cycle due to decoder restrictions. */
22498 core2i7_first_cycle_multipass_filter_ready_try
22499 (const_ix86_first_cycle_multipass_data_t data,
22500 char *ready_try, int n_ready, bool first_cycle_insn_p)
22507 if (ready_try[n_ready])
22510 insn = get_ready_element (n_ready);
22511 insn_size = min_insn_size (insn);
22513 if (/* If this is too long an insn for a secondary decoder ... */
22514 (!first_cycle_insn_p
22515 && insn_size > core2i7_secondary_decoder_max_insn_size)
22516 /* ... or it would not fit into the ifetch block ... */
22517 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22518 /* ... or the decoder is full already ... */
22519 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22520 /* ... mask the insn out. */
22522 ready_try[n_ready] = 1;
22524 if (data->ready_try_change)
22525 SET_BIT (data->ready_try_change, n_ready);
22530 /* Prepare for a new round of multipass lookahead scheduling. */
22532 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22533 bool first_cycle_insn_p)
22535 ix86_first_cycle_multipass_data_t data
22536 = (ix86_first_cycle_multipass_data_t) _data;
22537 const_ix86_first_cycle_multipass_data_t prev_data
22538 = ix86_first_cycle_multipass_data;
22540 /* Restore the state from the end of the previous round. */
22541 data->ifetch_block_len = prev_data->ifetch_block_len;
22542 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22544 /* Filter instructions that cannot be issued on current cycle due to
22545 decoder restrictions. */
22546 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22547 first_cycle_insn_p);
22550 /* INSN is being issued in the current solution. Account for its impact on
22551 the decoder model. */
22553 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22554 rtx insn, const void *_prev_data)
22556 ix86_first_cycle_multipass_data_t data
22557 = (ix86_first_cycle_multipass_data_t) _data;
22558 const_ix86_first_cycle_multipass_data_t prev_data
22559 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22561 int insn_size = min_insn_size (insn);
22563 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22564 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22565 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22566 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22568 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22569 if (!data->ready_try_change)
22571 data->ready_try_change = sbitmap_alloc (n_ready);
22572 data->ready_try_change_size = n_ready;
22574 else if (data->ready_try_change_size < n_ready)
22576 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22578 data->ready_try_change_size = n_ready;
22580 sbitmap_zero (data->ready_try_change);
22582 /* Filter out insns from ready_try that the core will not be able to issue
22583 on the current cycle due to decoder restrictions. */
22584 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22588 /* Revert the effect on ready_try. */
22590 core2i7_first_cycle_multipass_backtrack (const void *_data,
22592 int n_ready ATTRIBUTE_UNUSED)
22594 const_ix86_first_cycle_multipass_data_t data
22595 = (const_ix86_first_cycle_multipass_data_t) _data;
22596 unsigned int i = 0;
22597 sbitmap_iterator sbi;
22599 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22600 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22601 {
22602 ready_try[i] = 0;
22603 }
22606 /* Save the result of multipass lookahead scheduling for the next round. */
22608 core2i7_first_cycle_multipass_end (const void *_data)
22610 const_ix86_first_cycle_multipass_data_t data
22611 = (const_ix86_first_cycle_multipass_data_t) _data;
22612 ix86_first_cycle_multipass_data_t next_data
22613 = ix86_first_cycle_multipass_data;
22617 next_data->ifetch_block_len = data->ifetch_block_len;
22618 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22622 /* Deallocate target data. */
22624 core2i7_first_cycle_multipass_fini (void *_data)
22626 ix86_first_cycle_multipass_data_t data
22627 = (ix86_first_cycle_multipass_data_t) _data;
22629 if (data->ready_try_change)
22631 sbitmap_free (data->ready_try_change);
22632 data->ready_try_change = NULL;
22633 data->ready_try_change_size = 0;
22637 /* Prepare for scheduling pass. */
22639 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22640 int verbose ATTRIBUTE_UNUSED,
22641 int max_uid ATTRIBUTE_UNUSED)
22643 /* Install scheduling hooks for current CPU. Some of these hooks are used
22644 in time-critical parts of the scheduler, so we only set them up when
22645 they are actually used. */
22648 case PROCESSOR_CORE2_32:
22649 case PROCESSOR_CORE2_64:
22650 case PROCESSOR_COREI7_32:
22651 case PROCESSOR_COREI7_64:
22652 targetm.sched.dfa_post_advance_cycle
22653 = core2i7_dfa_post_advance_cycle;
22654 targetm.sched.first_cycle_multipass_init
22655 = core2i7_first_cycle_multipass_init;
22656 targetm.sched.first_cycle_multipass_begin
22657 = core2i7_first_cycle_multipass_begin;
22658 targetm.sched.first_cycle_multipass_issue
22659 = core2i7_first_cycle_multipass_issue;
22660 targetm.sched.first_cycle_multipass_backtrack
22661 = core2i7_first_cycle_multipass_backtrack;
22662 targetm.sched.first_cycle_multipass_end
22663 = core2i7_first_cycle_multipass_end;
22664 targetm.sched.first_cycle_multipass_fini
22665 = core2i7_first_cycle_multipass_fini;
22667 /* Set decoder parameters. */
22668 core2i7_secondary_decoder_max_insn_size = 8;
22669 core2i7_ifetch_block_size = 16;
22670 core2i7_ifetch_block_max_insns = 6;
22674 targetm.sched.dfa_post_advance_cycle = NULL;
22675 targetm.sched.first_cycle_multipass_init = NULL;
22676 targetm.sched.first_cycle_multipass_begin = NULL;
22677 targetm.sched.first_cycle_multipass_issue = NULL;
22678 targetm.sched.first_cycle_multipass_backtrack = NULL;
22679 targetm.sched.first_cycle_multipass_end = NULL;
22680 targetm.sched.first_cycle_multipass_fini = NULL;
22686 /* Compute the alignment given to a constant that is being placed in memory.
22687 EXP is the constant and ALIGN is the alignment that the object would
22688 ordinarily have.
22689 The value of this function is used instead of that alignment to align
22690 the object. */
22693 ix86_constant_alignment (tree exp, int align)
22695 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22696 || TREE_CODE (exp) == INTEGER_CST)
22698 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22700 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22703 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22704 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22705 return BITS_PER_WORD;
22710 /* Compute the alignment for a static variable.
22711 TYPE is the data type, and ALIGN is the alignment that
22712 the object would ordinarily have. The value of this function is used
22713 instead of that alignment to align the object. */
22716 ix86_data_alignment (tree type, int align)
22718 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22720 if (AGGREGATE_TYPE_P (type)
22721 && TYPE_SIZE (type)
22722 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22723 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22724 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22725 && align < max_align)
22728 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22729 to a 16-byte boundary. */
22732 if (AGGREGATE_TYPE_P (type)
22733 && TYPE_SIZE (type)
22734 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22735 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22736 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22740 if (TREE_CODE (type) == ARRAY_TYPE)
22742 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22744 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22747 else if (TREE_CODE (type) == COMPLEX_TYPE)
22750 if (TYPE_MODE (type) == DCmode && align < 64)
22752 if ((TYPE_MODE (type) == XCmode
22753 || TYPE_MODE (type) == TCmode) && align < 128)
22756 else if ((TREE_CODE (type) == RECORD_TYPE
22757 || TREE_CODE (type) == UNION_TYPE
22758 || TREE_CODE (type) == QUAL_UNION_TYPE)
22759 && TYPE_FIELDS (type))
22761 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22763 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22766 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22767 || TREE_CODE (type) == INTEGER_TYPE)
22769 if (TYPE_MODE (type) == DFmode && align < 64)
22771 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22778 /* Compute the alignment for a local variable or a stack slot. EXP is
22779 the data type or decl itself, MODE is the widest mode available and
22780 ALIGN is the alignment that the object would ordinarily have. The
22781 value of this macro is used instead of that alignment to align the
22782 object. */
22785 ix86_local_alignment (tree exp, enum machine_mode mode,
22786 unsigned int align)
22790 if (exp && DECL_P (exp))
22792 type = TREE_TYPE (exp);
22801 /* Don't do dynamic stack realignment for long long objects with
22802 -mpreferred-stack-boundary=2. */
22803 if (!TARGET_64BIT
22804 && align == 64
22805 && ix86_preferred_stack_boundary < 64
22806 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22807 && (!type || !TYPE_USER_ALIGN (type))
22808 && (!decl || !DECL_USER_ALIGN (decl)))
22809 align = 32;
22811 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22812 register in MODE. We will return the largest alignment of XF
22813 and DF. */
22814 if (!type)
22815 {
22816 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22817 align = GET_MODE_ALIGNMENT (DFmode);
22821 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22822 to a 16-byte boundary. The exact wording is:
22824 An array uses the same alignment as its elements, except that a local or
22825 global array variable of length at least 16 bytes or
22826 a C99 variable-length array variable always has alignment of at least 16 bytes.
22828 This was added to allow use of aligned SSE instructions on arrays. This
22829 rule is meant for static storage (where the compiler cannot do the analysis
22830 by itself). We follow it for automatic variables only when convenient.
22831 We fully control everything in the function being compiled, and functions
22832 from other units cannot rely on the alignment.
22834 Exclude the va_list type. It is the common case of a local array where
22835 we cannot benefit from the alignment. */
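/* E.g. a local "double buf[4]" (32 bytes) is bumped to 128-bit alignment
   by the check below, so aligned SSE accesses (movapd) can be used on it.  */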
22836 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22839 if (AGGREGATE_TYPE_P (type)
22840 && (TYPE_MAIN_VARIANT (type)
22841 != TYPE_MAIN_VARIANT (va_list_type_node))
22842 && TYPE_SIZE (type)
22843 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22844 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22845 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22848 if (TREE_CODE (type) == ARRAY_TYPE)
22850 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22852 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22855 else if (TREE_CODE (type) == COMPLEX_TYPE)
22857 if (TYPE_MODE (type) == DCmode && align < 64)
22859 if ((TYPE_MODE (type) == XCmode
22860 || TYPE_MODE (type) == TCmode) && align < 128)
22863 else if ((TREE_CODE (type) == RECORD_TYPE
22864 || TREE_CODE (type) == UNION_TYPE
22865 || TREE_CODE (type) == QUAL_UNION_TYPE)
22866 && TYPE_FIELDS (type))
22868 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22870 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22873 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22874 || TREE_CODE (type) == INTEGER_TYPE)
22877 if (TYPE_MODE (type) == DFmode && align < 64)
22879 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22885 /* Compute the minimum required alignment for dynamic stack realignment
22886 purposes for a local variable, parameter or a stack slot. EXP is
22887 the data type or decl itself, MODE is its mode and ALIGN is the
22888 alignment that the object would ordinarily have. */
22891 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22892 unsigned int align)
22896 if (exp && DECL_P (exp))
22898 type = TREE_TYPE (exp);
22907 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22908 return align;
22910 /* Don't do dynamic stack realignment for long long objects with
22911 -mpreferred-stack-boundary=2. */
22912 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22913 && (!type || !TYPE_USER_ALIGN (type))
22914 && (!decl || !DECL_USER_ALIGN (decl)))
22915 return 32;
22920 /* Find a location for the static chain incoming to a nested function.
22921 This is a register, unless all free registers are used by arguments. */
22924 ix86_static_chain (const_tree fndecl, bool incoming_p)
22928 if (!DECL_STATIC_CHAIN (fndecl))
22929 return NULL;
22933 /* We always use R10 in 64-bit mode. */
22939 /* By default in 32-bit mode we use ECX to pass the static chain. */
22942 fntype = TREE_TYPE (fndecl);
22943 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
22945 /* Fastcall functions use ecx/edx for arguments, which leaves
22946 us with EAX for the static chain. */
22949 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
22951 /* Thiscall functions use ecx for arguments, which leaves
22952 us with EAX for the static chain. */
22955 else if (ix86_function_regparm (fntype, fndecl) == 3)
22957 /* For regparm 3, we have no free call-clobbered registers in
22958 which to store the static chain. In order to implement this,
22959 we have the trampoline push the static chain to the stack.
22960 However, we can't push a value below the return address when
22961 we call the nested function directly, so we have to use an
22962 alternate entry point. For this we use ESI, and have the
22963 alternate entry point push ESI, so that things appear the
22964 same once we're executing the nested function. */
22967 if (fndecl == current_function_decl)
22968 ix86_static_chain_on_stack = true;
22969 return gen_frame_mem (SImode,
22970 plus_constant (arg_pointer_rtx, -8));
22976 return gen_rtx_REG (Pmode, regno);
22979 /* Emit RTL insns to initialize the variable parts of a trampoline.
22980 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22981 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22982 to be passed to the target function. */
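/* For illustration, the 32-bit trampoline emitted below is 10 bytes:
     b9 <chain32>   movl  $chain_value, %ecx   (b8 for %eax, 68 for pushl)
     e9 <disp32>    jmp   <target>
   so all first-insn variants are the same size (5 bytes).  */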
22985 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22989 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22996 /* Depending on the static chain location, either load a register
22997 with a constant, or push the constant to the stack. All of the
22998 instructions are the same size. */
22999 chain = ix86_static_chain (fndecl, true);
23000 if (REG_P (chain))
23001 {
23002 if (REGNO (chain) == CX_REG)
23003 opcode = 0xb9;
23004 else if (REGNO (chain) == AX_REG)
23005 opcode = 0xb8;
23006 else
23007 gcc_unreachable ();
23008 }
23009 else
23010 opcode = 0x68;
23012 mem = adjust_address (m_tramp, QImode, 0);
23013 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23015 mem = adjust_address (m_tramp, SImode, 1);
23016 emit_move_insn (mem, chain_value);
23018 /* Compute offset from the end of the jmp to the target function.
23019 In the case in which the trampoline stores the static chain on
23020 the stack, we need to skip the first insn which pushes the
23021 (call-saved) register static chain; this push is 1 byte. */
23022 disp = expand_binop (SImode, sub_optab, fnaddr,
23023 plus_constant (XEXP (m_tramp, 0),
23024 MEM_P (chain) ? 9 : 10),
23025 NULL_RTX, 1, OPTAB_DIRECT);
23027 mem = adjust_address (m_tramp, QImode, 5);
23028 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23030 mem = adjust_address (m_tramp, SImode, 6);
23031 emit_move_insn (mem, disp);
23037 /* Load the function address into r11. Try to load the address using
23038 the shorter movl instead of movabs. We may want to support
23039 movq for kernel mode, but the kernel does not use trampolines at
23040 the moment. */
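/* For illustration, the resulting 64-bit trampoline is, byte for byte:
     41 bb <imm32>   movl   $fnaddr, %r11d   (or 49 bb <imm64>: movabs)
     49 ba <imm64>   movabs $chain_value, %r10
     49 ff e3 90     rex jmp *%r11; nop
   matching the little-endian constants 0xbb41/0xbb49/0xba49/0x90e3ff49
   stored below.  */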
23041 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23043 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23045 mem = adjust_address (m_tramp, HImode, offset);
23046 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23048 mem = adjust_address (m_tramp, SImode, offset + 2);
23049 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23054 mem = adjust_address (m_tramp, HImode, offset);
23055 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23057 mem = adjust_address (m_tramp, DImode, offset + 2);
23058 emit_move_insn (mem, fnaddr);
23062 /* Load static chain using movabs to r10. */
23063 mem = adjust_address (m_tramp, HImode, offset);
23064 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23066 mem = adjust_address (m_tramp, DImode, offset + 2);
23067 emit_move_insn (mem, chain_value);
23070 /* Jump to r11; the last (unused) byte is a nop, only there to
23071 pad the write out to a single 32-bit store. */
23072 mem = adjust_address (m_tramp, SImode, offset);
23073 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23076 gcc_assert (offset <= TRAMPOLINE_SIZE);
23079 #ifdef ENABLE_EXECUTE_STACK
23080 #ifdef CHECK_EXECUTE_STACK_ENABLED
23081 if (CHECK_EXECUTE_STACK_ENABLED)
23083 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23084 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
23088 /* The following file contains several enumerations and data structures
23089 built from the definitions in i386-builtin-types.def. */
23091 #include "i386-builtin-types.inc"
23093 /* Table for the ix86 builtin non-function types. */
23094 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23096 /* Retrieve an element from the above table, building some of
23097 the types lazily. */
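/* E.g. the first request for a V4SF vector type is built here via
   build_vector_type_for_mode (float_type_node, V4SFmode) and cached in
   ix86_builtin_type_tab; subsequent requests return the cached tree.  */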
23100 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23102 unsigned int index;
23103 tree type, itype;
23104 int quals;
23105 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23107 type = ix86_builtin_type_tab[(int) tcode];
23111 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23112 if (tcode <= IX86_BT_LAST_VECT)
23114 enum machine_mode mode;
23116 index = tcode - IX86_BT_LAST_PRIM - 1;
23117 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23118 mode = ix86_builtin_type_vect_mode[index];
23120 type = build_vector_type_for_mode (itype, mode);
23126 index = tcode - IX86_BT_LAST_VECT - 1;
23127 if (tcode <= IX86_BT_LAST_PTR)
23128 quals = TYPE_UNQUALIFIED;
23130 quals = TYPE_QUAL_CONST;
23132 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23133 if (quals != TYPE_UNQUALIFIED)
23134 itype = build_qualified_type (itype, quals);
23136 type = build_pointer_type (itype);
23139 ix86_builtin_type_tab[(int) tcode] = type;
23143 /* Table for the ix86 builtin function types. */
23144 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23146 /* Retrieve an element from the above table, building some of
23147 the types lazily. */
23150 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23154 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23156 type = ix86_builtin_func_type_tab[(int) tcode];
23160 if (tcode <= IX86_BT_LAST_FUNC)
23162 unsigned start = ix86_builtin_func_start[(int) tcode];
23163 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23164 tree rtype, atype, args = void_list_node;
23167 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23168 for (i = after - 1; i > start; --i)
23170 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23171 args = tree_cons (NULL, atype, args);
23174 type = build_function_type (rtype, args);
23178 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23179 enum ix86_builtin_func_type icode;
23181 icode = ix86_builtin_func_alias_base[index];
23182 type = ix86_get_builtin_func_type (icode);
23185 ix86_builtin_func_type_tab[(int) tcode] = type;
23190 /* Codes for all the SSE/MMX builtins. */
23191 enum ix86_builtins
23192 {
23193 IX86_BUILTIN_ADDPS,
23194 IX86_BUILTIN_ADDSS,
23195 IX86_BUILTIN_DIVPS,
23196 IX86_BUILTIN_DIVSS,
23197 IX86_BUILTIN_MULPS,
23198 IX86_BUILTIN_MULSS,
23199 IX86_BUILTIN_SUBPS,
23200 IX86_BUILTIN_SUBSS,
23202 IX86_BUILTIN_CMPEQPS,
23203 IX86_BUILTIN_CMPLTPS,
23204 IX86_BUILTIN_CMPLEPS,
23205 IX86_BUILTIN_CMPGTPS,
23206 IX86_BUILTIN_CMPGEPS,
23207 IX86_BUILTIN_CMPNEQPS,
23208 IX86_BUILTIN_CMPNLTPS,
23209 IX86_BUILTIN_CMPNLEPS,
23210 IX86_BUILTIN_CMPNGTPS,
23211 IX86_BUILTIN_CMPNGEPS,
23212 IX86_BUILTIN_CMPORDPS,
23213 IX86_BUILTIN_CMPUNORDPS,
23214 IX86_BUILTIN_CMPEQSS,
23215 IX86_BUILTIN_CMPLTSS,
23216 IX86_BUILTIN_CMPLESS,
23217 IX86_BUILTIN_CMPNEQSS,
23218 IX86_BUILTIN_CMPNLTSS,
23219 IX86_BUILTIN_CMPNLESS,
23220 IX86_BUILTIN_CMPNGTSS,
23221 IX86_BUILTIN_CMPNGESS,
23222 IX86_BUILTIN_CMPORDSS,
23223 IX86_BUILTIN_CMPUNORDSS,
23225 IX86_BUILTIN_COMIEQSS,
23226 IX86_BUILTIN_COMILTSS,
23227 IX86_BUILTIN_COMILESS,
23228 IX86_BUILTIN_COMIGTSS,
23229 IX86_BUILTIN_COMIGESS,
23230 IX86_BUILTIN_COMINEQSS,
23231 IX86_BUILTIN_UCOMIEQSS,
23232 IX86_BUILTIN_UCOMILTSS,
23233 IX86_BUILTIN_UCOMILESS,
23234 IX86_BUILTIN_UCOMIGTSS,
23235 IX86_BUILTIN_UCOMIGESS,
23236 IX86_BUILTIN_UCOMINEQSS,
23238 IX86_BUILTIN_CVTPI2PS,
23239 IX86_BUILTIN_CVTPS2PI,
23240 IX86_BUILTIN_CVTSI2SS,
23241 IX86_BUILTIN_CVTSI642SS,
23242 IX86_BUILTIN_CVTSS2SI,
23243 IX86_BUILTIN_CVTSS2SI64,
23244 IX86_BUILTIN_CVTTPS2PI,
23245 IX86_BUILTIN_CVTTSS2SI,
23246 IX86_BUILTIN_CVTTSS2SI64,
23248 IX86_BUILTIN_MAXPS,
23249 IX86_BUILTIN_MAXSS,
23250 IX86_BUILTIN_MINPS,
23251 IX86_BUILTIN_MINSS,
23253 IX86_BUILTIN_LOADUPS,
23254 IX86_BUILTIN_STOREUPS,
23255 IX86_BUILTIN_MOVSS,
23257 IX86_BUILTIN_MOVHLPS,
23258 IX86_BUILTIN_MOVLHPS,
23259 IX86_BUILTIN_LOADHPS,
23260 IX86_BUILTIN_LOADLPS,
23261 IX86_BUILTIN_STOREHPS,
23262 IX86_BUILTIN_STORELPS,
23264 IX86_BUILTIN_MASKMOVQ,
23265 IX86_BUILTIN_MOVMSKPS,
23266 IX86_BUILTIN_PMOVMSKB,
23268 IX86_BUILTIN_MOVNTPS,
23269 IX86_BUILTIN_MOVNTQ,
23271 IX86_BUILTIN_LOADDQU,
23272 IX86_BUILTIN_STOREDQU,
23274 IX86_BUILTIN_PACKSSWB,
23275 IX86_BUILTIN_PACKSSDW,
23276 IX86_BUILTIN_PACKUSWB,
23278 IX86_BUILTIN_PADDB,
23279 IX86_BUILTIN_PADDW,
23280 IX86_BUILTIN_PADDD,
23281 IX86_BUILTIN_PADDQ,
23282 IX86_BUILTIN_PADDSB,
23283 IX86_BUILTIN_PADDSW,
23284 IX86_BUILTIN_PADDUSB,
23285 IX86_BUILTIN_PADDUSW,
23286 IX86_BUILTIN_PSUBB,
23287 IX86_BUILTIN_PSUBW,
23288 IX86_BUILTIN_PSUBD,
23289 IX86_BUILTIN_PSUBQ,
23290 IX86_BUILTIN_PSUBSB,
23291 IX86_BUILTIN_PSUBSW,
23292 IX86_BUILTIN_PSUBUSB,
23293 IX86_BUILTIN_PSUBUSW,
23296 IX86_BUILTIN_PANDN,
23300 IX86_BUILTIN_PAVGB,
23301 IX86_BUILTIN_PAVGW,
23303 IX86_BUILTIN_PCMPEQB,
23304 IX86_BUILTIN_PCMPEQW,
23305 IX86_BUILTIN_PCMPEQD,
23306 IX86_BUILTIN_PCMPGTB,
23307 IX86_BUILTIN_PCMPGTW,
23308 IX86_BUILTIN_PCMPGTD,
23310 IX86_BUILTIN_PMADDWD,
23312 IX86_BUILTIN_PMAXSW,
23313 IX86_BUILTIN_PMAXUB,
23314 IX86_BUILTIN_PMINSW,
23315 IX86_BUILTIN_PMINUB,
23317 IX86_BUILTIN_PMULHUW,
23318 IX86_BUILTIN_PMULHW,
23319 IX86_BUILTIN_PMULLW,
23321 IX86_BUILTIN_PSADBW,
23322 IX86_BUILTIN_PSHUFW,
23324 IX86_BUILTIN_PSLLW,
23325 IX86_BUILTIN_PSLLD,
23326 IX86_BUILTIN_PSLLQ,
23327 IX86_BUILTIN_PSRAW,
23328 IX86_BUILTIN_PSRAD,
23329 IX86_BUILTIN_PSRLW,
23330 IX86_BUILTIN_PSRLD,
23331 IX86_BUILTIN_PSRLQ,
23332 IX86_BUILTIN_PSLLWI,
23333 IX86_BUILTIN_PSLLDI,
23334 IX86_BUILTIN_PSLLQI,
23335 IX86_BUILTIN_PSRAWI,
23336 IX86_BUILTIN_PSRADI,
23337 IX86_BUILTIN_PSRLWI,
23338 IX86_BUILTIN_PSRLDI,
23339 IX86_BUILTIN_PSRLQI,
23341 IX86_BUILTIN_PUNPCKHBW,
23342 IX86_BUILTIN_PUNPCKHWD,
23343 IX86_BUILTIN_PUNPCKHDQ,
23344 IX86_BUILTIN_PUNPCKLBW,
23345 IX86_BUILTIN_PUNPCKLWD,
23346 IX86_BUILTIN_PUNPCKLDQ,
23348 IX86_BUILTIN_SHUFPS,
23350 IX86_BUILTIN_RCPPS,
23351 IX86_BUILTIN_RCPSS,
23352 IX86_BUILTIN_RSQRTPS,
23353 IX86_BUILTIN_RSQRTPS_NR,
23354 IX86_BUILTIN_RSQRTSS,
23355 IX86_BUILTIN_RSQRTF,
23356 IX86_BUILTIN_SQRTPS,
23357 IX86_BUILTIN_SQRTPS_NR,
23358 IX86_BUILTIN_SQRTSS,
23360 IX86_BUILTIN_UNPCKHPS,
23361 IX86_BUILTIN_UNPCKLPS,
23363 IX86_BUILTIN_ANDPS,
23364 IX86_BUILTIN_ANDNPS,
23366 IX86_BUILTIN_XORPS,
23369 IX86_BUILTIN_LDMXCSR,
23370 IX86_BUILTIN_STMXCSR,
23371 IX86_BUILTIN_SFENCE,
23373 /* 3DNow! Original */
23374 IX86_BUILTIN_FEMMS,
23375 IX86_BUILTIN_PAVGUSB,
23376 IX86_BUILTIN_PF2ID,
23377 IX86_BUILTIN_PFACC,
23378 IX86_BUILTIN_PFADD,
23379 IX86_BUILTIN_PFCMPEQ,
23380 IX86_BUILTIN_PFCMPGE,
23381 IX86_BUILTIN_PFCMPGT,
23382 IX86_BUILTIN_PFMAX,
23383 IX86_BUILTIN_PFMIN,
23384 IX86_BUILTIN_PFMUL,
23385 IX86_BUILTIN_PFRCP,
23386 IX86_BUILTIN_PFRCPIT1,
23387 IX86_BUILTIN_PFRCPIT2,
23388 IX86_BUILTIN_PFRSQIT1,
23389 IX86_BUILTIN_PFRSQRT,
23390 IX86_BUILTIN_PFSUB,
23391 IX86_BUILTIN_PFSUBR,
23392 IX86_BUILTIN_PI2FD,
23393 IX86_BUILTIN_PMULHRW,
23395 /* 3DNow! Athlon Extensions */
23396 IX86_BUILTIN_PF2IW,
23397 IX86_BUILTIN_PFNACC,
23398 IX86_BUILTIN_PFPNACC,
23399 IX86_BUILTIN_PI2FW,
23400 IX86_BUILTIN_PSWAPDSI,
23401 IX86_BUILTIN_PSWAPDSF,
23404 IX86_BUILTIN_ADDPD,
23405 IX86_BUILTIN_ADDSD,
23406 IX86_BUILTIN_DIVPD,
23407 IX86_BUILTIN_DIVSD,
23408 IX86_BUILTIN_MULPD,
23409 IX86_BUILTIN_MULSD,
23410 IX86_BUILTIN_SUBPD,
23411 IX86_BUILTIN_SUBSD,
23413 IX86_BUILTIN_CMPEQPD,
23414 IX86_BUILTIN_CMPLTPD,
23415 IX86_BUILTIN_CMPLEPD,
23416 IX86_BUILTIN_CMPGTPD,
23417 IX86_BUILTIN_CMPGEPD,
23418 IX86_BUILTIN_CMPNEQPD,
23419 IX86_BUILTIN_CMPNLTPD,
23420 IX86_BUILTIN_CMPNLEPD,
23421 IX86_BUILTIN_CMPNGTPD,
23422 IX86_BUILTIN_CMPNGEPD,
23423 IX86_BUILTIN_CMPORDPD,
23424 IX86_BUILTIN_CMPUNORDPD,
23425 IX86_BUILTIN_CMPEQSD,
23426 IX86_BUILTIN_CMPLTSD,
23427 IX86_BUILTIN_CMPLESD,
23428 IX86_BUILTIN_CMPNEQSD,
23429 IX86_BUILTIN_CMPNLTSD,
23430 IX86_BUILTIN_CMPNLESD,
23431 IX86_BUILTIN_CMPORDSD,
23432 IX86_BUILTIN_CMPUNORDSD,
23434 IX86_BUILTIN_COMIEQSD,
23435 IX86_BUILTIN_COMILTSD,
23436 IX86_BUILTIN_COMILESD,
23437 IX86_BUILTIN_COMIGTSD,
23438 IX86_BUILTIN_COMIGESD,
23439 IX86_BUILTIN_COMINEQSD,
23440 IX86_BUILTIN_UCOMIEQSD,
23441 IX86_BUILTIN_UCOMILTSD,
23442 IX86_BUILTIN_UCOMILESD,
23443 IX86_BUILTIN_UCOMIGTSD,
23444 IX86_BUILTIN_UCOMIGESD,
23445 IX86_BUILTIN_UCOMINEQSD,
23447 IX86_BUILTIN_MAXPD,
23448 IX86_BUILTIN_MAXSD,
23449 IX86_BUILTIN_MINPD,
23450 IX86_BUILTIN_MINSD,
23452 IX86_BUILTIN_ANDPD,
23453 IX86_BUILTIN_ANDNPD,
23455 IX86_BUILTIN_XORPD,
23457 IX86_BUILTIN_SQRTPD,
23458 IX86_BUILTIN_SQRTSD,
23460 IX86_BUILTIN_UNPCKHPD,
23461 IX86_BUILTIN_UNPCKLPD,
23463 IX86_BUILTIN_SHUFPD,
23465 IX86_BUILTIN_LOADUPD,
23466 IX86_BUILTIN_STOREUPD,
23467 IX86_BUILTIN_MOVSD,
23469 IX86_BUILTIN_LOADHPD,
23470 IX86_BUILTIN_LOADLPD,
23472 IX86_BUILTIN_CVTDQ2PD,
23473 IX86_BUILTIN_CVTDQ2PS,
23475 IX86_BUILTIN_CVTPD2DQ,
23476 IX86_BUILTIN_CVTPD2PI,
23477 IX86_BUILTIN_CVTPD2PS,
23478 IX86_BUILTIN_CVTTPD2DQ,
23479 IX86_BUILTIN_CVTTPD2PI,
23481 IX86_BUILTIN_CVTPI2PD,
23482 IX86_BUILTIN_CVTSI2SD,
23483 IX86_BUILTIN_CVTSI642SD,
23485 IX86_BUILTIN_CVTSD2SI,
23486 IX86_BUILTIN_CVTSD2SI64,
23487 IX86_BUILTIN_CVTSD2SS,
23488 IX86_BUILTIN_CVTSS2SD,
23489 IX86_BUILTIN_CVTTSD2SI,
23490 IX86_BUILTIN_CVTTSD2SI64,
23492 IX86_BUILTIN_CVTPS2DQ,
23493 IX86_BUILTIN_CVTPS2PD,
23494 IX86_BUILTIN_CVTTPS2DQ,
23496 IX86_BUILTIN_MOVNTI,
23497 IX86_BUILTIN_MOVNTPD,
23498 IX86_BUILTIN_MOVNTDQ,
23500 IX86_BUILTIN_MOVQ128,
23503 IX86_BUILTIN_MASKMOVDQU,
23504 IX86_BUILTIN_MOVMSKPD,
23505 IX86_BUILTIN_PMOVMSKB128,
23507 IX86_BUILTIN_PACKSSWB128,
23508 IX86_BUILTIN_PACKSSDW128,
23509 IX86_BUILTIN_PACKUSWB128,
23511 IX86_BUILTIN_PADDB128,
23512 IX86_BUILTIN_PADDW128,
23513 IX86_BUILTIN_PADDD128,
23514 IX86_BUILTIN_PADDQ128,
23515 IX86_BUILTIN_PADDSB128,
23516 IX86_BUILTIN_PADDSW128,
23517 IX86_BUILTIN_PADDUSB128,
23518 IX86_BUILTIN_PADDUSW128,
23519 IX86_BUILTIN_PSUBB128,
23520 IX86_BUILTIN_PSUBW128,
23521 IX86_BUILTIN_PSUBD128,
23522 IX86_BUILTIN_PSUBQ128,
23523 IX86_BUILTIN_PSUBSB128,
23524 IX86_BUILTIN_PSUBSW128,
23525 IX86_BUILTIN_PSUBUSB128,
23526 IX86_BUILTIN_PSUBUSW128,
23528 IX86_BUILTIN_PAND128,
23529 IX86_BUILTIN_PANDN128,
23530 IX86_BUILTIN_POR128,
23531 IX86_BUILTIN_PXOR128,
23533 IX86_BUILTIN_PAVGB128,
23534 IX86_BUILTIN_PAVGW128,
23536 IX86_BUILTIN_PCMPEQB128,
23537 IX86_BUILTIN_PCMPEQW128,
23538 IX86_BUILTIN_PCMPEQD128,
23539 IX86_BUILTIN_PCMPGTB128,
23540 IX86_BUILTIN_PCMPGTW128,
23541 IX86_BUILTIN_PCMPGTD128,
23543 IX86_BUILTIN_PMADDWD128,
23545 IX86_BUILTIN_PMAXSW128,
23546 IX86_BUILTIN_PMAXUB128,
23547 IX86_BUILTIN_PMINSW128,
23548 IX86_BUILTIN_PMINUB128,
23550 IX86_BUILTIN_PMULUDQ,
23551 IX86_BUILTIN_PMULUDQ128,
23552 IX86_BUILTIN_PMULHUW128,
23553 IX86_BUILTIN_PMULHW128,
23554 IX86_BUILTIN_PMULLW128,
23556 IX86_BUILTIN_PSADBW128,
23557 IX86_BUILTIN_PSHUFHW,
23558 IX86_BUILTIN_PSHUFLW,
23559 IX86_BUILTIN_PSHUFD,
23561 IX86_BUILTIN_PSLLDQI128,
23562 IX86_BUILTIN_PSLLWI128,
23563 IX86_BUILTIN_PSLLDI128,
23564 IX86_BUILTIN_PSLLQI128,
23565 IX86_BUILTIN_PSRAWI128,
23566 IX86_BUILTIN_PSRADI128,
23567 IX86_BUILTIN_PSRLDQI128,
23568 IX86_BUILTIN_PSRLWI128,
23569 IX86_BUILTIN_PSRLDI128,
23570 IX86_BUILTIN_PSRLQI128,
23572 IX86_BUILTIN_PSLLDQ128,
23573 IX86_BUILTIN_PSLLW128,
23574 IX86_BUILTIN_PSLLD128,
23575 IX86_BUILTIN_PSLLQ128,
23576 IX86_BUILTIN_PSRAW128,
23577 IX86_BUILTIN_PSRAD128,
23578 IX86_BUILTIN_PSRLW128,
23579 IX86_BUILTIN_PSRLD128,
23580 IX86_BUILTIN_PSRLQ128,
23582 IX86_BUILTIN_PUNPCKHBW128,
23583 IX86_BUILTIN_PUNPCKHWD128,
23584 IX86_BUILTIN_PUNPCKHDQ128,
23585 IX86_BUILTIN_PUNPCKHQDQ128,
23586 IX86_BUILTIN_PUNPCKLBW128,
23587 IX86_BUILTIN_PUNPCKLWD128,
23588 IX86_BUILTIN_PUNPCKLDQ128,
23589 IX86_BUILTIN_PUNPCKLQDQ128,
23591 IX86_BUILTIN_CLFLUSH,
23592 IX86_BUILTIN_MFENCE,
23593 IX86_BUILTIN_LFENCE,
23595 IX86_BUILTIN_BSRSI,
23596 IX86_BUILTIN_BSRDI,
23597 IX86_BUILTIN_RDPMC,
23598 IX86_BUILTIN_RDTSC,
23599 IX86_BUILTIN_RDTSCP,
23600 IX86_BUILTIN_ROLQI,
23601 IX86_BUILTIN_ROLHI,
23602 IX86_BUILTIN_RORQI,
23603 IX86_BUILTIN_RORHI,
23606 IX86_BUILTIN_ADDSUBPS,
23607 IX86_BUILTIN_HADDPS,
23608 IX86_BUILTIN_HSUBPS,
23609 IX86_BUILTIN_MOVSHDUP,
23610 IX86_BUILTIN_MOVSLDUP,
23611 IX86_BUILTIN_ADDSUBPD,
23612 IX86_BUILTIN_HADDPD,
23613 IX86_BUILTIN_HSUBPD,
23614 IX86_BUILTIN_LDDQU,
23616 IX86_BUILTIN_MONITOR,
23617 IX86_BUILTIN_MWAIT,
23620 IX86_BUILTIN_PHADDW,
23621 IX86_BUILTIN_PHADDD,
23622 IX86_BUILTIN_PHADDSW,
23623 IX86_BUILTIN_PHSUBW,
23624 IX86_BUILTIN_PHSUBD,
23625 IX86_BUILTIN_PHSUBSW,
23626 IX86_BUILTIN_PMADDUBSW,
23627 IX86_BUILTIN_PMULHRSW,
23628 IX86_BUILTIN_PSHUFB,
23629 IX86_BUILTIN_PSIGNB,
23630 IX86_BUILTIN_PSIGNW,
23631 IX86_BUILTIN_PSIGND,
23632 IX86_BUILTIN_PALIGNR,
23633 IX86_BUILTIN_PABSB,
23634 IX86_BUILTIN_PABSW,
23635 IX86_BUILTIN_PABSD,
23637 IX86_BUILTIN_PHADDW128,
23638 IX86_BUILTIN_PHADDD128,
23639 IX86_BUILTIN_PHADDSW128,
23640 IX86_BUILTIN_PHSUBW128,
23641 IX86_BUILTIN_PHSUBD128,
23642 IX86_BUILTIN_PHSUBSW128,
23643 IX86_BUILTIN_PMADDUBSW128,
23644 IX86_BUILTIN_PMULHRSW128,
23645 IX86_BUILTIN_PSHUFB128,
23646 IX86_BUILTIN_PSIGNB128,
23647 IX86_BUILTIN_PSIGNW128,
23648 IX86_BUILTIN_PSIGND128,
23649 IX86_BUILTIN_PALIGNR128,
23650 IX86_BUILTIN_PABSB128,
23651 IX86_BUILTIN_PABSW128,
23652 IX86_BUILTIN_PABSD128,
23654 /* AMDFAM10 - SSE4A New Instructions. */
23655 IX86_BUILTIN_MOVNTSD,
23656 IX86_BUILTIN_MOVNTSS,
23657 IX86_BUILTIN_EXTRQI,
23658 IX86_BUILTIN_EXTRQ,
23659 IX86_BUILTIN_INSERTQI,
23660 IX86_BUILTIN_INSERTQ,
23663 IX86_BUILTIN_BLENDPD,
23664 IX86_BUILTIN_BLENDPS,
23665 IX86_BUILTIN_BLENDVPD,
23666 IX86_BUILTIN_BLENDVPS,
23667 IX86_BUILTIN_PBLENDVB128,
23668 IX86_BUILTIN_PBLENDW128,
23673 IX86_BUILTIN_INSERTPS128,
23675 IX86_BUILTIN_MOVNTDQA,
23676 IX86_BUILTIN_MPSADBW128,
23677 IX86_BUILTIN_PACKUSDW128,
23678 IX86_BUILTIN_PCMPEQQ,
23679 IX86_BUILTIN_PHMINPOSUW128,
23681 IX86_BUILTIN_PMAXSB128,
23682 IX86_BUILTIN_PMAXSD128,
23683 IX86_BUILTIN_PMAXUD128,
23684 IX86_BUILTIN_PMAXUW128,
23686 IX86_BUILTIN_PMINSB128,
23687 IX86_BUILTIN_PMINSD128,
23688 IX86_BUILTIN_PMINUD128,
23689 IX86_BUILTIN_PMINUW128,
23691 IX86_BUILTIN_PMOVSXBW128,
23692 IX86_BUILTIN_PMOVSXBD128,
23693 IX86_BUILTIN_PMOVSXBQ128,
23694 IX86_BUILTIN_PMOVSXWD128,
23695 IX86_BUILTIN_PMOVSXWQ128,
23696 IX86_BUILTIN_PMOVSXDQ128,
23698 IX86_BUILTIN_PMOVZXBW128,
23699 IX86_BUILTIN_PMOVZXBD128,
23700 IX86_BUILTIN_PMOVZXBQ128,
23701 IX86_BUILTIN_PMOVZXWD128,
23702 IX86_BUILTIN_PMOVZXWQ128,
23703 IX86_BUILTIN_PMOVZXDQ128,
23705 IX86_BUILTIN_PMULDQ128,
23706 IX86_BUILTIN_PMULLD128,
23708 IX86_BUILTIN_ROUNDPD,
23709 IX86_BUILTIN_ROUNDPS,
23710 IX86_BUILTIN_ROUNDSD,
23711 IX86_BUILTIN_ROUNDSS,
23713 IX86_BUILTIN_PTESTZ,
23714 IX86_BUILTIN_PTESTC,
23715 IX86_BUILTIN_PTESTNZC,
23717 IX86_BUILTIN_VEC_INIT_V2SI,
23718 IX86_BUILTIN_VEC_INIT_V4HI,
23719 IX86_BUILTIN_VEC_INIT_V8QI,
23720 IX86_BUILTIN_VEC_EXT_V2DF,
23721 IX86_BUILTIN_VEC_EXT_V2DI,
23722 IX86_BUILTIN_VEC_EXT_V4SF,
23723 IX86_BUILTIN_VEC_EXT_V4SI,
23724 IX86_BUILTIN_VEC_EXT_V8HI,
23725 IX86_BUILTIN_VEC_EXT_V2SI,
23726 IX86_BUILTIN_VEC_EXT_V4HI,
23727 IX86_BUILTIN_VEC_EXT_V16QI,
23728 IX86_BUILTIN_VEC_SET_V2DI,
23729 IX86_BUILTIN_VEC_SET_V4SF,
23730 IX86_BUILTIN_VEC_SET_V4SI,
23731 IX86_BUILTIN_VEC_SET_V8HI,
23732 IX86_BUILTIN_VEC_SET_V4HI,
23733 IX86_BUILTIN_VEC_SET_V16QI,
23735 IX86_BUILTIN_VEC_PACK_SFIX,
23738 IX86_BUILTIN_CRC32QI,
23739 IX86_BUILTIN_CRC32HI,
23740 IX86_BUILTIN_CRC32SI,
23741 IX86_BUILTIN_CRC32DI,
23743 IX86_BUILTIN_PCMPESTRI128,
23744 IX86_BUILTIN_PCMPESTRM128,
23745 IX86_BUILTIN_PCMPESTRA128,
23746 IX86_BUILTIN_PCMPESTRC128,
23747 IX86_BUILTIN_PCMPESTRO128,
23748 IX86_BUILTIN_PCMPESTRS128,
23749 IX86_BUILTIN_PCMPESTRZ128,
23750 IX86_BUILTIN_PCMPISTRI128,
23751 IX86_BUILTIN_PCMPISTRM128,
23752 IX86_BUILTIN_PCMPISTRA128,
23753 IX86_BUILTIN_PCMPISTRC128,
23754 IX86_BUILTIN_PCMPISTRO128,
23755 IX86_BUILTIN_PCMPISTRS128,
23756 IX86_BUILTIN_PCMPISTRZ128,
23758 IX86_BUILTIN_PCMPGTQ,
23760 /* AES instructions */
23761 IX86_BUILTIN_AESENC128,
23762 IX86_BUILTIN_AESENCLAST128,
23763 IX86_BUILTIN_AESDEC128,
23764 IX86_BUILTIN_AESDECLAST128,
23765 IX86_BUILTIN_AESIMC128,
23766 IX86_BUILTIN_AESKEYGENASSIST128,
23768 /* PCLMUL instruction */
23769 IX86_BUILTIN_PCLMULQDQ128,
23772 IX86_BUILTIN_ADDPD256,
23773 IX86_BUILTIN_ADDPS256,
23774 IX86_BUILTIN_ADDSUBPD256,
23775 IX86_BUILTIN_ADDSUBPS256,
23776 IX86_BUILTIN_ANDPD256,
23777 IX86_BUILTIN_ANDPS256,
23778 IX86_BUILTIN_ANDNPD256,
23779 IX86_BUILTIN_ANDNPS256,
23780 IX86_BUILTIN_BLENDPD256,
23781 IX86_BUILTIN_BLENDPS256,
23782 IX86_BUILTIN_BLENDVPD256,
23783 IX86_BUILTIN_BLENDVPS256,
23784 IX86_BUILTIN_DIVPD256,
23785 IX86_BUILTIN_DIVPS256,
23786 IX86_BUILTIN_DPPS256,
23787 IX86_BUILTIN_HADDPD256,
23788 IX86_BUILTIN_HADDPS256,
23789 IX86_BUILTIN_HSUBPD256,
23790 IX86_BUILTIN_HSUBPS256,
23791 IX86_BUILTIN_MAXPD256,
23792 IX86_BUILTIN_MAXPS256,
23793 IX86_BUILTIN_MINPD256,
23794 IX86_BUILTIN_MINPS256,
23795 IX86_BUILTIN_MULPD256,
23796 IX86_BUILTIN_MULPS256,
23797 IX86_BUILTIN_ORPD256,
23798 IX86_BUILTIN_ORPS256,
23799 IX86_BUILTIN_SHUFPD256,
23800 IX86_BUILTIN_SHUFPS256,
23801 IX86_BUILTIN_SUBPD256,
23802 IX86_BUILTIN_SUBPS256,
23803 IX86_BUILTIN_XORPD256,
23804 IX86_BUILTIN_XORPS256,
23805 IX86_BUILTIN_CMPSD,
23806 IX86_BUILTIN_CMPSS,
23807 IX86_BUILTIN_CMPPD,
23808 IX86_BUILTIN_CMPPS,
23809 IX86_BUILTIN_CMPPD256,
23810 IX86_BUILTIN_CMPPS256,
23811 IX86_BUILTIN_CVTDQ2PD256,
23812 IX86_BUILTIN_CVTDQ2PS256,
23813 IX86_BUILTIN_CVTPD2PS256,
23814 IX86_BUILTIN_CVTPS2DQ256,
23815 IX86_BUILTIN_CVTPS2PD256,
23816 IX86_BUILTIN_CVTTPD2DQ256,
23817 IX86_BUILTIN_CVTPD2DQ256,
23818 IX86_BUILTIN_CVTTPS2DQ256,
23819 IX86_BUILTIN_EXTRACTF128PD256,
23820 IX86_BUILTIN_EXTRACTF128PS256,
23821 IX86_BUILTIN_EXTRACTF128SI256,
23822 IX86_BUILTIN_VZEROALL,
23823 IX86_BUILTIN_VZEROUPPER,
23824 IX86_BUILTIN_VPERMILVARPD,
23825 IX86_BUILTIN_VPERMILVARPS,
23826 IX86_BUILTIN_VPERMILVARPD256,
23827 IX86_BUILTIN_VPERMILVARPS256,
23828 IX86_BUILTIN_VPERMILPD,
23829 IX86_BUILTIN_VPERMILPS,
23830 IX86_BUILTIN_VPERMILPD256,
23831 IX86_BUILTIN_VPERMILPS256,
23832 IX86_BUILTIN_VPERMIL2PD,
23833 IX86_BUILTIN_VPERMIL2PS,
23834 IX86_BUILTIN_VPERMIL2PD256,
23835 IX86_BUILTIN_VPERMIL2PS256,
23836 IX86_BUILTIN_VPERM2F128PD256,
23837 IX86_BUILTIN_VPERM2F128PS256,
23838 IX86_BUILTIN_VPERM2F128SI256,
23839 IX86_BUILTIN_VBROADCASTSS,
23840 IX86_BUILTIN_VBROADCASTSD256,
23841 IX86_BUILTIN_VBROADCASTSS256,
23842 IX86_BUILTIN_VBROADCASTPD256,
23843 IX86_BUILTIN_VBROADCASTPS256,
23844 IX86_BUILTIN_VINSERTF128PD256,
23845 IX86_BUILTIN_VINSERTF128PS256,
23846 IX86_BUILTIN_VINSERTF128SI256,
23847 IX86_BUILTIN_LOADUPD256,
23848 IX86_BUILTIN_LOADUPS256,
23849 IX86_BUILTIN_STOREUPD256,
23850 IX86_BUILTIN_STOREUPS256,
23851 IX86_BUILTIN_LDDQU256,
23852 IX86_BUILTIN_MOVNTDQ256,
23853 IX86_BUILTIN_MOVNTPD256,
23854 IX86_BUILTIN_MOVNTPS256,
23855 IX86_BUILTIN_LOADDQU256,
23856 IX86_BUILTIN_STOREDQU256,
23857 IX86_BUILTIN_MASKLOADPD,
23858 IX86_BUILTIN_MASKLOADPS,
23859 IX86_BUILTIN_MASKSTOREPD,
23860 IX86_BUILTIN_MASKSTOREPS,
23861 IX86_BUILTIN_MASKLOADPD256,
23862 IX86_BUILTIN_MASKLOADPS256,
23863 IX86_BUILTIN_MASKSTOREPD256,
23864 IX86_BUILTIN_MASKSTOREPS256,
23865 IX86_BUILTIN_MOVSHDUP256,
23866 IX86_BUILTIN_MOVSLDUP256,
23867 IX86_BUILTIN_MOVDDUP256,
23869 IX86_BUILTIN_SQRTPD256,
23870 IX86_BUILTIN_SQRTPS256,
23871 IX86_BUILTIN_SQRTPS_NR256,
23872 IX86_BUILTIN_RSQRTPS256,
23873 IX86_BUILTIN_RSQRTPS_NR256,
23875 IX86_BUILTIN_RCPPS256,
23877 IX86_BUILTIN_ROUNDPD256,
23878 IX86_BUILTIN_ROUNDPS256,
23880 IX86_BUILTIN_UNPCKHPD256,
23881 IX86_BUILTIN_UNPCKLPD256,
23882 IX86_BUILTIN_UNPCKHPS256,
23883 IX86_BUILTIN_UNPCKLPS256,
23885 IX86_BUILTIN_SI256_SI,
23886 IX86_BUILTIN_PS256_PS,
23887 IX86_BUILTIN_PD256_PD,
23888 IX86_BUILTIN_SI_SI256,
23889 IX86_BUILTIN_PS_PS256,
23890 IX86_BUILTIN_PD_PD256,
23892 IX86_BUILTIN_VTESTZPD,
23893 IX86_BUILTIN_VTESTCPD,
23894 IX86_BUILTIN_VTESTNZCPD,
23895 IX86_BUILTIN_VTESTZPS,
23896 IX86_BUILTIN_VTESTCPS,
23897 IX86_BUILTIN_VTESTNZCPS,
23898 IX86_BUILTIN_VTESTZPD256,
23899 IX86_BUILTIN_VTESTCPD256,
23900 IX86_BUILTIN_VTESTNZCPD256,
23901 IX86_BUILTIN_VTESTZPS256,
23902 IX86_BUILTIN_VTESTCPS256,
23903 IX86_BUILTIN_VTESTNZCPS256,
23904 IX86_BUILTIN_PTESTZ256,
23905 IX86_BUILTIN_PTESTC256,
23906 IX86_BUILTIN_PTESTNZC256,
23908 IX86_BUILTIN_MOVMSKPD256,
23909 IX86_BUILTIN_MOVMSKPS256,
23911 /* TFmode support builtins. */
23913 IX86_BUILTIN_HUGE_VALQ,
23914 IX86_BUILTIN_FABSQ,
23915 IX86_BUILTIN_COPYSIGNQ,
23917 /* Vectorizer support builtins. */
23918 IX86_BUILTIN_CPYSGNPS,
23919 IX86_BUILTIN_CPYSGNPD,
23920 IX86_BUILTIN_CPYSGNPS256,
23921 IX86_BUILTIN_CPYSGNPD256,
23923 IX86_BUILTIN_CVTUDQ2PS,
23925 IX86_BUILTIN_VEC_PERM_V2DF,
23926 IX86_BUILTIN_VEC_PERM_V4SF,
23927 IX86_BUILTIN_VEC_PERM_V2DI,
23928 IX86_BUILTIN_VEC_PERM_V4SI,
23929 IX86_BUILTIN_VEC_PERM_V8HI,
23930 IX86_BUILTIN_VEC_PERM_V16QI,
23931 IX86_BUILTIN_VEC_PERM_V2DI_U,
23932 IX86_BUILTIN_VEC_PERM_V4SI_U,
23933 IX86_BUILTIN_VEC_PERM_V8HI_U,
23934 IX86_BUILTIN_VEC_PERM_V16QI_U,
23935 IX86_BUILTIN_VEC_PERM_V4DF,
23936 IX86_BUILTIN_VEC_PERM_V8SF,
23938 /* FMA4 and XOP instructions. */
23939 IX86_BUILTIN_VFMADDSS,
23940 IX86_BUILTIN_VFMADDSD,
23941 IX86_BUILTIN_VFMADDPS,
23942 IX86_BUILTIN_VFMADDPD,
23943 IX86_BUILTIN_VFMADDPS256,
23944 IX86_BUILTIN_VFMADDPD256,
23945 IX86_BUILTIN_VFMADDSUBPS,
23946 IX86_BUILTIN_VFMADDSUBPD,
23947 IX86_BUILTIN_VFMADDSUBPS256,
23948 IX86_BUILTIN_VFMADDSUBPD256,
23950 IX86_BUILTIN_VPCMOV,
23951 IX86_BUILTIN_VPCMOV_V2DI,
23952 IX86_BUILTIN_VPCMOV_V4SI,
23953 IX86_BUILTIN_VPCMOV_V8HI,
23954 IX86_BUILTIN_VPCMOV_V16QI,
23955 IX86_BUILTIN_VPCMOV_V4SF,
23956 IX86_BUILTIN_VPCMOV_V2DF,
23957 IX86_BUILTIN_VPCMOV256,
23958 IX86_BUILTIN_VPCMOV_V4DI256,
23959 IX86_BUILTIN_VPCMOV_V8SI256,
23960 IX86_BUILTIN_VPCMOV_V16HI256,
23961 IX86_BUILTIN_VPCMOV_V32QI256,
23962 IX86_BUILTIN_VPCMOV_V8SF256,
23963 IX86_BUILTIN_VPCMOV_V4DF256,
23965 IX86_BUILTIN_VPPERM,
23967 IX86_BUILTIN_VPMACSSWW,
23968 IX86_BUILTIN_VPMACSWW,
23969 IX86_BUILTIN_VPMACSSWD,
23970 IX86_BUILTIN_VPMACSWD,
23971 IX86_BUILTIN_VPMACSSDD,
23972 IX86_BUILTIN_VPMACSDD,
23973 IX86_BUILTIN_VPMACSSDQL,
23974 IX86_BUILTIN_VPMACSSDQH,
23975 IX86_BUILTIN_VPMACSDQL,
23976 IX86_BUILTIN_VPMACSDQH,
23977 IX86_BUILTIN_VPMADCSSWD,
23978 IX86_BUILTIN_VPMADCSWD,
23980 IX86_BUILTIN_VPHADDBW,
23981 IX86_BUILTIN_VPHADDBD,
23982 IX86_BUILTIN_VPHADDBQ,
23983 IX86_BUILTIN_VPHADDWD,
23984 IX86_BUILTIN_VPHADDWQ,
23985 IX86_BUILTIN_VPHADDDQ,
23986 IX86_BUILTIN_VPHADDUBW,
23987 IX86_BUILTIN_VPHADDUBD,
23988 IX86_BUILTIN_VPHADDUBQ,
23989 IX86_BUILTIN_VPHADDUWD,
23990 IX86_BUILTIN_VPHADDUWQ,
23991 IX86_BUILTIN_VPHADDUDQ,
23992 IX86_BUILTIN_VPHSUBBW,
23993 IX86_BUILTIN_VPHSUBWD,
23994 IX86_BUILTIN_VPHSUBDQ,
23996 IX86_BUILTIN_VPROTB,
23997 IX86_BUILTIN_VPROTW,
23998 IX86_BUILTIN_VPROTD,
23999 IX86_BUILTIN_VPROTQ,
24000 IX86_BUILTIN_VPROTB_IMM,
24001 IX86_BUILTIN_VPROTW_IMM,
24002 IX86_BUILTIN_VPROTD_IMM,
24003 IX86_BUILTIN_VPROTQ_IMM,
24005 IX86_BUILTIN_VPSHLB,
24006 IX86_BUILTIN_VPSHLW,
24007 IX86_BUILTIN_VPSHLD,
24008 IX86_BUILTIN_VPSHLQ,
24009 IX86_BUILTIN_VPSHAB,
24010 IX86_BUILTIN_VPSHAW,
24011 IX86_BUILTIN_VPSHAD,
24012 IX86_BUILTIN_VPSHAQ,
24014 IX86_BUILTIN_VFRCZSS,
24015 IX86_BUILTIN_VFRCZSD,
24016 IX86_BUILTIN_VFRCZPS,
24017 IX86_BUILTIN_VFRCZPD,
24018 IX86_BUILTIN_VFRCZPS256,
24019 IX86_BUILTIN_VFRCZPD256,
24021 IX86_BUILTIN_VPCOMEQUB,
24022 IX86_BUILTIN_VPCOMNEUB,
24023 IX86_BUILTIN_VPCOMLTUB,
24024 IX86_BUILTIN_VPCOMLEUB,
24025 IX86_BUILTIN_VPCOMGTUB,
24026 IX86_BUILTIN_VPCOMGEUB,
24027 IX86_BUILTIN_VPCOMFALSEUB,
24028 IX86_BUILTIN_VPCOMTRUEUB,
24030 IX86_BUILTIN_VPCOMEQUW,
24031 IX86_BUILTIN_VPCOMNEUW,
24032 IX86_BUILTIN_VPCOMLTUW,
24033 IX86_BUILTIN_VPCOMLEUW,
24034 IX86_BUILTIN_VPCOMGTUW,
24035 IX86_BUILTIN_VPCOMGEUW,
24036 IX86_BUILTIN_VPCOMFALSEUW,
24037 IX86_BUILTIN_VPCOMTRUEUW,
24039 IX86_BUILTIN_VPCOMEQUD,
24040 IX86_BUILTIN_VPCOMNEUD,
24041 IX86_BUILTIN_VPCOMLTUD,
24042 IX86_BUILTIN_VPCOMLEUD,
24043 IX86_BUILTIN_VPCOMGTUD,
24044 IX86_BUILTIN_VPCOMGEUD,
24045 IX86_BUILTIN_VPCOMFALSEUD,
24046 IX86_BUILTIN_VPCOMTRUEUD,
24048 IX86_BUILTIN_VPCOMEQUQ,
24049 IX86_BUILTIN_VPCOMNEUQ,
24050 IX86_BUILTIN_VPCOMLTUQ,
24051 IX86_BUILTIN_VPCOMLEUQ,
24052 IX86_BUILTIN_VPCOMGTUQ,
24053 IX86_BUILTIN_VPCOMGEUQ,
24054 IX86_BUILTIN_VPCOMFALSEUQ,
24055 IX86_BUILTIN_VPCOMTRUEUQ,
24057 IX86_BUILTIN_VPCOMEQB,
24058 IX86_BUILTIN_VPCOMNEB,
24059 IX86_BUILTIN_VPCOMLTB,
24060 IX86_BUILTIN_VPCOMLEB,
24061 IX86_BUILTIN_VPCOMGTB,
24062 IX86_BUILTIN_VPCOMGEB,
24063 IX86_BUILTIN_VPCOMFALSEB,
24064 IX86_BUILTIN_VPCOMTRUEB,
24066 IX86_BUILTIN_VPCOMEQW,
24067 IX86_BUILTIN_VPCOMNEW,
24068 IX86_BUILTIN_VPCOMLTW,
24069 IX86_BUILTIN_VPCOMLEW,
24070 IX86_BUILTIN_VPCOMGTW,
24071 IX86_BUILTIN_VPCOMGEW,
24072 IX86_BUILTIN_VPCOMFALSEW,
24073 IX86_BUILTIN_VPCOMTRUEW,
24075 IX86_BUILTIN_VPCOMEQD,
24076 IX86_BUILTIN_VPCOMNED,
24077 IX86_BUILTIN_VPCOMLTD,
24078 IX86_BUILTIN_VPCOMLED,
24079 IX86_BUILTIN_VPCOMGTD,
24080 IX86_BUILTIN_VPCOMGED,
24081 IX86_BUILTIN_VPCOMFALSED,
24082 IX86_BUILTIN_VPCOMTRUED,
24084 IX86_BUILTIN_VPCOMEQQ,
24085 IX86_BUILTIN_VPCOMNEQ,
24086 IX86_BUILTIN_VPCOMLTQ,
24087 IX86_BUILTIN_VPCOMLEQ,
24088 IX86_BUILTIN_VPCOMGTQ,
24089 IX86_BUILTIN_VPCOMGEQ,
24090 IX86_BUILTIN_VPCOMFALSEQ,
24091 IX86_BUILTIN_VPCOMTRUEQ,
24093 /* LWP instructions. */
24094 IX86_BUILTIN_LLWPCB,
24095 IX86_BUILTIN_SLWPCB,
24096 IX86_BUILTIN_LWPVAL32,
24097 IX86_BUILTIN_LWPVAL64,
24098 IX86_BUILTIN_LWPINS32,
24099 IX86_BUILTIN_LWPINS64,
24103 /* BMI instructions. */
24104 IX86_BUILTIN_BEXTR32,
24105 IX86_BUILTIN_BEXTR64,
24108 /* TBM instructions. */
24109 IX86_BUILTIN_BEXTRI32,
24110 IX86_BUILTIN_BEXTRI64,
24113 /* FSGSBASE instructions. */
24114 IX86_BUILTIN_RDFSBASE32,
24115 IX86_BUILTIN_RDFSBASE64,
24116 IX86_BUILTIN_RDGSBASE32,
24117 IX86_BUILTIN_RDGSBASE64,
24118 IX86_BUILTIN_WRFSBASE32,
24119 IX86_BUILTIN_WRFSBASE64,
24120 IX86_BUILTIN_WRGSBASE32,
24121 IX86_BUILTIN_WRGSBASE64,
24123 /* RDRND instructions. */
24124 IX86_BUILTIN_RDRAND16,
24125 IX86_BUILTIN_RDRAND32,
24126 IX86_BUILTIN_RDRAND64,
24128 /* F16C instructions. */
24129 IX86_BUILTIN_CVTPH2PS,
24130 IX86_BUILTIN_CVTPH2PS256,
24131 IX86_BUILTIN_CVTPS2PH,
24132 IX86_BUILTIN_CVTPS2PH256,
24134 IX86_BUILTIN_MAX
24135 };
24137 /* Table for the ix86 builtin decls. */
24138 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24140 /* Table of all of the builtin functions that are possible with different ISA's
24141 but are waiting to be built until a function is declared to use that
24142 ISA. */
24143 struct builtin_isa {
24144 const char *name; /* function name */
24145 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24146 int isa; /* isa_flags this builtin is defined for */
24147 bool const_p; /* true if the declaration is constant */
24148 bool set_and_not_built_p;
24149 };
24151 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
24154 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24155 of which isa_flags to use in the ix86_builtins_isa array. Stores the
24156 function decl in the ix86_builtins array. Returns the function decl or
24157 NULL_TREE, if the builtin was not added.
24159 If the front end has a special hook for builtin functions, delay adding
24160 builtin functions that aren't in the current ISA until the ISA is changed
24161 with function specific optimization. Doing so can save about 300K for the
24162 default compiler. When the builtin is expanded, check at that time whether
24163 it is valid.
24165 If the front end doesn't have a special hook, record all builtins, even if
24166 they aren't in the current ISA, in case the user uses function specific
24167 options for a different ISA, so that we don't get scope errors if a
24168 builtin is added in the middle of a function scope. */
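/* An illustrative call (assuming the VOID_FTYPE_UNSIGNED type code
   generated into i386-builtin-types.inc):

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);  */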
24171 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24172 enum ix86_builtins code)
24174 tree decl = NULL_TREE;
24176 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24178 ix86_builtins_isa[(int) code].isa = mask;
24180 mask &= ~OPTION_MASK_ISA_64BIT;
24181 if (mask == 0
24182 || (mask & ix86_isa_flags) != 0
24183 || (lang_hooks.builtin_function
24184 == lang_hooks.builtin_function_ext_scope))
24187 tree type = ix86_get_builtin_func_type (tcode);
24188 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24190 ix86_builtins[(int) code] = decl;
24191 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24195 ix86_builtins[(int) code] = NULL_TREE;
24196 ix86_builtins_isa[(int) code].tcode = tcode;
24197 ix86_builtins_isa[(int) code].name = name;
24198 ix86_builtins_isa[(int) code].const_p = false;
24199 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24206 /* Like def_builtin, but also marks the function decl "const". */
24209 def_builtin_const (int mask, const char *name,
24210 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24212 tree decl = def_builtin (mask, name, tcode, code);
24213 if (decl)
24214 TREE_READONLY (decl) = 1;
24215 else
24216 ix86_builtins_isa[(int) code].const_p = true;
24221 /* Add any new builtin functions for a given ISA that may not have been
24222 declared. This saves a bit of space compared to adding all of the
24223 declarations to the tree, even if we didn't use them. */
24226 ix86_add_new_builtins (int isa)
24230 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24232 if ((ix86_builtins_isa[i].isa & isa) != 0
24233 && ix86_builtins_isa[i].set_and_not_built_p)
24234 {
24235 tree decl, type;
24237 /* Don't define the builtin again. */
24238 ix86_builtins_isa[i].set_and_not_built_p = false;
24240 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24241 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24242 type, i, BUILT_IN_MD, NULL,
24245 ix86_builtins[i] = decl;
24246 if (ix86_builtins_isa[i].const_p)
24247 TREE_READONLY (decl) = 1;
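
/* Intended use (an illustrative sketch, not a quotation of the actual
   call site): once function-specific option handling has recomputed
   ix86_isa_flags, e.g. for __attribute__((target ("avx"))), the deferred
   decls for any newly enabled ISA can be materialized with

     ix86_add_new_builtins (ix86_isa_flags);

   Passing the full flag set is safe: set_and_not_built_p is cleared the
   first time a builtin is built, so nothing is ever defined twice.  */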

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1
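
/* Example of the operand swap (illustrative): SSE provides "less than"
   style packed compares but no direct "greater than", so a GT comparison
   is emitted by swapping the operands of the LT pattern:

     a > b   is generated as   b < a

   This is why entries below such as __builtin_ia32_cmpgtps pair the
   comparison code LT with a ..._SWAP function type, and why a comi-style
   entry may carry BUILTIN_DESC_SWAP_OPERANDS in its flag field.  */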

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
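
/* Note on the comparison codes above: comiss/comisd set ZF when the
   operands are equal *or* unordered, so a ZF test after the instruction
   really computes UNEQ rather than EQ, and a ZF == 0 test computes LTGT
   rather than NE; likewise CF tests yield UNLT/UNLE.  The codes in the
   table encode what the flags test actually delivers.  The ucomi variants
   differ only in not raising the invalid exception on quiet NaNs.  */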

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
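
/* The pcmpestr/pcmpistr variants differ only in which result of the
   string compare they expose: the i/m forms return the index or the mask,
   while the a/c/o/s/z forms test a single EFLAGS bit, selected by the CC
   mode in the last field (CCAmode for "above", CCCmode for the carry
   flag, and so on).  For example, the <smmintrin.h> wrapper _mm_cmpistrc
   is expected to expand through __builtin_ia32_pcmpistric128 and return
   the carry flag.  */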

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
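
/* Anatomy of one entry above (annotated copy of the unaligned SSE load):

     { OPTION_MASK_ISA_SSE,          ISA that must be enabled
       CODE_FOR_sse_movups,          insn pattern the expander will emit
       "__builtin_ia32_loadups",     user-visible builtin name
       IX86_BUILTIN_LOADUPS,         enum ix86_builtins code
       UNKNOWN,                      no comparison code is involved
       (int) V4SF_FTYPE_PCFLOAT },   type: V4SF (*) (const float *)

   The expander keys off the function type to decide how many operands to
   expect and whether any of them is a memory operand.  */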

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24706 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24708 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24709 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24710 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24711 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24712 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24713 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24714 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24715 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24726 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24727 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
24729 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24731 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24732 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24744 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24745 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
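
/* A note on the shorthands above: MULTI_ARG_<N>_<MODE>... abbreviates
   the ix86_builtin_func_type of an N-operand FMA4/XOP builtin so each
   table entry below fits on one line.  For example, MULTI_ARG_3_SF is
   V4SF_FTYPE_V4SF_V4SF_V4SF, a builtin taking three V4SF operands and
   returning V4SF.  The _CMP, _TF and _IMM variants take a comparison
   code, a pcom true/false code or an immediate as their last operand,
   as the rtx codes (EQ, LTU, PCOM_FALSE, ...) in the corresponding
   entries below show.  */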
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
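/* Illustrative only: xopintrin.h wraps these descriptors as user-visible
   intrinsics, roughly (assuming the usual intrinsic spellings)

     __m128i _mm_comlt_epu32 (__m128i a, __m128i b)
     {
       return (__m128i) __builtin_ia32_vpcomltud ((__v4si) a, (__v4si) b);
     }

   so each table row above becomes one XOP intrinsic.  */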
25317 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25318 in the current target ISA to allow the user to compile particular modules
25319    with different target specific options that differ from the command line options.  */
25322 ix86_init_mmx_sse_builtins (void)
25324 const struct builtin_description * d;
25325 enum ix86_builtin_func_type ftype;
25328 /* Add all special builtins with variable number of operands. */
25329 for (i = 0, d = bdesc_special_args;
25330 i < ARRAY_SIZE (bdesc_special_args);
25336 ftype = (enum ix86_builtin_func_type) d->flag;
25337 def_builtin (d->mask, d->name, ftype, d->code);
25340 /* Add all builtins with variable number of operands. */
25341 for (i = 0, d = bdesc_args;
25342 i < ARRAY_SIZE (bdesc_args);
25348 ftype = (enum ix86_builtin_func_type) d->flag;
25349 def_builtin_const (d->mask, d->name, ftype, d->code);
25352 /* pcmpestr[im] insns. */
25353 for (i = 0, d = bdesc_pcmpestr;
25354 i < ARRAY_SIZE (bdesc_pcmpestr);
25357 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25358 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25360 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25361 def_builtin_const (d->mask, d->name, ftype, d->code);
25364 /* pcmpistr[im] insns. */
25365 for (i = 0, d = bdesc_pcmpistr;
25366 i < ARRAY_SIZE (bdesc_pcmpistr);
25369 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25370 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25372 ftype = INT_FTYPE_V16QI_V16QI_INT;
25373 def_builtin_const (d->mask, d->name, ftype, d->code);
25376 /* comi/ucomi insns. */
25377 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25379 if (d->mask == OPTION_MASK_ISA_SSE2)
25380 ftype = INT_FTYPE_V2DF_V2DF;
25382 ftype = INT_FTYPE_V4SF_V4SF;
25383 def_builtin_const (d->mask, d->name, ftype, d->code);
25387 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25388 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25389 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25390 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25392 /* SSE or 3DNow!A */
25393 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25394 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25395 IX86_BUILTIN_MASKMOVQ);
25398 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25399 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25401 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25402 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25403 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25404 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25407 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25408 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25409 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25410 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25413 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25414 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25415 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25416 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25417 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25418 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25419 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25420 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25421 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25422 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25423 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25424 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25427 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25428 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25430 /* MMX access to the vec_init patterns. */
25431 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25432 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25434 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25435 V4HI_FTYPE_HI_HI_HI_HI,
25436 IX86_BUILTIN_VEC_INIT_V4HI);
25438 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25439 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25440 IX86_BUILTIN_VEC_INIT_V8QI);
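/* Illustrative only: mmintrin.h builds on these builtins, roughly

     __m64 _mm_set_pi32 (int e1, int e0)
     { return (__m64) __builtin_ia32_vec_init_v2si (e0, e1); }

   so MMX vectors can be constructed without vector-literal syntax.  */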
25442 /* Access to the vec_extract patterns. */
25443 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25444 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25445 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25446 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25447 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25448 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25449 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25450 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25451 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25452 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25454 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25455 "__builtin_ia32_vec_ext_v4hi",
25456 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25458 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25459 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25461 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25462 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
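/* Illustrative only: the SSE2 header uses these as, roughly,

     int _mm_extract_epi16 (__m128i a, int n)
     { return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) a, n); }

   where the selector N must be a constant (checked later by
   get_element_number).  */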
25464 /* Access to the vec_set patterns. */
25465 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25466 "__builtin_ia32_vec_set_v2di",
25467 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25469 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25470 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25472 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25473 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25475 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25476 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25478 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25479 "__builtin_ia32_vec_set_v4hi",
25480 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25482 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25483 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
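/* Illustrative only: insertion is the mirror image, roughly

     __m128i _mm_insert_epi16 (__m128i a, int w, int n)
     { return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) a, w, n); }
*/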
25485    /* Add FMA4 multi-arg instructions.  */
25486 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25491 ftype = (enum ix86_builtin_func_type) d->flag;
25492 def_builtin_const (d->mask, d->name, ftype, d->code);
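/* Illustrative only: because every builtin is registered here even when
   its ISA is disabled on the command line, a module compiled with plain
   -msse2 can still contain (hypothetical example)

     __attribute__((target("avx")))
     __m256d f (__m256d x) { return __builtin_ia32_sqrtpd256 (x); }

   with the ISA check deferred to expansion time in ix86_expand_builtin.  */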
25496 /* Internal method for ix86_init_builtins. */
25499 ix86_init_builtins_va_builtins_abi (void)
25501 tree ms_va_ref, sysv_va_ref;
25502 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25503 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25504 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25505 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25509 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25510 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25511 ms_va_ref = build_reference_type (ms_va_list_type_node);
25513 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25516 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25517 fnvoid_va_start_ms =
25518 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25519 fnvoid_va_end_sysv =
25520 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25521 fnvoid_va_start_sysv =
25522 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25524 fnvoid_va_copy_ms =
25525 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25527 fnvoid_va_copy_sysv =
25528 build_function_type_list (void_type_node, sysv_va_ref,
25529 sysv_va_ref, NULL_TREE);
25531 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25532 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25533 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25534 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25535 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25536 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25537 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25538 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25539 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25540 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25541 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25542 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
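/* Illustrative only: these builtins make cross-ABI varargs usable from
   either ABI, e.g. (hypothetical)

     void __attribute__((ms_abi)) vlog (const char *fmt, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, fmt);
       ...
       __builtin_ms_va_end (ap);
     }
*/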
25546 ix86_init_builtin_types (void)
25548 tree float128_type_node, float80_type_node;
25550 /* The __float80 type. */
25551 float80_type_node = long_double_type_node;
25552 if (TYPE_MODE (float80_type_node) != XFmode)
25554 /* The __float80 type. */
25555 float80_type_node = make_node (REAL_TYPE);
25557 TYPE_PRECISION (float80_type_node) = 80;
25558 layout_type (float80_type_node);
25560 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25562 /* The __float128 type. */
25563 float128_type_node = make_node (REAL_TYPE);
25564 TYPE_PRECISION (float128_type_node) = 128;
25565 layout_type (float128_type_node);
25566 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25568 /* This macro is built by i386-builtin-types.awk. */
25569 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25573 ix86_init_builtins (void)
25577 ix86_init_builtin_types ();
25579 /* TFmode support builtins. */
25580 def_builtin_const (0, "__builtin_infq",
25581 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25582 def_builtin_const (0, "__builtin_huge_valq",
25583 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25585    /* We will expand them to a normal call if SSE2 isn't available since
25586 they are used by libgcc. */
25587 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25588 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25589 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25590 TREE_READONLY (t) = 1;
25591 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25593 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25594 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25595 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25596 TREE_READONLY (t) = 1;
25597 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
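/* Illustrative only: user code such as

     __float128 q (__float128 x, __float128 s)
     { return __builtin_copysignq (__builtin_fabsq (x), s); }

   expands inline when SSE2 is available, and otherwise falls back to
   calls to the libgcc routines __fabstf2 / __copysigntf3 named above.  */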
25599 ix86_init_mmx_sse_builtins ();
25602 ix86_init_builtins_va_builtins_abi ();
25604 #ifdef SUBTARGET_INIT_BUILTINS
25605 SUBTARGET_INIT_BUILTINS;
25609 /* Return the ix86 builtin for CODE. */
25612 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25614 if (code >= IX86_BUILTIN_MAX)
25615 return error_mark_node;
25617 return ix86_builtins[code];
25620 /* Errors in the source file can cause expand_expr to return const0_rtx
25621 where we expect a vector. To avoid crashing, use one of the vector
25622 clear instructions. */
25624 safe_vector_operand (rtx x, enum machine_mode mode)
25626 if (x == const0_rtx)
25627 x = CONST0_RTX (mode);
25631 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25634 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25637 tree arg0 = CALL_EXPR_ARG (exp, 0);
25638 tree arg1 = CALL_EXPR_ARG (exp, 1);
25639 rtx op0 = expand_normal (arg0);
25640 rtx op1 = expand_normal (arg1);
25641 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25642 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25643 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25645 if (VECTOR_MODE_P (mode0))
25646 op0 = safe_vector_operand (op0, mode0);
25647 if (VECTOR_MODE_P (mode1))
25648 op1 = safe_vector_operand (op1, mode1);
25650 if (optimize || !target
25651 || GET_MODE (target) != tmode
25652 || !insn_data[icode].operand[0].predicate (target, tmode))
25653 target = gen_reg_rtx (tmode);
25655 if (GET_MODE (op1) == SImode && mode1 == TImode)
25657 rtx x = gen_reg_rtx (V4SImode);
25658 emit_insn (gen_sse2_loadd (x, op1));
25659 op1 = gen_lowpart (TImode, x);
25662 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25663 op0 = copy_to_mode_reg (mode0, op0);
25664 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25665 op1 = copy_to_mode_reg (mode1, op1);
25667 pat = GEN_FCN (icode) (target, op0, op1);
25676 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25679 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25680 enum ix86_builtin_func_type m_type,
25681 enum rtx_code sub_code)
25686 bool comparison_p = false;
25688 bool last_arg_constant = false;
25689 int num_memory = 0;
25692 enum machine_mode mode;
25695 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25699 case MULTI_ARG_4_DF2_DI_I:
25700 case MULTI_ARG_4_DF2_DI_I1:
25701 case MULTI_ARG_4_SF2_SI_I:
25702 case MULTI_ARG_4_SF2_SI_I1:
25704 last_arg_constant = true;
25707 case MULTI_ARG_3_SF:
25708 case MULTI_ARG_3_DF:
25709 case MULTI_ARG_3_SF2:
25710 case MULTI_ARG_3_DF2:
25711 case MULTI_ARG_3_DI:
25712 case MULTI_ARG_3_SI:
25713 case MULTI_ARG_3_SI_DI:
25714 case MULTI_ARG_3_HI:
25715 case MULTI_ARG_3_HI_SI:
25716 case MULTI_ARG_3_QI:
25717 case MULTI_ARG_3_DI2:
25718 case MULTI_ARG_3_SI2:
25719 case MULTI_ARG_3_HI2:
25720 case MULTI_ARG_3_QI2:
25724 case MULTI_ARG_2_SF:
25725 case MULTI_ARG_2_DF:
25726 case MULTI_ARG_2_DI:
25727 case MULTI_ARG_2_SI:
25728 case MULTI_ARG_2_HI:
25729 case MULTI_ARG_2_QI:
25733 case MULTI_ARG_2_DI_IMM:
25734 case MULTI_ARG_2_SI_IMM:
25735 case MULTI_ARG_2_HI_IMM:
25736 case MULTI_ARG_2_QI_IMM:
25738 last_arg_constant = true;
25741 case MULTI_ARG_1_SF:
25742 case MULTI_ARG_1_DF:
25743 case MULTI_ARG_1_SF2:
25744 case MULTI_ARG_1_DF2:
25745 case MULTI_ARG_1_DI:
25746 case MULTI_ARG_1_SI:
25747 case MULTI_ARG_1_HI:
25748 case MULTI_ARG_1_QI:
25749 case MULTI_ARG_1_SI_DI:
25750 case MULTI_ARG_1_HI_DI:
25751 case MULTI_ARG_1_HI_SI:
25752 case MULTI_ARG_1_QI_DI:
25753 case MULTI_ARG_1_QI_SI:
25754 case MULTI_ARG_1_QI_HI:
25758 case MULTI_ARG_2_DI_CMP:
25759 case MULTI_ARG_2_SI_CMP:
25760 case MULTI_ARG_2_HI_CMP:
25761 case MULTI_ARG_2_QI_CMP:
25763 comparison_p = true;
25766 case MULTI_ARG_2_SF_TF:
25767 case MULTI_ARG_2_DF_TF:
25768 case MULTI_ARG_2_DI_TF:
25769 case MULTI_ARG_2_SI_TF:
25770 case MULTI_ARG_2_HI_TF:
25771 case MULTI_ARG_2_QI_TF:
25777 gcc_unreachable ();
25780 if (optimize || !target
25781 || GET_MODE (target) != tmode
25782 || !insn_data[icode].operand[0].predicate (target, tmode))
25783 target = gen_reg_rtx (tmode);
25785 gcc_assert (nargs <= 4);
25787 for (i = 0; i < nargs; i++)
25789 tree arg = CALL_EXPR_ARG (exp, i);
25790 rtx op = expand_normal (arg);
25791 int adjust = (comparison_p) ? 1 : 0;
25792 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25794 if (last_arg_constant && i == nargs-1)
25796 if (!CONST_INT_P (op))
25798 error ("last argument must be an immediate");
25799 return gen_reg_rtx (tmode);
25804 if (VECTOR_MODE_P (mode))
25805 op = safe_vector_operand (op, mode);
25807       /* If we aren't optimizing, only allow one memory operand to be generated.  */
25809 if (memory_operand (op, mode))
25812 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25815 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25817 op = force_reg (mode, op);
25821 args[i].mode = mode;
25827 pat = GEN_FCN (icode) (target, args[0].op);
25832 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25833 GEN_INT ((int)sub_code));
25834 else if (! comparison_p)
25835 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25838 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25842 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25847 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25851 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25855 gcc_unreachable ();
25865 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25866 insns with vec_merge. */
25869 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25873 tree arg0 = CALL_EXPR_ARG (exp, 0);
25874 rtx op1, op0 = expand_normal (arg0);
25875 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25876 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25878 if (optimize || !target
25879 || GET_MODE (target) != tmode
25880 || !insn_data[icode].operand[0].predicate (target, tmode))
25881 target = gen_reg_rtx (tmode);
25883 if (VECTOR_MODE_P (mode0))
25884 op0 = safe_vector_operand (op0, mode0);
25886 if ((optimize && !register_operand (op0, mode0))
25887 || !insn_data[icode].operand[1].predicate (op0, mode0))
25888 op0 = copy_to_mode_reg (mode0, op0);
25891 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25892 op1 = copy_to_mode_reg (mode0, op1);
25894 pat = GEN_FCN (icode) (target, op0, op1);
25901 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25904 ix86_expand_sse_compare (const struct builtin_description *d,
25905 tree exp, rtx target, bool swap)
25908 tree arg0 = CALL_EXPR_ARG (exp, 0);
25909 tree arg1 = CALL_EXPR_ARG (exp, 1);
25910 rtx op0 = expand_normal (arg0);
25911 rtx op1 = expand_normal (arg1);
25913 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25914 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25915 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25916 enum rtx_code comparison = d->comparison;
25918 if (VECTOR_MODE_P (mode0))
25919 op0 = safe_vector_operand (op0, mode0);
25920 if (VECTOR_MODE_P (mode1))
25921 op1 = safe_vector_operand (op1, mode1);
25923    /* Swap operands if we have a comparison that isn't available in hardware.  */
25927 rtx tmp = gen_reg_rtx (mode1);
25928 emit_move_insn (tmp, op1);
25933 if (optimize || !target
25934 || GET_MODE (target) != tmode
25935 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25936 target = gen_reg_rtx (tmode);
25938 if ((optimize && !register_operand (op0, mode0))
25939 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25940 op0 = copy_to_mode_reg (mode0, op0);
25941 if ((optimize && !register_operand (op1, mode1))
25942 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25943 op1 = copy_to_mode_reg (mode1, op1);
25945 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25946 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25953 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25956 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25960 tree arg0 = CALL_EXPR_ARG (exp, 0);
25961 tree arg1 = CALL_EXPR_ARG (exp, 1);
25962 rtx op0 = expand_normal (arg0);
25963 rtx op1 = expand_normal (arg1);
25964 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25965 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25966 enum rtx_code comparison = d->comparison;
25968 if (VECTOR_MODE_P (mode0))
25969 op0 = safe_vector_operand (op0, mode0);
25970 if (VECTOR_MODE_P (mode1))
25971 op1 = safe_vector_operand (op1, mode1);
25973    /* Swap operands if we have a comparison that isn't available in hardware.  */
25975 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25982 target = gen_reg_rtx (SImode);
25983 emit_move_insn (target, const0_rtx);
25984 target = gen_rtx_SUBREG (QImode, target, 0);
25986 if ((optimize && !register_operand (op0, mode0))
25987 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25988 op0 = copy_to_mode_reg (mode0, op0);
25989 if ((optimize && !register_operand (op1, mode1))
25990 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25991 op1 = copy_to_mode_reg (mode1, op1);
25993 pat = GEN_FCN (d->icode) (op0, op1);
25997 emit_insn (gen_rtx_SET (VOIDmode,
25998 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25999 gen_rtx_fmt_ee (comparison, QImode,
26003 return SUBREG_REG (target);
26006 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26009 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26013 tree arg0 = CALL_EXPR_ARG (exp, 0);
26014 tree arg1 = CALL_EXPR_ARG (exp, 1);
26015 rtx op0 = expand_normal (arg0);
26016 rtx op1 = expand_normal (arg1);
26017 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26018 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26019 enum rtx_code comparison = d->comparison;
26021 if (VECTOR_MODE_P (mode0))
26022 op0 = safe_vector_operand (op0, mode0);
26023 if (VECTOR_MODE_P (mode1))
26024 op1 = safe_vector_operand (op1, mode1);
26026 target = gen_reg_rtx (SImode);
26027 emit_move_insn (target, const0_rtx);
26028 target = gen_rtx_SUBREG (QImode, target, 0);
26030 if ((optimize && !register_operand (op0, mode0))
26031 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26032 op0 = copy_to_mode_reg (mode0, op0);
26033 if ((optimize && !register_operand (op1, mode1))
26034 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26035 op1 = copy_to_mode_reg (mode1, op1);
26037 pat = GEN_FCN (d->icode) (op0, op1);
26041 emit_insn (gen_rtx_SET (VOIDmode,
26042 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26043 gen_rtx_fmt_ee (comparison, QImode,
26047 return SUBREG_REG (target);
26050 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26053 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26054 tree exp, rtx target)
26057 tree arg0 = CALL_EXPR_ARG (exp, 0);
26058 tree arg1 = CALL_EXPR_ARG (exp, 1);
26059 tree arg2 = CALL_EXPR_ARG (exp, 2);
26060 tree arg3 = CALL_EXPR_ARG (exp, 3);
26061 tree arg4 = CALL_EXPR_ARG (exp, 4);
26062 rtx scratch0, scratch1;
26063 rtx op0 = expand_normal (arg0);
26064 rtx op1 = expand_normal (arg1);
26065 rtx op2 = expand_normal (arg2);
26066 rtx op3 = expand_normal (arg3);
26067 rtx op4 = expand_normal (arg4);
26068 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26070 tmode0 = insn_data[d->icode].operand[0].mode;
26071 tmode1 = insn_data[d->icode].operand[1].mode;
26072 modev2 = insn_data[d->icode].operand[2].mode;
26073 modei3 = insn_data[d->icode].operand[3].mode;
26074 modev4 = insn_data[d->icode].operand[4].mode;
26075 modei5 = insn_data[d->icode].operand[5].mode;
26076 modeimm = insn_data[d->icode].operand[6].mode;
26078 if (VECTOR_MODE_P (modev2))
26079 op0 = safe_vector_operand (op0, modev2);
26080 if (VECTOR_MODE_P (modev4))
26081 op2 = safe_vector_operand (op2, modev4);
26083 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26084 op0 = copy_to_mode_reg (modev2, op0);
26085 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26086 op1 = copy_to_mode_reg (modei3, op1);
26087 if ((optimize && !register_operand (op2, modev4))
26088 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26089 op2 = copy_to_mode_reg (modev4, op2);
26090 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26091 op3 = copy_to_mode_reg (modei5, op3);
26093 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26095 error ("the fifth argument must be a 8-bit immediate");
26099 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26101 if (optimize || !target
26102 || GET_MODE (target) != tmode0
26103 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26104 target = gen_reg_rtx (tmode0);
26106 scratch1 = gen_reg_rtx (tmode1);
26108 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26110 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26112 if (optimize || !target
26113 || GET_MODE (target) != tmode1
26114 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26115 target = gen_reg_rtx (tmode1);
26117 scratch0 = gen_reg_rtx (tmode0);
26119 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26123 gcc_assert (d->flag);
26125 scratch0 = gen_reg_rtx (tmode0);
26126 scratch1 = gen_reg_rtx (tmode1);
26128 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26138 target = gen_reg_rtx (SImode);
26139 emit_move_insn (target, const0_rtx);
26140 target = gen_rtx_SUBREG (QImode, target, 0);
26143 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26144 gen_rtx_fmt_ee (EQ, QImode,
26145 gen_rtx_REG ((enum machine_mode) d->flag,
26148 return SUBREG_REG (target);
26155 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26158 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26159 tree exp, rtx target)
26162 tree arg0 = CALL_EXPR_ARG (exp, 0);
26163 tree arg1 = CALL_EXPR_ARG (exp, 1);
26164 tree arg2 = CALL_EXPR_ARG (exp, 2);
26165 rtx scratch0, scratch1;
26166 rtx op0 = expand_normal (arg0);
26167 rtx op1 = expand_normal (arg1);
26168 rtx op2 = expand_normal (arg2);
26169 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26171 tmode0 = insn_data[d->icode].operand[0].mode;
26172 tmode1 = insn_data[d->icode].operand[1].mode;
26173 modev2 = insn_data[d->icode].operand[2].mode;
26174 modev3 = insn_data[d->icode].operand[3].mode;
26175 modeimm = insn_data[d->icode].operand[4].mode;
26177 if (VECTOR_MODE_P (modev2))
26178 op0 = safe_vector_operand (op0, modev2);
26179 if (VECTOR_MODE_P (modev3))
26180 op1 = safe_vector_operand (op1, modev3);
26182 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26183 op0 = copy_to_mode_reg (modev2, op0);
26184 if ((optimize && !register_operand (op1, modev3))
26185 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26186 op1 = copy_to_mode_reg (modev3, op1);
26188 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26190 error ("the third argument must be a 8-bit immediate");
26194 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26196 if (optimize || !target
26197 || GET_MODE (target) != tmode0
26198 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26199 target = gen_reg_rtx (tmode0);
26201 scratch1 = gen_reg_rtx (tmode1);
26203 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26205 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26207 if (optimize || !target
26208 || GET_MODE (target) != tmode1
26209 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26210 target = gen_reg_rtx (tmode1);
26212 scratch0 = gen_reg_rtx (tmode0);
26214 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26218 gcc_assert (d->flag);
26220 scratch0 = gen_reg_rtx (tmode0);
26221 scratch1 = gen_reg_rtx (tmode1);
26223 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26233 target = gen_reg_rtx (SImode);
26234 emit_move_insn (target, const0_rtx);
26235 target = gen_rtx_SUBREG (QImode, target, 0);
26238 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26239 gen_rtx_fmt_ee (EQ, QImode,
26240 gen_rtx_REG ((enum machine_mode) d->flag,
26243 return SUBREG_REG (target);
26249 /* Subroutine of ix86_expand_builtin to take care of insns with
26250 variable number of operands. */
26253 ix86_expand_args_builtin (const struct builtin_description *d,
26254 tree exp, rtx target)
26256 rtx pat, real_target;
26257 unsigned int i, nargs;
26258 unsigned int nargs_constant = 0;
26259 int num_memory = 0;
26263 enum machine_mode mode;
26265 bool last_arg_count = false;
26266 enum insn_code icode = d->icode;
26267 const struct insn_data_d *insn_p = &insn_data[icode];
26268 enum machine_mode tmode = insn_p->operand[0].mode;
26269 enum machine_mode rmode = VOIDmode;
26271 enum rtx_code comparison = d->comparison;
26273 switch ((enum ix86_builtin_func_type) d->flag)
26275 case INT_FTYPE_V8SF_V8SF_PTEST:
26276 case INT_FTYPE_V4DI_V4DI_PTEST:
26277 case INT_FTYPE_V4DF_V4DF_PTEST:
26278 case INT_FTYPE_V4SF_V4SF_PTEST:
26279 case INT_FTYPE_V2DI_V2DI_PTEST:
26280 case INT_FTYPE_V2DF_V2DF_PTEST:
26281 return ix86_expand_sse_ptest (d, exp, target);
26282 case FLOAT128_FTYPE_FLOAT128:
26283 case FLOAT_FTYPE_FLOAT:
26284 case INT_FTYPE_INT:
26285 case UINT64_FTYPE_INT:
26286 case UINT16_FTYPE_UINT16:
26287 case INT64_FTYPE_INT64:
26288 case INT64_FTYPE_V4SF:
26289 case INT64_FTYPE_V2DF:
26290 case INT_FTYPE_V16QI:
26291 case INT_FTYPE_V8QI:
26292 case INT_FTYPE_V8SF:
26293 case INT_FTYPE_V4DF:
26294 case INT_FTYPE_V4SF:
26295 case INT_FTYPE_V2DF:
26296 case V16QI_FTYPE_V16QI:
26297 case V8SI_FTYPE_V8SF:
26298 case V8SI_FTYPE_V4SI:
26299 case V8HI_FTYPE_V8HI:
26300 case V8HI_FTYPE_V16QI:
26301 case V8QI_FTYPE_V8QI:
26302 case V8SF_FTYPE_V8SF:
26303 case V8SF_FTYPE_V8SI:
26304 case V8SF_FTYPE_V4SF:
26305 case V8SF_FTYPE_V8HI:
26306 case V4SI_FTYPE_V4SI:
26307 case V4SI_FTYPE_V16QI:
26308 case V4SI_FTYPE_V4SF:
26309 case V4SI_FTYPE_V8SI:
26310 case V4SI_FTYPE_V8HI:
26311 case V4SI_FTYPE_V4DF:
26312 case V4SI_FTYPE_V2DF:
26313 case V4HI_FTYPE_V4HI:
26314 case V4DF_FTYPE_V4DF:
26315 case V4DF_FTYPE_V4SI:
26316 case V4DF_FTYPE_V4SF:
26317 case V4DF_FTYPE_V2DF:
26318 case V4SF_FTYPE_V4SF:
26319 case V4SF_FTYPE_V4SI:
26320 case V4SF_FTYPE_V8SF:
26321 case V4SF_FTYPE_V4DF:
26322 case V4SF_FTYPE_V8HI:
26323 case V4SF_FTYPE_V2DF:
26324 case V2DI_FTYPE_V2DI:
26325 case V2DI_FTYPE_V16QI:
26326 case V2DI_FTYPE_V8HI:
26327 case V2DI_FTYPE_V4SI:
26328 case V2DF_FTYPE_V2DF:
26329 case V2DF_FTYPE_V4SI:
26330 case V2DF_FTYPE_V4DF:
26331 case V2DF_FTYPE_V4SF:
26332 case V2DF_FTYPE_V2SI:
26333 case V2SI_FTYPE_V2SI:
26334 case V2SI_FTYPE_V4SF:
26335 case V2SI_FTYPE_V2SF:
26336 case V2SI_FTYPE_V2DF:
26337 case V2SF_FTYPE_V2SF:
26338 case V2SF_FTYPE_V2SI:
26341 case V4SF_FTYPE_V4SF_VEC_MERGE:
26342 case V2DF_FTYPE_V2DF_VEC_MERGE:
26343 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26344 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26345 case V16QI_FTYPE_V16QI_V16QI:
26346 case V16QI_FTYPE_V8HI_V8HI:
26347 case V8QI_FTYPE_V8QI_V8QI:
26348 case V8QI_FTYPE_V4HI_V4HI:
26349 case V8HI_FTYPE_V8HI_V8HI:
26350 case V8HI_FTYPE_V16QI_V16QI:
26351 case V8HI_FTYPE_V4SI_V4SI:
26352 case V8SF_FTYPE_V8SF_V8SF:
26353 case V8SF_FTYPE_V8SF_V8SI:
26354 case V4SI_FTYPE_V4SI_V4SI:
26355 case V4SI_FTYPE_V8HI_V8HI:
26356 case V4SI_FTYPE_V4SF_V4SF:
26357 case V4SI_FTYPE_V2DF_V2DF:
26358 case V4HI_FTYPE_V4HI_V4HI:
26359 case V4HI_FTYPE_V8QI_V8QI:
26360 case V4HI_FTYPE_V2SI_V2SI:
26361 case V4DF_FTYPE_V4DF_V4DF:
26362 case V4DF_FTYPE_V4DF_V4DI:
26363 case V4SF_FTYPE_V4SF_V4SF:
26364 case V4SF_FTYPE_V4SF_V4SI:
26365 case V4SF_FTYPE_V4SF_V2SI:
26366 case V4SF_FTYPE_V4SF_V2DF:
26367 case V4SF_FTYPE_V4SF_DI:
26368 case V4SF_FTYPE_V4SF_SI:
26369 case V2DI_FTYPE_V2DI_V2DI:
26370 case V2DI_FTYPE_V16QI_V16QI:
26371 case V2DI_FTYPE_V4SI_V4SI:
26372 case V2DI_FTYPE_V2DI_V16QI:
26373 case V2DI_FTYPE_V2DF_V2DF:
26374 case V2SI_FTYPE_V2SI_V2SI:
26375 case V2SI_FTYPE_V4HI_V4HI:
26376 case V2SI_FTYPE_V2SF_V2SF:
26377 case V2DF_FTYPE_V2DF_V2DF:
26378 case V2DF_FTYPE_V2DF_V4SF:
26379 case V2DF_FTYPE_V2DF_V2DI:
26380 case V2DF_FTYPE_V2DF_DI:
26381 case V2DF_FTYPE_V2DF_SI:
26382 case V2SF_FTYPE_V2SF_V2SF:
26383 case V1DI_FTYPE_V1DI_V1DI:
26384 case V1DI_FTYPE_V8QI_V8QI:
26385 case V1DI_FTYPE_V2SI_V2SI:
26386 if (comparison == UNKNOWN)
26387 return ix86_expand_binop_builtin (icode, exp, target);
26390 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26391 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26392 gcc_assert (comparison != UNKNOWN);
26396 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26397 case V8HI_FTYPE_V8HI_SI_COUNT:
26398 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26399 case V4SI_FTYPE_V4SI_SI_COUNT:
26400 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26401 case V4HI_FTYPE_V4HI_SI_COUNT:
26402 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26403 case V2DI_FTYPE_V2DI_SI_COUNT:
26404 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26405 case V2SI_FTYPE_V2SI_SI_COUNT:
26406 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26407 case V1DI_FTYPE_V1DI_SI_COUNT:
26409 last_arg_count = true;
26411 case UINT64_FTYPE_UINT64_UINT64:
26412 case UINT_FTYPE_UINT_UINT:
26413 case UINT_FTYPE_UINT_USHORT:
26414 case UINT_FTYPE_UINT_UCHAR:
26415 case UINT16_FTYPE_UINT16_INT:
26416 case UINT8_FTYPE_UINT8_INT:
26419 case V2DI_FTYPE_V2DI_INT_CONVERT:
26422 nargs_constant = 1;
26424 case V8HI_FTYPE_V8HI_INT:
26425 case V8HI_FTYPE_V8SF_INT:
26426 case V8HI_FTYPE_V4SF_INT:
26427 case V8SF_FTYPE_V8SF_INT:
26428 case V4SI_FTYPE_V4SI_INT:
26429 case V4SI_FTYPE_V8SI_INT:
26430 case V4HI_FTYPE_V4HI_INT:
26431 case V4DF_FTYPE_V4DF_INT:
26432 case V4SF_FTYPE_V4SF_INT:
26433 case V4SF_FTYPE_V8SF_INT:
26434 case V2DI_FTYPE_V2DI_INT:
26435 case V2DF_FTYPE_V2DF_INT:
26436 case V2DF_FTYPE_V4DF_INT:
26438 nargs_constant = 1;
26440 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26441 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26442 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26443 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26444 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26447 case V16QI_FTYPE_V16QI_V16QI_INT:
26448 case V8HI_FTYPE_V8HI_V8HI_INT:
26449 case V8SI_FTYPE_V8SI_V8SI_INT:
26450 case V8SI_FTYPE_V8SI_V4SI_INT:
26451 case V8SF_FTYPE_V8SF_V8SF_INT:
26452 case V8SF_FTYPE_V8SF_V4SF_INT:
26453 case V4SI_FTYPE_V4SI_V4SI_INT:
26454 case V4DF_FTYPE_V4DF_V4DF_INT:
26455 case V4DF_FTYPE_V4DF_V2DF_INT:
26456 case V4SF_FTYPE_V4SF_V4SF_INT:
26457 case V2DI_FTYPE_V2DI_V2DI_INT:
26458 case V2DF_FTYPE_V2DF_V2DF_INT:
26460 nargs_constant = 1;
26462 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26465 nargs_constant = 1;
26467 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26470 nargs_constant = 1;
26472 case V2DI_FTYPE_V2DI_UINT_UINT:
26474 nargs_constant = 2;
26476 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26477 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26478 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26479 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26481 nargs_constant = 1;
26483 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26485 nargs_constant = 2;
26488 gcc_unreachable ();
26491 gcc_assert (nargs <= ARRAY_SIZE (args));
26493 if (comparison != UNKNOWN)
26495 gcc_assert (nargs == 2);
26496 return ix86_expand_sse_compare (d, exp, target, swap);
26499 if (rmode == VOIDmode || rmode == tmode)
26503 || GET_MODE (target) != tmode
26504 || !insn_p->operand[0].predicate (target, tmode))
26505 target = gen_reg_rtx (tmode);
26506 real_target = target;
26510 target = gen_reg_rtx (rmode);
26511 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26514 for (i = 0; i < nargs; i++)
26516 tree arg = CALL_EXPR_ARG (exp, i);
26517 rtx op = expand_normal (arg);
26518 enum machine_mode mode = insn_p->operand[i + 1].mode;
26519 bool match = insn_p->operand[i + 1].predicate (op, mode);
26521 if (last_arg_count && (i + 1) == nargs)
26523	  /* SIMD shift insns take either an 8-bit immediate or a
26524	     register as the count.  But the builtin functions take an int
26525	     as the count.  If the count doesn't match, we put it in a register.  */
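	  /* E.g. (illustrative) _mm_slli_epi16 passes an int count that
	     arrives here as a SImode rtx, while _mm_sll_epi16 passes a
	     whole __m128i count register.  */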
26528 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26529 if (!insn_p->operand[i + 1].predicate (op, mode))
26530 op = copy_to_reg (op);
26533 else if ((nargs - i) <= nargs_constant)
26538 case CODE_FOR_sse4_1_roundpd:
26539 case CODE_FOR_sse4_1_roundps:
26540 case CODE_FOR_sse4_1_roundsd:
26541 case CODE_FOR_sse4_1_roundss:
26542 case CODE_FOR_sse4_1_blendps:
26543 case CODE_FOR_avx_blendpd256:
26544 case CODE_FOR_avx_vpermilv4df:
26545 case CODE_FOR_avx_roundpd256:
26546 case CODE_FOR_avx_roundps256:
26547 error ("the last argument must be a 4-bit immediate");
26550 case CODE_FOR_sse4_1_blendpd:
26551 case CODE_FOR_avx_vpermilv2df:
26552 case CODE_FOR_xop_vpermil2v2df3:
26553 case CODE_FOR_xop_vpermil2v4sf3:
26554 case CODE_FOR_xop_vpermil2v4df3:
26555 case CODE_FOR_xop_vpermil2v8sf3:
26556 error ("the last argument must be a 2-bit immediate");
26559 case CODE_FOR_avx_vextractf128v4df:
26560 case CODE_FOR_avx_vextractf128v8sf:
26561 case CODE_FOR_avx_vextractf128v8si:
26562 case CODE_FOR_avx_vinsertf128v4df:
26563 case CODE_FOR_avx_vinsertf128v8sf:
26564 case CODE_FOR_avx_vinsertf128v8si:
26565 error ("the last argument must be a 1-bit immediate");
26568 case CODE_FOR_avx_cmpsdv2df3:
26569 case CODE_FOR_avx_cmpssv4sf3:
26570 case CODE_FOR_avx_cmppdv2df3:
26571 case CODE_FOR_avx_cmppsv4sf3:
26572 case CODE_FOR_avx_cmppdv4df3:
26573 case CODE_FOR_avx_cmppsv8sf3:
26574 error ("the last argument must be a 5-bit immediate");
26578 switch (nargs_constant)
26581 if ((nargs - i) == nargs_constant)
26583 error ("the next to last argument must be an 8-bit immediate");
26587 error ("the last argument must be an 8-bit immediate");
26590 gcc_unreachable ();
26597 if (VECTOR_MODE_P (mode))
26598 op = safe_vector_operand (op, mode);
26600	  /* If we aren't optimizing, only allow one memory operand to be generated.  */
26602 if (memory_operand (op, mode))
26605 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26607 if (optimize || !match || num_memory > 1)
26608 op = copy_to_mode_reg (mode, op);
26612 op = copy_to_reg (op);
26613 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26618 args[i].mode = mode;
26624 pat = GEN_FCN (icode) (real_target, args[0].op);
26627 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26630 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26634 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26635 args[2].op, args[3].op);
26638 gcc_unreachable ();
26648 /* Subroutine of ix86_expand_builtin to take care of special insns
26649 with variable number of operands. */
26652 ix86_expand_special_args_builtin (const struct builtin_description *d,
26653 tree exp, rtx target)
26657 unsigned int i, nargs, arg_adjust, memory;
26661 enum machine_mode mode;
26663 enum insn_code icode = d->icode;
26664 bool last_arg_constant = false;
26665 const struct insn_data_d *insn_p = &insn_data[icode];
26666 enum machine_mode tmode = insn_p->operand[0].mode;
26667 enum { load, store } klass;
26669 switch ((enum ix86_builtin_func_type) d->flag)
26671 case VOID_FTYPE_VOID:
26672 if (icode == CODE_FOR_avx_vzeroupper)
26673 target = GEN_INT (vzeroupper_intrinsic);
26674 emit_insn (GEN_FCN (icode) (target));
26676 case VOID_FTYPE_UINT64:
26677 case VOID_FTYPE_UNSIGNED:
26683 case UINT64_FTYPE_VOID:
26684 case UNSIGNED_FTYPE_VOID:
26685 case UINT16_FTYPE_VOID:
26690 case UINT64_FTYPE_PUNSIGNED:
26691 case V2DI_FTYPE_PV2DI:
26692 case V32QI_FTYPE_PCCHAR:
26693 case V16QI_FTYPE_PCCHAR:
26694 case V8SF_FTYPE_PCV4SF:
26695 case V8SF_FTYPE_PCFLOAT:
26696 case V4SF_FTYPE_PCFLOAT:
26697 case V4DF_FTYPE_PCV2DF:
26698 case V4DF_FTYPE_PCDOUBLE:
26699 case V2DF_FTYPE_PCDOUBLE:
26700 case VOID_FTYPE_PVOID:
26705 case VOID_FTYPE_PV2SF_V4SF:
26706 case VOID_FTYPE_PV4DI_V4DI:
26707 case VOID_FTYPE_PV2DI_V2DI:
26708 case VOID_FTYPE_PCHAR_V32QI:
26709 case VOID_FTYPE_PCHAR_V16QI:
26710 case VOID_FTYPE_PFLOAT_V8SF:
26711 case VOID_FTYPE_PFLOAT_V4SF:
26712 case VOID_FTYPE_PDOUBLE_V4DF:
26713 case VOID_FTYPE_PDOUBLE_V2DF:
26714 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26715 case VOID_FTYPE_PINT_INT:
26718 /* Reserve memory operand for target. */
26719 memory = ARRAY_SIZE (args);
26721 case V4SF_FTYPE_V4SF_PCV2SF:
26722 case V2DF_FTYPE_V2DF_PCDOUBLE:
26727 case V8SF_FTYPE_PCV8SF_V8SF:
26728 case V4DF_FTYPE_PCV4DF_V4DF:
26729 case V4SF_FTYPE_PCV4SF_V4SF:
26730 case V2DF_FTYPE_PCV2DF_V2DF:
26735 case VOID_FTYPE_PV8SF_V8SF_V8SF:
26736 case VOID_FTYPE_PV4DF_V4DF_V4DF:
26737 case VOID_FTYPE_PV4SF_V4SF_V4SF:
26738 case VOID_FTYPE_PV2DF_V2DF_V2DF:
26741 /* Reserve memory operand for target. */
26742 memory = ARRAY_SIZE (args);
26744 case VOID_FTYPE_UINT_UINT_UINT:
26745 case VOID_FTYPE_UINT64_UINT_UINT:
26746 case UCHAR_FTYPE_UINT_UINT_UINT:
26747 case UCHAR_FTYPE_UINT64_UINT_UINT:
26750 memory = ARRAY_SIZE (args);
26751 last_arg_constant = true;
26754 gcc_unreachable ();
26757 gcc_assert (nargs <= ARRAY_SIZE (args));
26759 if (klass == store)
26761 arg = CALL_EXPR_ARG (exp, 0);
26762 op = expand_normal (arg);
26763 gcc_assert (target == 0);
26765 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26767 target = force_reg (tmode, op);
26775 || GET_MODE (target) != tmode
26776 || !insn_p->operand[0].predicate (target, tmode))
26777 target = gen_reg_rtx (tmode);
26780 for (i = 0; i < nargs; i++)
26782 enum machine_mode mode = insn_p->operand[i + 1].mode;
26785 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26786 op = expand_normal (arg);
26787 match = insn_p->operand[i + 1].predicate (op, mode);
26789 if (last_arg_constant && (i + 1) == nargs)
26793 if (icode == CODE_FOR_lwp_lwpvalsi3
26794 || icode == CODE_FOR_lwp_lwpinssi3
26795 || icode == CODE_FOR_lwp_lwpvaldi3
26796 || icode == CODE_FOR_lwp_lwpinsdi3)
26797 error ("the last argument must be a 32-bit immediate");
26799 error ("the last argument must be an 8-bit immediate");
26807 /* This must be the memory operand. */
26808 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
26809 gcc_assert (GET_MODE (op) == mode
26810 || GET_MODE (op) == VOIDmode);
26814	  /* This must be a register.  */
26815 if (VECTOR_MODE_P (mode))
26816 op = safe_vector_operand (op, mode);
26818 gcc_assert (GET_MODE (op) == mode
26819 || GET_MODE (op) == VOIDmode);
26820 op = copy_to_mode_reg (mode, op);
26825 args[i].mode = mode;
26831 pat = GEN_FCN (icode) (target);
26834 pat = GEN_FCN (icode) (target, args[0].op);
26837 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26840 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26843 gcc_unreachable ();
26849 return klass == store ? 0 : target;
26852 /* Return the integer constant in ARG. Constrain it to be in the range
26853 of the subparts of VEC_TYPE; issue an error if not. */
26856 get_element_number (tree vec_type, tree arg)
26858 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26860 if (!host_integerp (arg, 1)
26861 || (elt = tree_low_cst (arg, 1), elt > max))
26863 error ("selector must be an integer constant in the range 0..%wi", max);
26870 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26871 ix86_expand_vector_init. We DO have language-level syntax for this, in
26872 the form of (type){ init-list }. Except that since we can't place emms
26873 instructions from inside the compiler, we can't allow the use of MMX
26874 registers unless the user explicitly asks for it. So we do *not* define
26875 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26876    we have builtins invoked by mmintrin.h that give us license to emit
26877 these sorts of instructions. */
26880 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26882 enum machine_mode tmode = TYPE_MODE (type);
26883 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26884 int i, n_elt = GET_MODE_NUNITS (tmode);
26885 rtvec v = rtvec_alloc (n_elt);
26887 gcc_assert (VECTOR_MODE_P (tmode));
26888 gcc_assert (call_expr_nargs (exp) == n_elt);
26890 for (i = 0; i < n_elt; ++i)
26892 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26893 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26896 if (!target || !register_operand (target, tmode))
26897 target = gen_reg_rtx (tmode);
26899 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26903 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26904 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26905 had a language-level syntax for referencing vector elements. */
26908 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26910 enum machine_mode tmode, mode0;
26915 arg0 = CALL_EXPR_ARG (exp, 0);
26916 arg1 = CALL_EXPR_ARG (exp, 1);
26918 op0 = expand_normal (arg0);
26919 elt = get_element_number (TREE_TYPE (arg0), arg1);
26921 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26922 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26923 gcc_assert (VECTOR_MODE_P (mode0));
26925 op0 = force_reg (mode0, op0);
26927 if (optimize || !target || !register_operand (target, tmode))
26928 target = gen_reg_rtx (tmode);
26930 ix86_expand_vector_extract (true, target, op0, elt);
26935 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26936 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26937 a language-level syntax for referencing vector elements. */
26940 ix86_expand_vec_set_builtin (tree exp)
26942 enum machine_mode tmode, mode1;
26943 tree arg0, arg1, arg2;
26945 rtx op0, op1, target;
26947 arg0 = CALL_EXPR_ARG (exp, 0);
26948 arg1 = CALL_EXPR_ARG (exp, 1);
26949 arg2 = CALL_EXPR_ARG (exp, 2);
26951 tmode = TYPE_MODE (TREE_TYPE (arg0));
26952 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26953 gcc_assert (VECTOR_MODE_P (tmode));
26955 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26956 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26957 elt = get_element_number (TREE_TYPE (arg0), arg2);
26959 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26960 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26962 op0 = force_reg (tmode, op0);
26963 op1 = force_reg (mode1, op1);
26965  /* OP0 is the source of these builtin functions and shouldn't be
26966     modified.  Create a copy, use it and return it as the target.  */
26967 target = gen_reg_rtx (tmode);
26968 emit_move_insn (target, op0);
26969 ix86_expand_vector_set (true, target, op1, elt);
26974 /* Expand an expression EXP that calls a built-in function,
26975 with result going to TARGET if that's convenient
26976 (and in mode MODE if that's convenient).
26977 SUBTARGET may be used as the target for computing one of EXP's operands.
26978 IGNORE is nonzero if the value is to be ignored. */
26981 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26982 enum machine_mode mode ATTRIBUTE_UNUSED,
26983 int ignore ATTRIBUTE_UNUSED)
26985 const struct builtin_description *d;
26987 enum insn_code icode;
26988 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26989 tree arg0, arg1, arg2;
26990 rtx op0, op1, op2, pat;
26991 enum machine_mode mode0, mode1, mode2;
26992 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26994 /* Determine whether the builtin function is available under the current ISA.
26995 Originally the builtin was not created if it wasn't applicable to the
26996 current ISA based on the command line switches. With function specific
26997 options, we need to check in the context of the function making the call
26998 whether it is supported. */
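  /* Illustrative only: with plain -msse2,

       __m128i bad (__m128i a, __m128i b)
       { return __builtin_ia32_aesenc128 (a, b); }

     lands in the error path below, reporting the missing ISA option,
     whereas the same call inside an __attribute__((target("aes")))
     function passes the check.  */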
26999 if (ix86_builtins_isa[fcode].isa
27000 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27002 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27003 NULL, NULL, false);
27006 error ("%qE needs unknown isa option", fndecl);
27009 gcc_assert (opts != NULL);
27010 error ("%qE needs isa option %s", fndecl, opts);
27018 case IX86_BUILTIN_MASKMOVQ:
27019 case IX86_BUILTIN_MASKMOVDQU:
27020 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27021 ? CODE_FOR_mmx_maskmovq
27022 : CODE_FOR_sse2_maskmovdqu);
27023 /* Note the arg order is different from the operand order. */
27024 arg1 = CALL_EXPR_ARG (exp, 0);
27025 arg2 = CALL_EXPR_ARG (exp, 1);
27026 arg0 = CALL_EXPR_ARG (exp, 2);
27027 op0 = expand_normal (arg0);
27028 op1 = expand_normal (arg1);
27029 op2 = expand_normal (arg2);
27030 mode0 = insn_data[icode].operand[0].mode;
27031 mode1 = insn_data[icode].operand[1].mode;
27032 mode2 = insn_data[icode].operand[2].mode;
27034 op0 = force_reg (Pmode, op0);
27035 op0 = gen_rtx_MEM (mode1, op0);
27037 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27038 op0 = copy_to_mode_reg (mode0, op0);
27039 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27040 op1 = copy_to_mode_reg (mode1, op1);
27041 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27042 op2 = copy_to_mode_reg (mode2, op2);
27043 pat = GEN_FCN (icode) (op0, op1, op2);
27049 case IX86_BUILTIN_LDMXCSR:
27050 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27051 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27052 emit_move_insn (target, op0);
27053 emit_insn (gen_sse_ldmxcsr (target));
27056 case IX86_BUILTIN_STMXCSR:
27057 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27058 emit_insn (gen_sse_stmxcsr (target));
27059 return copy_to_mode_reg (SImode, target);
27061 case IX86_BUILTIN_CLFLUSH:
27062 arg0 = CALL_EXPR_ARG (exp, 0);
27063 op0 = expand_normal (arg0);
27064 icode = CODE_FOR_sse2_clflush;
27065 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27066 op0 = copy_to_mode_reg (Pmode, op0);
27068 emit_insn (gen_sse2_clflush (op0));
27071 case IX86_BUILTIN_MONITOR:
27072 arg0 = CALL_EXPR_ARG (exp, 0);
27073 arg1 = CALL_EXPR_ARG (exp, 1);
27074 arg2 = CALL_EXPR_ARG (exp, 2);
27075 op0 = expand_normal (arg0);
27076 op1 = expand_normal (arg1);
27077 op2 = expand_normal (arg2);
27079 op0 = copy_to_mode_reg (Pmode, op0);
27081 op1 = copy_to_mode_reg (SImode, op1);
27083 op2 = copy_to_mode_reg (SImode, op2);
27084 emit_insn (ix86_gen_monitor (op0, op1, op2));
27087 case IX86_BUILTIN_MWAIT:
27088 arg0 = CALL_EXPR_ARG (exp, 0);
27089 arg1 = CALL_EXPR_ARG (exp, 1);
27090 op0 = expand_normal (arg0);
27091 op1 = expand_normal (arg1);
27093 op0 = copy_to_mode_reg (SImode, op0);
27095 op1 = copy_to_mode_reg (SImode, op1);
27096 emit_insn (gen_sse3_mwait (op0, op1));
27099 case IX86_BUILTIN_VEC_INIT_V2SI:
27100 case IX86_BUILTIN_VEC_INIT_V4HI:
27101 case IX86_BUILTIN_VEC_INIT_V8QI:
27102 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27104 case IX86_BUILTIN_VEC_EXT_V2DF:
27105 case IX86_BUILTIN_VEC_EXT_V2DI:
27106 case IX86_BUILTIN_VEC_EXT_V4SF:
27107 case IX86_BUILTIN_VEC_EXT_V4SI:
27108 case IX86_BUILTIN_VEC_EXT_V8HI:
27109 case IX86_BUILTIN_VEC_EXT_V2SI:
27110 case IX86_BUILTIN_VEC_EXT_V4HI:
27111 case IX86_BUILTIN_VEC_EXT_V16QI:
27112 return ix86_expand_vec_ext_builtin (exp, target);
27114 case IX86_BUILTIN_VEC_SET_V2DI:
27115 case IX86_BUILTIN_VEC_SET_V4SF:
27116 case IX86_BUILTIN_VEC_SET_V4SI:
27117 case IX86_BUILTIN_VEC_SET_V8HI:
27118 case IX86_BUILTIN_VEC_SET_V4HI:
27119 case IX86_BUILTIN_VEC_SET_V16QI:
27120 return ix86_expand_vec_set_builtin (exp);
27122 case IX86_BUILTIN_VEC_PERM_V2DF:
27123 case IX86_BUILTIN_VEC_PERM_V4SF:
27124 case IX86_BUILTIN_VEC_PERM_V2DI:
27125 case IX86_BUILTIN_VEC_PERM_V4SI:
27126 case IX86_BUILTIN_VEC_PERM_V8HI:
27127 case IX86_BUILTIN_VEC_PERM_V16QI:
27128 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27129 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27130 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27131 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27132 case IX86_BUILTIN_VEC_PERM_V4DF:
27133 case IX86_BUILTIN_VEC_PERM_V8SF:
27134 return ix86_expand_vec_perm_builtin (exp);
27136 case IX86_BUILTIN_INFQ:
27137 case IX86_BUILTIN_HUGE_VALQ:
27139 REAL_VALUE_TYPE inf;
27143 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27145 tmp = validize_mem (force_const_mem (mode, tmp));
27148 target = gen_reg_rtx (mode);
27150 emit_move_insn (target, tmp);
27154 case IX86_BUILTIN_LLWPCB:
27155 arg0 = CALL_EXPR_ARG (exp, 0);
27156 op0 = expand_normal (arg0);
27157 icode = CODE_FOR_lwp_llwpcb;
27158 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27159 op0 = copy_to_mode_reg (Pmode, op0);
27160 emit_insn (gen_lwp_llwpcb (op0));
27163 case IX86_BUILTIN_SLWPCB:
27164 icode = CODE_FOR_lwp_slwpcb;
27166 || !insn_data[icode].operand[0].predicate (target, Pmode))
27167 target = gen_reg_rtx (Pmode);
27168 emit_insn (gen_lwp_slwpcb (target));
27171 case IX86_BUILTIN_BEXTRI32:
27172 case IX86_BUILTIN_BEXTRI64:
27173 arg0 = CALL_EXPR_ARG (exp, 0);
27174 arg1 = CALL_EXPR_ARG (exp, 1);
27175 op0 = expand_normal (arg0);
27176 op1 = expand_normal (arg1);
27177 icode = (fcode == IX86_BUILTIN_BEXTRI32
27178 ? CODE_FOR_tbm_bextri_si
27179 : CODE_FOR_tbm_bextri_di);
27180 if (!CONST_INT_P (op1))
27182 error ("last argument must be an immediate");
27187 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27188 unsigned char lsb_index = INTVAL (op1) & 0xFF;
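	/* I.e. the TBM control word packs (length << 8) | lsb, so
	   (illustrative) __builtin_ia32_bextri_u32 (x, 0x0804) extracts
	   the 8-bit field starting at bit 4.  */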
27189 op1 = GEN_INT (length);
27190 op2 = GEN_INT (lsb_index);
27191 pat = GEN_FCN (icode) (target, op0, op1, op2);
27201 for (i = 0, d = bdesc_special_args;
27202 i < ARRAY_SIZE (bdesc_special_args);
27204 if (d->code == fcode)
27205 return ix86_expand_special_args_builtin (d, exp, target);
27207 for (i = 0, d = bdesc_args;
27208 i < ARRAY_SIZE (bdesc_args);
27210 if (d->code == fcode)
27213 case IX86_BUILTIN_FABSQ:
27214 case IX86_BUILTIN_COPYSIGNQ:
27216 /* Emit a normal call if SSE2 isn't available. */
27217 return expand_call (exp, target, ignore);
27219 return ix86_expand_args_builtin (d, exp, target);
27222 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27223 if (d->code == fcode)
27224 return ix86_expand_sse_comi (d, exp, target);
27226 for (i = 0, d = bdesc_pcmpestr;
27227 i < ARRAY_SIZE (bdesc_pcmpestr);
27229 if (d->code == fcode)
27230 return ix86_expand_sse_pcmpestr (d, exp, target);
27232 for (i = 0, d = bdesc_pcmpistr;
27233 i < ARRAY_SIZE (bdesc_pcmpistr);
27235 if (d->code == fcode)
27236 return ix86_expand_sse_pcmpistr (d, exp, target);
27238 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27239 if (d->code == fcode)
27240 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27241 (enum ix86_builtin_func_type)
27242 d->flag, d->comparison);
27244 gcc_unreachable ();
27247 /* Returns a function decl for a vectorized version of the builtin function
27248 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27249 if it is not available. */
27252 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27255 enum machine_mode in_mode, out_mode;
27257 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27259 if (TREE_CODE (type_out) != VECTOR_TYPE
27260 || TREE_CODE (type_in) != VECTOR_TYPE
27261 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27264 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27265 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27266 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27267 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27271 case BUILT_IN_SQRT:
27272 if (out_mode == DFmode && in_mode == DFmode)
27274 if (out_n == 2 && in_n == 2)
27275 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27276 else if (out_n == 4 && in_n == 4)
27277 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27281 case BUILT_IN_SQRTF:
27282 if (out_mode == SFmode && in_mode == SFmode)
27284 if (out_n == 4 && in_n == 4)
27285 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27286 else if (out_n == 8 && in_n == 8)
27287 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27291 case BUILT_IN_LRINT:
27292 if (out_mode == SImode && out_n == 4
27293 && in_mode == DFmode && in_n == 2)
27294 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27297 case BUILT_IN_LRINTF:
27298 if (out_mode == SImode && in_mode == SFmode)
27300 if (out_n == 4 && in_n == 4)
27301 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27302 else if (out_n == 8 && in_n == 8)
27303 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27307 case BUILT_IN_COPYSIGN:
27308 if (out_mode == DFmode && in_mode == DFmode)
27310 if (out_n == 2 && in_n == 2)
27311 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27312 else if (out_n == 4 && in_n == 4)
27313 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27317 case BUILT_IN_COPYSIGNF:
27318 if (out_mode == SFmode && in_mode == SFmode)
27320 if (out_n == 4 && in_n == 4)
27321 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27322 else if (out_n == 8 && in_n == 8)
27323 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27328 if (out_mode == DFmode && in_mode == DFmode)
27330 if (out_n == 2 && in_n == 2)
27331 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27332 if (out_n == 4 && in_n == 4)
27333 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27337 case BUILT_IN_FMAF:
27338 if (out_mode == SFmode && in_mode == SFmode)
27340 if (out_n == 4 && in_n == 4)
27341 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27342 if (out_n == 8 && in_n == 8)
27343 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27351 /* Dispatch to a handler for a vectorization library. */
27352 if (ix86_veclib_handler)
27353 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27359 /* Handler for an SVML-style interface to
27360 a library with vectorized intrinsics. */
27363 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27366 tree fntype, new_fndecl, args;
27369 enum machine_mode el_mode, in_mode;
27372 /* SVML is suitable for unsafe math only. */
27373 if (!flag_unsafe_math_optimizations)
27376 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27377 n = TYPE_VECTOR_SUBPARTS (type_out);
27378 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27379 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27380 if (el_mode != in_mode
27388 case BUILT_IN_LOG10:
27390 case BUILT_IN_TANH:
27392 case BUILT_IN_ATAN:
27393 case BUILT_IN_ATAN2:
27394 case BUILT_IN_ATANH:
27395 case BUILT_IN_CBRT:
27396 case BUILT_IN_SINH:
27398 case BUILT_IN_ASINH:
27399 case BUILT_IN_ASIN:
27400 case BUILT_IN_COSH:
27402 case BUILT_IN_ACOSH:
27403 case BUILT_IN_ACOS:
27404 if (el_mode != DFmode || n != 2)
27408 case BUILT_IN_EXPF:
27409 case BUILT_IN_LOGF:
27410 case BUILT_IN_LOG10F:
27411 case BUILT_IN_POWF:
27412 case BUILT_IN_TANHF:
27413 case BUILT_IN_TANF:
27414 case BUILT_IN_ATANF:
27415 case BUILT_IN_ATAN2F:
27416 case BUILT_IN_ATANHF:
27417 case BUILT_IN_CBRTF:
27418 case BUILT_IN_SINHF:
27419 case BUILT_IN_SINF:
27420 case BUILT_IN_ASINHF:
27421 case BUILT_IN_ASINF:
27422 case BUILT_IN_COSHF:
27423 case BUILT_IN_COSF:
27424 case BUILT_IN_ACOSHF:
27425 case BUILT_IN_ACOSF:
27426 if (el_mode != SFmode || n != 4)
27434 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27436 if (fn == BUILT_IN_LOGF)
27437 strcpy (name, "vmlsLn4");
27438 else if (fn == BUILT_IN_LOG)
27439 strcpy (name, "vmldLn2");
27442 sprintf (name, "vmls%s", bname+10);
27443 name[strlen (name)-1] = '4';
27446 sprintf (name, "vmld%s2", bname+10);
27448 /* Convert to uppercase. */
27452 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27453 args = TREE_CHAIN (args))
27457 fntype = build_function_type_list (type_out, type_in, NULL);
27459 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27461 /* Build a function declaration for the vectorized function. */
27462 new_fndecl = build_decl (BUILTINS_LOCATION,
27463 FUNCTION_DECL, get_identifier (name), fntype);
27464 TREE_PUBLIC (new_fndecl) = 1;
27465 DECL_EXTERNAL (new_fndecl) = 1;
27466 DECL_IS_NOVOPS (new_fndecl) = 1;
27467 TREE_READONLY (new_fndecl) = 1;
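/* Editorial sketch, not part of GCC: the SVML name mangling above,
   reduced to a standalone helper.  BNAME is the "__builtin_"-prefixed
   name, so bname + 10 skips that prefix; the SF variant drops the
   trailing 'f' and appends the vector width 4, the DF variant appends
   2, and the (elided) uppercase fixup capitalizes the first letter of
   the math function, e.g. "__builtin_sinf" -> "vmlsSin4".  The
   function name is hypothetical; <string.h>/<stdio.h> and the TOUPPER
   macro arrive via system.h.  */
static void
example_svml_name (char name[32], const char *bname, int is_float)
{
  if (is_float)
    {
      sprintf (name, "vmls%s", bname + 10);
      name[strlen (name) - 1] = '4';	/* Replace the 'f' suffix.  */
    }
  else
    sprintf (name, "vmld%s2", bname + 10);
  name[4] = TOUPPER (name[4]);		/* "Convert to uppercase."  */
}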
27472 /* Handler for an ACML-style interface to
27473 a library with vectorized intrinsics. */
27476 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27478 char name[20] = "__vr.._";
27479 tree fntype, new_fndecl, args;
27482 enum machine_mode el_mode, in_mode;
27485 /* The ACML is 64-bit only and suitable for unsafe math only, as
27486 it does not correctly support parts of IEEE (such as denormals)
27487 with the required precision. */
27489 || !flag_unsafe_math_optimizations)
27492 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27493 n = TYPE_VECTOR_SUBPARTS (type_out);
27494 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27495 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27496 if (el_mode != in_mode
27506 case BUILT_IN_LOG2:
27507 case BUILT_IN_LOG10:
27510 if (el_mode != DFmode
27515 case BUILT_IN_SINF:
27516 case BUILT_IN_COSF:
27517 case BUILT_IN_EXPF:
27518 case BUILT_IN_POWF:
27519 case BUILT_IN_LOGF:
27520 case BUILT_IN_LOG2F:
27521 case BUILT_IN_LOG10F:
27524 if (el_mode != SFmode
27533 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27534 sprintf (name + 7, "%s", bname+10);
27537 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27538 args = TREE_CHAIN (args))
27542 fntype = build_function_type_list (type_out, type_in, NULL);
27544 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27546 /* Build a function declaration for the vectorized function. */
27547 new_fndecl = build_decl (BUILTINS_LOCATION,
27548 FUNCTION_DECL, get_identifier (name), fntype);
27549 TREE_PUBLIC (new_fndecl) = 1;
27550 DECL_EXTERNAL (new_fndecl) = 1;
27551 DECL_IS_NOVOPS (new_fndecl) = 1;
27552 TREE_READONLY (new_fndecl) = 1;
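/* Editorial sketch, not part of GCC: the ACML scheme completes the
   "__vr.._" template above -- presumably name[4] = 'd'/'s' and
   name[5] = '2'/'4' in the elided switch arms -- and then the final
   sprintf pastes the stripped builtin name after the underscore,
   giving e.g. "__vrd2_sin" for BUILT_IN_SIN and "__vrs4_sinf" for
   BUILT_IN_SINF.  The helper name is hypothetical.  */
static void
example_acml_name (char name[20], const char *bname, int is_float)
{
  strcpy (name, "__vr.._");
  name[4] = is_float ? 's' : 'd';	/* Element type.  */
  name[5] = is_float ? '4' : '2';	/* Vector width.  */
  sprintf (name + 7, "%s", bname + 10);	/* Skip "__builtin_".  */
}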
27558 /* Returns a decl of a function that implements conversion of an integer vector
27559 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27560 are the types involved when converting according to CODE.
27561 Return NULL_TREE if it is not available. */
27564 ix86_vectorize_builtin_conversion (unsigned int code,
27565 tree dest_type, tree src_type)
27573 switch (TYPE_MODE (src_type))
27576 switch (TYPE_MODE (dest_type))
27579 return (TYPE_UNSIGNED (src_type)
27580 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27581 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27583 return (TYPE_UNSIGNED (src_type)
27585 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27591 switch (TYPE_MODE (dest_type))
27594 return (TYPE_UNSIGNED (src_type)
27596 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27605 case FIX_TRUNC_EXPR:
27606 switch (TYPE_MODE (dest_type))
27609 switch (TYPE_MODE (src_type))
27612 return (TYPE_UNSIGNED (dest_type)
27614 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27616 return (TYPE_UNSIGNED (dest_type)
27618 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27625 switch (TYPE_MODE (src_type))
27628 return (TYPE_UNSIGNED (dest_type)
27630 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27647 /* Returns a code for a target-specific builtin that implements
27648 reciprocal of the function, or NULL_TREE if not available. */
27651 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27652 bool sqrt ATTRIBUTE_UNUSED)
27654 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27655 && flag_finite_math_only && !flag_trapping_math
27656 && flag_unsafe_math_optimizations))
27660 /* Machine dependent builtins. */
27663 /* Vectorized version of sqrt to rsqrt conversion. */
27664 case IX86_BUILTIN_SQRTPS_NR:
27665 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27667 case IX86_BUILTIN_SQRTPS_NR256:
27668 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27674 /* Normal builtins. */
27677 /* Sqrt to rsqrt conversion. */
27678 case BUILT_IN_SQRTF:
27679 return ix86_builtins[IX86_BUILTIN_RSQRTF];
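/* Editorial sketch, not part of GCC: why the rsqrt substitution above
   is gated on flag_unsafe_math_optimizations and friends.  rsqrtps
   delivers only a ~12-bit approximation; the *_NR ("Newton-Raphson")
   variants refine it with one iteration, shown here in scalar form
   under a hypothetical name.  */
static float
example_rsqrt_nr (float a, float x0)
{
  /* One NR step for f(x) = 1/x^2 - a roughly doubles the number of
     correct bits: x1 = x0 * (1.5 - 0.5 * a * x0 * x0).  */
  return x0 * (1.5f - 0.5f * a * x0 * x0);
}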
27686 /* Helper for avx_vpermilps256_operand et al. This is also used by
27687 the expansion functions to turn the parallel back into a mask.
27688 The return value is 0 for no match and the imm8+1 for a match. */
27691 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27693 unsigned i, nelt = GET_MODE_NUNITS (mode);
27695 unsigned char ipar[8];
27697 if (XVECLEN (par, 0) != (int) nelt)
27700 /* Validate that all of the elements are constants, and not totally
27701 out of range. Copy the data into an integral array to make the
27702 subsequent checks easier. */
27703 for (i = 0; i < nelt; ++i)
27705 rtx er = XVECEXP (par, 0, i);
27706 unsigned HOST_WIDE_INT ei;
27708 if (!CONST_INT_P (er))
27719 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
27721 for (i = 0; i < 2; ++i)
27725 mask |= ipar[i] << i;
27727 for (i = 2; i < 4; ++i)
27731 mask |= (ipar[i] - 2) << i;
27736 /* In the 256-bit SFmode case, we have full freedom of movement
27737 within the low 128-bit lane, but the high 128-bit lane must
27738 mirror the exact same pattern. */
27739 for (i = 0; i < 4; ++i)
27740 if (ipar[i] + 4 != ipar[i + 4])
27747 /* In the 128-bit case, we have full freedom in the placement
27748 of the elements from the source operand. */
27749 for (i = 0; i < nelt; ++i)
27750 mask |= ipar[i] << (i * (nelt / 2));
27754 gcc_unreachable ();
27757 /* Make sure success has a non-zero value by adding one. */
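/* Editorial worked example, not part of GCC: for V4DFmode the parallel
   (parallel [1 0 3 2]) -- a swap within each 128-bit lane -- yields
   mask = 1<<0 | 0<<1 | (3-2)<<2 | (2-2)<<3 = 0b0101, and the helper
   returns 0b0101 + 1 = 6, i.e. imm8 5 for vpermilpd.  The same math in
   standalone form (validation elided, name hypothetical):  */
static unsigned
example_vpermilpd256_mask (const unsigned char sel[4])
{
  unsigned mask = 0, i;
  for (i = 0; i < 2; ++i)
    mask |= (sel[i] & 1) << i;		/* Low lane picks within {0,1}.  */
  for (i = 2; i < 4; ++i)
    mask |= ((sel[i] - 2) & 1) << i;	/* High lane picks within {2,3}.  */
  return mask + 1;			/* Non-zero on success.  */
}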
27761 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27762 the expansion functions to turn the parallel back into a mask.
27763 The return value is 0 for no match and the imm8+1 for a match. */
27766 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27768 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27770 unsigned char ipar[8];
27772 if (XVECLEN (par, 0) != (int) nelt)
27775 /* Validate that all of the elements are constants, and not totally
27776 out of range. Copy the data into an integral array to make the
27777 subsequent checks easier. */
27778 for (i = 0; i < nelt; ++i)
27780 rtx er = XVECEXP (par, 0, i);
27781 unsigned HOST_WIDE_INT ei;
27783 if (!CONST_INT_P (er))
27786 if (ei >= 2 * nelt)
27791 /* Validate that each half of the permute selects consecutive elements, i.e. is itself one contiguous half. */
27792 for (i = 0; i < nelt2 - 1; ++i)
27793 if (ipar[i] + 1 != ipar[i + 1])
27795 for (i = nelt2; i < nelt - 1; ++i)
27796 if (ipar[i] + 1 != ipar[i + 1])
27799 /* Reconstruct the mask. */
27800 for (i = 0; i < 2; ++i)
27802 unsigned e = ipar[i * nelt2];
27806 mask |= e << (i * 4);
27809 /* Make sure success has a non-zero value by adding one. */
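/* Editorial worked example, not part of GCC: vperm2f128 moves whole
   128-bit halves.  After the halves are validated above, each
   destination half is described by e = ipar[i * nelt2]; the elided
   lines presumably reject e unless it is a multiple of nelt2 and then
   divide it by nelt2, so e becomes 0/1 for the low/high half of the
   first operand and 2/3 for the second, packed into a nibble of the
   immediate.  For V4DFmode, (parallel [4 5 0 1]) gives e = {2, 0},
   mask = 0x02, and the function returns 0x03.  */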
27814 /* Store OPERAND to the memory after reload is completed. This means
27815 that we can't easily use assign_stack_local. */
27817 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27821 gcc_assert (reload_completed);
27822 if (ix86_using_red_zone ())
27824 result = gen_rtx_MEM (mode,
27825 gen_rtx_PLUS (Pmode,
27827 GEN_INT (-RED_ZONE_SIZE)));
27828 emit_move_insn (result, operand);
27830 else if (TARGET_64BIT)
27836 operand = gen_lowpart (DImode, operand);
27840 gen_rtx_SET (VOIDmode,
27841 gen_rtx_MEM (DImode,
27842 gen_rtx_PRE_DEC (DImode,
27843 stack_pointer_rtx)),
27847 gcc_unreachable ();
27849 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27858 split_double_mode (mode, &operand, 1, operands, operands + 1);
27860 gen_rtx_SET (VOIDmode,
27861 gen_rtx_MEM (SImode,
27862 gen_rtx_PRE_DEC (Pmode,
27863 stack_pointer_rtx)),
27866 gen_rtx_SET (VOIDmode,
27867 gen_rtx_MEM (SImode,
27868 gen_rtx_PRE_DEC (Pmode,
27869 stack_pointer_rtx)),
27874 /* Store HImodes as SImodes. */
27875 operand = gen_lowpart (SImode, operand);
27879 gen_rtx_SET (VOIDmode,
27880 gen_rtx_MEM (GET_MODE (operand),
27881 gen_rtx_PRE_DEC (SImode,
27882 stack_pointer_rtx)),
27886 gcc_unreachable ();
27888 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27893 /* Free the operand from memory. */
27895 ix86_free_from_memory (enum machine_mode mode)
27897 if (!ix86_using_red_zone ())
27901 if (mode == DImode || TARGET_64BIT)
27905 /* Use LEA to deallocate stack space. In peephole2 it will be converted
27906 to a pop or add instruction if registers are available. */
27907 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27908 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
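/* Editorial note, not part of GCC: the SET emitted above amounts to an
   "lea 4(%esp), %esp" style deallocation (8 bytes in the DImode/64-bit
   case).  A plausible reason LEA is preferred over ADD here is that
   LEA does not clobber the flags, which leaves peephole2 free to
   substitute a pop or an add when registers and flags liveness
   permit.  */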
27913 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
27914 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same. */
27916 static const reg_class_t *
27917 i386_ira_cover_classes (void)
27919 static const reg_class_t sse_fpmath_classes[] = {
27920 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
27922 static const reg_class_t no_sse_fpmath_classes[] = {
27923 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
27926 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
27929 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
27931 Put float CONST_DOUBLE in the constant pool instead of fp regs.
27932 QImode must go into class Q_REGS.
27933 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
27934 movdf to do mem-to-mem moves through integer regs. */
27937 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
27939 enum machine_mode mode = GET_MODE (x);
27941 /* We're only allowed to return a subclass of CLASS. Many of the
27942 following checks fail for NO_REGS, so eliminate that early. */
27943 if (regclass == NO_REGS)
27946 /* All classes can load zeros. */
27947 if (x == CONST0_RTX (mode))
27950 /* Force constants into memory if we are loading a (nonzero) constant into
27951 an MMX or SSE register. This is because there are no MMX/SSE instructions
27952 to load from a constant. */
27954 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
27957 /* Prefer SSE regs only if we can use them for math. */
27958 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
27959 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
27961 /* Floating-point constants need more complex checks. */
27962 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
27964 /* General regs can load everything. */
27965 if (reg_class_subset_p (regclass, GENERAL_REGS))
27968 /* Floats can load 0 and 1 plus some others. Note that we eliminated
27969 zero above. We only want to wind up preferring 80387 registers if
27970 we plan on doing computation with them. */
27972 && standard_80387_constant_p (x))
27974 /* Limit class to non-sse. */
27975 if (regclass == FLOAT_SSE_REGS)
27977 if (regclass == FP_TOP_SSE_REGS)
27979 if (regclass == FP_SECOND_SSE_REGS)
27980 return FP_SECOND_REG;
27981 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
27988 /* Generally when we see PLUS here, it's the function invariant
27989 (plus soft-fp const_int), which can only be computed into general registers. */
27991 if (GET_CODE (x) == PLUS)
27992 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
27994 /* QImode constants are easy to load, but non-constant QImode data
27995 must go into Q_REGS. */
27996 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
27998 if (reg_class_subset_p (regclass, Q_REGS))
28000 if (reg_class_subset_p (Q_REGS, regclass))
28008 /* Discourage putting floating-point values in SSE registers unless
28009 SSE math is being used, and likewise for the 387 registers. */
28011 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28013 enum machine_mode mode = GET_MODE (x);
28015 /* Restrict the output reload class to the register bank that we are doing
28016 math on. If we would prefer not to return a subset of CLASS, reject this
28017 alternative: if reload cannot do this, it will still use its choice. */
28018 mode = GET_MODE (x);
28019 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28020 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28022 if (X87_FLOAT_MODE_P (mode))
28024 if (regclass == FP_TOP_SSE_REGS)
28026 else if (regclass == FP_SECOND_SSE_REGS)
28027 return FP_SECOND_REG;
28029 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28036 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28037 enum machine_mode mode,
28038 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28040 /* QImode spills from non-QI registers require
28041 an intermediate register on 32-bit targets. */
28042 if (!in_p && mode == QImode && !TARGET_64BIT
28043 && (rclass == GENERAL_REGS
28044 || rclass == LEGACY_REGS
28045 || rclass == INDEX_REGS))
28054 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28055 regno = true_regnum (x);
28057 /* Return Q_REGS if the operand is in memory. */
28065 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28068 ix86_class_likely_spilled_p (reg_class_t rclass)
28079 case SSE_FIRST_REG:
28081 case FP_SECOND_REG:
28091 /* If we are copying between general and FP registers, we need a memory
28092 location. The same is true for SSE and MMX registers.
28094 To optimize register_move_cost performance, allow inline variant.
28096 The macro can't work reliably when one of the CLASSES is a class containing
28097 registers from multiple units (SSE, MMX, integer). We avoid this by never
28098 combining those units in a single alternative in the machine description.
28099 Ensure that this constraint holds to avoid unexpected surprises.
28101 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28102 enforce these sanity checks. */
28105 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28106 enum machine_mode mode, int strict)
28108 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28109 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28110 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28111 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28112 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28113 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28115 gcc_assert (!strict);
28119 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28122 /* ??? This is a lie. We do have moves between mmx/general, and for
28123 mmx/sse2. But by saying we need secondary memory we discourage the
28124 register allocator from using the mmx registers unless needed. */
28125 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28128 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28130 /* SSE1 doesn't have any direct moves from other classes. */
28134 /* If the target says that inter-unit moves are more expensive
28135 than moving through memory, then don't generate them. */
28136 if (!TARGET_INTER_UNIT_MOVES)
28139 /* Between SSE and general, we have moves no larger than word size. */
28140 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28148 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28149 enum machine_mode mode, int strict)
28151 return inline_secondary_memory_needed (class1, class2, mode, strict);
28154 /* Return true if the registers in CLASS cannot represent the change from
28155 modes FROM to TO. */
28158 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28159 enum reg_class regclass)
28164 /* x87 registers can't do subreg at all, as all values are reformatted
28165 to extended precision. */
28166 if (MAYBE_FLOAT_CLASS_P (regclass))
28169 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28171 /* Vector registers do not support QI or HImode loads. If we don't
28172 disallow a change to these modes, reload will assume it's ok to
28173 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28174 the vec_dupv4hi pattern. */
28175 if (GET_MODE_SIZE (from) < 4)
28178 /* Vector registers do not support subreg with nonzero offsets, which
28179 are otherwise valid for integer registers. Since we can't see
28180 whether we have a nonzero offset from here, prohibit all
28181 nonparadoxical subregs changing size. */
28182 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
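/* Editorial example, not part of GCC: the guards above are what stop
   reload from simplifying (subreg:SI (reg:HI 100) 0) when reg 100
   lives in an SSE or MMX register.  In a GPR such a subreg is just a
   wider view of the same bits, but vector registers have no QImode or
   HImode loads, so narrowed or resized views of them cannot be taken
   for free.  */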
28189 /* Return the cost of moving data of mode M between a
28190 register and memory. A value of 2 is the default; this cost is
28191 relative to those in `REGISTER_MOVE_COST'.
28193 This function is used extensively by register_move_cost, which is used to
28194 build tables at startup. Make it inline in this case.
28195 When IN is 2, return the maximum of the in and out move costs.
28197 If moving between registers and memory is more expensive than
28198 between two registers, you should define this macro to express the relative cost.
28201 Model also increased moving costs of QImode registers in non Q_REGS classes. */
28205 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28209 if (FLOAT_CLASS_P (regclass))
28227 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28228 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28230 if (SSE_CLASS_P (regclass))
28233 switch (GET_MODE_SIZE (mode))
28248 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28249 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28251 if (MMX_CLASS_P (regclass))
28254 switch (GET_MODE_SIZE (mode))
28266 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28267 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28269 switch (GET_MODE_SIZE (mode))
28272 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28275 return ix86_cost->int_store[0];
28276 if (TARGET_PARTIAL_REG_DEPENDENCY
28277 && optimize_function_for_speed_p (cfun))
28278 cost = ix86_cost->movzbl_load;
28280 cost = ix86_cost->int_load[0];
28282 return MAX (cost, ix86_cost->int_store[0]);
28288 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28290 return ix86_cost->movzbl_load;
28292 return ix86_cost->int_store[0] + 4;
28297 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28298 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28300 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28301 if (mode == TFmode)
28304 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28306 cost = ix86_cost->int_load[2];
28308 cost = ix86_cost->int_store[2];
28309 return (cost * (((int) GET_MODE_SIZE (mode)
28310 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28315 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28318 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28322 /* Return the cost of moving data from a register in class CLASS1 to
28323 one in class CLASS2.
28325 It is not required that the cost always equal 2 when FROM is the same as TO;
28326 on some machines it is expensive to move between registers if they are not
28327 general registers. */
28330 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28331 reg_class_t class2_i)
28333 enum reg_class class1 = (enum reg_class) class1_i;
28334 enum reg_class class2 = (enum reg_class) class2_i;
28336 /* In case we require secondary memory, compute cost of the store followed
28337 by load. In order to avoid bad register allocation choices, we need
28338 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28340 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28344 cost += inline_memory_move_cost (mode, class1, 2);
28345 cost += inline_memory_move_cost (mode, class2, 2);
28347 /* In case of copying from a general purpose register we may emit multiple
28348 stores followed by a single load, causing a memory size mismatch stall.
28349 Count this as an arbitrarily high cost of 20. */
28350 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28353 /* In the case of FP/MMX moves, the registers actually overlap, and we
28354 have to switch modes in order to treat them differently. */
28355 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28356 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28362 /* Moves between SSE/MMX and integer unit are expensive. */
28363 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28364 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28366 /* ??? By keeping the returned value relatively high, we limit the number
28367 of moves between integer and MMX/SSE registers for all targets.
28368 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
28369 where integer modes in MMX/SSE registers are not tieable
28370 because of missing QImode and HImode moves to, from, or between
28371 MMX/SSE registers. */
28372 return MAX (8, ix86_cost->mmxsse_to_integer);
28374 if (MAYBE_FLOAT_CLASS_P (class1))
28375 return ix86_cost->fp_move;
28376 if (MAYBE_SSE_CLASS_P (class1))
28377 return ix86_cost->sse_move;
28378 if (MAYBE_MMX_CLASS_P (class1))
28379 return ix86_cost->mmx_move;
28383 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28386 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28388 /* Flags, and only flags, can hold CCmode values. */
28389 if (CC_REGNO_P (regno))
28390 return GET_MODE_CLASS (mode) == MODE_CC;
28391 if (GET_MODE_CLASS (mode) == MODE_CC
28392 || GET_MODE_CLASS (mode) == MODE_RANDOM
28393 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28395 if (FP_REGNO_P (regno))
28396 return VALID_FP_MODE_P (mode);
28397 if (SSE_REGNO_P (regno))
28399 /* We implement the move patterns for all vector modes into and
28400 out of SSE registers, even when no operation instructions
28401 are available. OImode move is available only when AVX is enabled. */
28403 return ((TARGET_AVX && mode == OImode)
28404 || VALID_AVX256_REG_MODE (mode)
28405 || VALID_SSE_REG_MODE (mode)
28406 || VALID_SSE2_REG_MODE (mode)
28407 || VALID_MMX_REG_MODE (mode)
28408 || VALID_MMX_REG_MODE_3DNOW (mode));
28410 if (MMX_REGNO_P (regno))
28412 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28413 so if the register is available at all, then we can move data of
28414 the given mode into or out of it. */
28415 return (VALID_MMX_REG_MODE (mode)
28416 || VALID_MMX_REG_MODE_3DNOW (mode));
28419 if (mode == QImode)
28421 /* Take care with QImode values - they can be in non-QI regs,
28422 but then they do cause partial register stalls. */
28423 if (regno <= BX_REG || TARGET_64BIT)
28425 if (!TARGET_PARTIAL_REG_STALL)
28427 return reload_in_progress || reload_completed;
28429 /* We handle both integers and floats in the general purpose registers. */
28430 else if (VALID_INT_MODE_P (mode))
28432 else if (VALID_FP_MODE_P (mode))
28434 else if (VALID_DFP_MODE_P (mode))
28436 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28437 on to use that value in smaller contexts, this can easily force a
28438 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28439 supporting DImode, allow it. */
28440 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28446 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28447 tieable integer mode. */
28450 ix86_tieable_integer_mode_p (enum machine_mode mode)
28459 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28462 return TARGET_64BIT;
28469 /* Return true if MODE1 is accessible in a register that can hold MODE2
28470 without copying. That is, all register classes that can hold MODE2
28471 can also hold MODE1. */
28474 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28476 if (mode1 == mode2)
28479 if (ix86_tieable_integer_mode_p (mode1)
28480 && ix86_tieable_integer_mode_p (mode2))
28483 /* MODE2 being XFmode implies fp stack or general regs, which means we
28484 can tie any smaller floating point modes to it. Note that we do not
28485 tie this with TFmode. */
28486 if (mode2 == XFmode)
28487 return mode1 == SFmode || mode1 == DFmode;
28489 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28490 that we can tie it with SFmode. */
28491 if (mode2 == DFmode)
28492 return mode1 == SFmode;
28494 /* If MODE2 is only appropriate for an SSE register, then tie with
28495 any other mode acceptable to SSE registers. */
28496 if (GET_MODE_SIZE (mode2) == 16
28497 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28498 return (GET_MODE_SIZE (mode1) == 16
28499 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28501 /* If MODE2 is appropriate for an MMX register, then tie
28502 with any other mode acceptable to MMX registers. */
28503 if (GET_MODE_SIZE (mode2) == 8
28504 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28505 return (GET_MODE_SIZE (mode1) == 8
28506 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
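/* Editorial examples, not part of GCC: under the rules above, SFmode
   ties with DFmode and XFmode (every class that can hold the wider FP
   mode can hold SFmode too), V4SFmode ties only with other 16-byte
   SSE-capable modes such as V2DFmode or TImode, and DImode ties with
   the 8-byte MMX vector modes.  */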
28511 /* Compute a (partial) cost for rtx X. Return true if the complete
28512 cost has been computed, and false if subexpressions should be
28513 scanned. In either case, *TOTAL contains the cost result. */
28516 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28518 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28519 enum machine_mode mode = GET_MODE (x);
28520 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28528 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28530 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28532 else if (flag_pic && SYMBOLIC_CONST (x)
28534 || (GET_CODE (x) != LABEL_REF
28535 && (GET_CODE (x) != SYMBOL_REF
28536 || !SYMBOL_REF_LOCAL_P (x)))))
28543 if (mode == VOIDmode)
28546 switch (standard_80387_constant_p (x))
28551 default: /* Other constants */
28556 /* Start with (MEM (SYMBOL_REF)), since that's where
28557 it'll probably end up. Add a penalty for size. */
28558 *total = (COSTS_N_INSNS (1)
28559 + (flag_pic != 0 && !TARGET_64BIT)
28560 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28566 /* Zero extension is often completely free on x86_64, so make
28567 it as cheap as possible. */
28568 if (TARGET_64BIT && mode == DImode
28569 && GET_MODE (XEXP (x, 0)) == SImode)
28571 else if (TARGET_ZERO_EXTEND_WITH_AND)
28572 *total = cost->add;
28574 *total = cost->movzx;
28578 *total = cost->movsx;
28582 if (CONST_INT_P (XEXP (x, 1))
28583 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28585 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28588 *total = cost->add;
28591 if ((value == 2 || value == 3)
28592 && cost->lea <= cost->shift_const)
28594 *total = cost->lea;
28604 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28606 if (CONST_INT_P (XEXP (x, 1)))
28608 if (INTVAL (XEXP (x, 1)) > 32)
28609 *total = cost->shift_const + COSTS_N_INSNS (2);
28611 *total = cost->shift_const * 2;
28615 if (GET_CODE (XEXP (x, 1)) == AND)
28616 *total = cost->shift_var * 2;
28618 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28623 if (CONST_INT_P (XEXP (x, 1)))
28624 *total = cost->shift_const;
28626 *total = cost->shift_var;
28634 gcc_assert (FLOAT_MODE_P (mode));
28635 gcc_assert (TARGET_FMA || TARGET_FMA4);
28637 /* ??? SSE scalar/vector cost should be used here. */
28638 /* ??? Bald assumption that fma has the same cost as fmul. */
28639 *total = cost->fmul;
28640 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28642 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28644 if (GET_CODE (sub) == NEG)
28646 *total += rtx_cost (sub, FMA, speed);
28649 if (GET_CODE (sub) == NEG)
28651 *total += rtx_cost (sub, FMA, speed);
28656 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28658 /* ??? SSE scalar cost should be used here. */
28659 *total = cost->fmul;
28662 else if (X87_FLOAT_MODE_P (mode))
28664 *total = cost->fmul;
28667 else if (FLOAT_MODE_P (mode))
28669 /* ??? SSE vector cost should be used here. */
28670 *total = cost->fmul;
28675 rtx op0 = XEXP (x, 0);
28676 rtx op1 = XEXP (x, 1);
28678 if (CONST_INT_P (XEXP (x, 1)))
28680 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28681 for (nbits = 0; value != 0; value &= value - 1)
28685 /* This is arbitrary. */
28688 /* Compute costs correctly for widening multiplication. */
28689 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28690 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28691 == GET_MODE_SIZE (mode))
28693 int is_mulwiden = 0;
28694 enum machine_mode inner_mode = GET_MODE (op0);
28696 if (GET_CODE (op0) == GET_CODE (op1))
28697 is_mulwiden = 1, op1 = XEXP (op1, 0);
28698 else if (CONST_INT_P (op1))
28700 if (GET_CODE (op0) == SIGN_EXTEND)
28701 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28704 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28708 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28711 *total = (cost->mult_init[MODE_INDEX (mode)]
28712 + nbits * cost->mult_bit
28713 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28722 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28723 /* ??? SSE cost should be used here. */
28724 *total = cost->fdiv;
28725 else if (X87_FLOAT_MODE_P (mode))
28726 *total = cost->fdiv;
28727 else if (FLOAT_MODE_P (mode))
28728 /* ??? SSE vector cost should be used here. */
28729 *total = cost->fdiv;
28731 *total = cost->divide[MODE_INDEX (mode)];
28735 if (GET_MODE_CLASS (mode) == MODE_INT
28736 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28738 if (GET_CODE (XEXP (x, 0)) == PLUS
28739 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28740 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28741 && CONSTANT_P (XEXP (x, 1)))
28743 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28744 if (val == 2 || val == 4 || val == 8)
28746 *total = cost->lea;
28747 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28748 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28749 outer_code, speed);
28750 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28754 else if (GET_CODE (XEXP (x, 0)) == MULT
28755 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28757 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28758 if (val == 2 || val == 4 || val == 8)
28760 *total = cost->lea;
28761 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28762 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28766 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28768 *total = cost->lea;
28769 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28770 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28771 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28778 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28780 /* ??? SSE cost should be used here. */
28781 *total = cost->fadd;
28784 else if (X87_FLOAT_MODE_P (mode))
28786 *total = cost->fadd;
28789 else if (FLOAT_MODE_P (mode))
28791 /* ??? SSE vector cost should be used here. */
28792 *total = cost->fadd;
28800 if (!TARGET_64BIT && mode == DImode)
28802 *total = (cost->add * 2
28803 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28804 << (GET_MODE (XEXP (x, 0)) != DImode))
28805 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28806 << (GET_MODE (XEXP (x, 1)) != DImode)));
28812 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28814 /* ??? SSE cost should be used here. */
28815 *total = cost->fchs;
28818 else if (X87_FLOAT_MODE_P (mode))
28820 *total = cost->fchs;
28823 else if (FLOAT_MODE_P (mode))
28825 /* ??? SSE vector cost should be used here. */
28826 *total = cost->fchs;
28832 if (!TARGET_64BIT && mode == DImode)
28833 *total = cost->add * 2;
28835 *total = cost->add;
28839 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28840 && XEXP (XEXP (x, 0), 1) == const1_rtx
28841 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28842 && XEXP (x, 1) == const0_rtx)
28844 /* This kind of construct is implemented using test[bwl].
28845 Treat it as if we had an AND. */
28846 *total = (cost->add
28847 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28848 + rtx_cost (const1_rtx, outer_code, speed));
28854 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28859 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28860 /* ??? SSE cost should be used here. */
28861 *total = cost->fabs;
28862 else if (X87_FLOAT_MODE_P (mode))
28863 *total = cost->fabs;
28864 else if (FLOAT_MODE_P (mode))
28865 /* ??? SSE vector cost should be used here. */
28866 *total = cost->fabs;
28870 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28871 /* ??? SSE cost should be used here. */
28872 *total = cost->fsqrt;
28873 else if (X87_FLOAT_MODE_P (mode))
28874 *total = cost->fsqrt;
28875 else if (FLOAT_MODE_P (mode))
28876 /* ??? SSE vector cost should be used here. */
28877 *total = cost->fsqrt;
28881 if (XINT (x, 1) == UNSPEC_TP)
28888 case VEC_DUPLICATE:
28889 /* ??? Assume all of these vector manipulation patterns are
28890 recognizable, in which case they all pretty much have the same cost. */
28892 *total = COSTS_N_INSNS (1);
28902 static int current_machopic_label_num;
28904 /* Given a symbol name and its associated stub, write out the
28905 definition of the stub. */
28908 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28910 unsigned int length;
28911 char *binder_name, *symbol_name, lazy_ptr_name[32];
28912 int label = ++current_machopic_label_num;
28914 /* For 64-bit we shouldn't get here. */
28915 gcc_assert (!TARGET_64BIT);
28917 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28918 symb = targetm.strip_name_encoding (symb);
28920 length = strlen (stub);
28921 binder_name = XALLOCAVEC (char, length + 32);
28922 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
28924 length = strlen (symb);
28925 symbol_name = XALLOCAVEC (char, length + 32);
28926 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28928 sprintf (lazy_ptr_name, "L%d$lz", label);
28930 if (MACHOPIC_ATT_STUB)
28931 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
28932 else if (MACHOPIC_PURE)
28934 if (TARGET_DEEP_BRANCH_PREDICTION)
28935 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
28937 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
28940 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
28942 fprintf (file, "%s:\n", stub);
28943 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28945 if (MACHOPIC_ATT_STUB)
28947 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
28949 else if (MACHOPIC_PURE)
28952 if (TARGET_DEEP_BRANCH_PREDICTION)
28954 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28955 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
28956 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
28957 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
28961 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
28962 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
28963 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
28965 fprintf (file, "\tjmp\t*%%ecx\n");
28968 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
28970 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
28971 it needs no stub-binding-helper. */
28972 if (MACHOPIC_ATT_STUB)
28975 fprintf (file, "%s:\n", binder_name);
28979 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
28980 fprintf (file, "\tpushl\t%%ecx\n");
28983 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
28985 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
28987 /* N.B. Keep the correspondence of these
28988 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
28989 old-pic/new-pic/non-pic stubs; altering this will break
28990 compatibility with existing dylibs. */
28994 if (TARGET_DEEP_BRANCH_PREDICTION)
28995 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28996 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
28998 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
28999 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29002 /* 16-byte -mdynamic-no-pic stub. */
29003 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
29005 fprintf (file, "%s:\n", lazy_ptr_name);
29006 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29007 fprintf (file, ASM_LONG "%s\n", binder_name);
29009 #endif /* TARGET_MACHO */
29011 /* Order the registers for the register allocator. */
29014 x86_order_regs_for_local_alloc (void)
29019 /* First allocate the local general purpose registers. */
29020 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29021 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29022 reg_alloc_order [pos++] = i;
29024 /* Global general purpose registers. */
29025 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29026 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29027 reg_alloc_order [pos++] = i;
29029 /* x87 registers come first in case we are doing FP math using them. */
29031 if (!TARGET_SSE_MATH)
29032 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29033 reg_alloc_order [pos++] = i;
29035 /* SSE registers. */
29036 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29037 reg_alloc_order [pos++] = i;
29038 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29039 reg_alloc_order [pos++] = i;
29041 /* x87 registers. */
29042 if (TARGET_SSE_MATH)
29043 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29044 reg_alloc_order [pos++] = i;
29046 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29047 reg_alloc_order [pos++] = i;
29049 /* Initialize the rest of the array, as we do not allocate some registers at all. */
29051 while (pos < FIRST_PSEUDO_REGISTER)
29052 reg_alloc_order [pos++] = 0;
29055 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
29056 struct attribute_spec.handler. */
29058 ix86_handle_abi_attribute (tree *node, tree name,
29059 tree args ATTRIBUTE_UNUSED,
29060 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29062 if (TREE_CODE (*node) != FUNCTION_TYPE
29063 && TREE_CODE (*node) != METHOD_TYPE
29064 && TREE_CODE (*node) != FIELD_DECL
29065 && TREE_CODE (*node) != TYPE_DECL)
29067 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29069 *no_add_attrs = true;
29074 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29076 *no_add_attrs = true;
29080 /* Can combine regparm with all attributes but fastcall. */
29081 if (is_attribute_p ("ms_abi", name))
29083 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29085 error ("ms_abi and sysv_abi attributes are not compatible");
29090 else if (is_attribute_p ("sysv_abi", name))
29092 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29094 error ("ms_abi and sysv_abi attributes are not compatible");
29103 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29104 struct attribute_spec.handler. */
29106 ix86_handle_struct_attribute (tree *node, tree name,
29107 tree args ATTRIBUTE_UNUSED,
29108 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29111 if (DECL_P (*node))
29113 if (TREE_CODE (*node) == TYPE_DECL)
29114 type = &TREE_TYPE (*node);
29119 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29120 || TREE_CODE (*type) == UNION_TYPE)))
29122 warning (OPT_Wattributes, "%qE attribute ignored",
29124 *no_add_attrs = true;
29127 else if ((is_attribute_p ("ms_struct", name)
29128 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29129 || ((is_attribute_p ("gcc_struct", name)
29130 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29132 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29134 *no_add_attrs = true;
29141 ix86_handle_fndecl_attribute (tree *node, tree name,
29142 tree args ATTRIBUTE_UNUSED,
29143 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29145 if (TREE_CODE (*node) != FUNCTION_DECL)
29147 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29149 *no_add_attrs = true;
29155 ix86_ms_bitfield_layout_p (const_tree record_type)
29157 return ((TARGET_MS_BITFIELD_LAYOUT
29158 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29159 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29162 /* Returns an expression indicating where the this parameter is
29163 located on entry to the FUNCTION. */
29166 x86_this_parameter (tree function)
29168 tree type = TREE_TYPE (function);
29169 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29174 const int *parm_regs;
29176 if (ix86_function_type_abi (type) == MS_ABI)
29177 parm_regs = x86_64_ms_abi_int_parameter_registers;
29179 parm_regs = x86_64_int_parameter_registers;
29180 return gen_rtx_REG (DImode, parm_regs[aggr]);
29183 nregs = ix86_function_regparm (type, function);
29185 if (nregs > 0 && !stdarg_p (type))
29189 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
29190 regno = aggr ? DX_REG : CX_REG;
29191 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
29195 return gen_rtx_MEM (SImode,
29196 plus_constant (stack_pointer_rtx, 4));
29205 return gen_rtx_MEM (SImode,
29206 plus_constant (stack_pointer_rtx, 4));
29209 return gen_rtx_REG (SImode, regno);
29212 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29215 /* Determine whether x86_output_mi_thunk can succeed. */
29218 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29219 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29220 HOST_WIDE_INT vcall_offset, const_tree function)
29222 /* 64-bit can handle anything. */
29226 /* For 32-bit, everything's fine if we have one free register. */
29227 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29230 /* Need a free register for vcall_offset. */
29234 /* Need a free register for GOT references. */
29235 if (flag_pic && !targetm.binds_local_p (function))
29238 /* Otherwise ok. */
29242 /* Output the assembler code for a thunk function. THUNK_DECL is the
29243 declaration for the thunk function itself, FUNCTION is the decl for
29244 the target function. DELTA is an immediate constant offset to be
29245 added to THIS. If VCALL_OFFSET is nonzero, the word at
29246 *(*this + vcall_offset) should be added to THIS. */
29249 x86_output_mi_thunk (FILE *file,
29250 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29251 HOST_WIDE_INT vcall_offset, tree function)
29254 rtx this_param = x86_this_parameter (function);
29257 /* Make sure unwind info is emitted for the thunk if needed. */
29258 final_start_function (emit_barrier (), file, 1);
29260 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29261 pull it in now and let DELTA benefit. */
29262 if (REG_P (this_param))
29263 this_reg = this_param;
29264 else if (vcall_offset)
29266 /* Put the this parameter into %eax. */
29267 xops[0] = this_param;
29268 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29269 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29272 this_reg = NULL_RTX;
29274 /* Adjust the this parameter by a fixed constant. */
29277 xops[0] = GEN_INT (delta);
29278 xops[1] = this_reg ? this_reg : this_param;
29281 if (!x86_64_general_operand (xops[0], DImode))
29283 tmp = gen_rtx_REG (DImode, R10_REG);
29285 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29287 xops[1] = this_param;
29289 if (x86_maybe_negate_const_int (&xops[0], DImode))
29290 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29292 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29294 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29295 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29297 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29300 /* Adjust the this parameter by a value stored in the vtable. */
29304 tmp = gen_rtx_REG (DImode, R10_REG);
29307 int tmp_regno = CX_REG;
29308 if (lookup_attribute ("fastcall",
29309 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29310 || lookup_attribute ("thiscall",
29311 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29312 tmp_regno = AX_REG;
29313 tmp = gen_rtx_REG (SImode, tmp_regno);
29316 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29318 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29320 /* Adjust the this parameter. */
29321 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29322 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29324 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29325 xops[0] = GEN_INT (vcall_offset);
29327 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29328 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29330 xops[1] = this_reg;
29331 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29334 /* If necessary, drop THIS back to its stack slot. */
29335 if (this_reg && this_reg != this_param)
29337 xops[0] = this_reg;
29338 xops[1] = this_param;
29339 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29342 xops[0] = XEXP (DECL_RTL (function), 0);
29345 if (!flag_pic || targetm.binds_local_p (function)
29346 || DEFAULT_ABI == MS_ABI)
29347 output_asm_insn ("jmp\t%P0", xops);
29348 /* All thunks should be in the same object as their target,
29349 and thus binds_local_p should be true. */
29350 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29351 gcc_unreachable ();
29354 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29355 tmp = gen_rtx_CONST (Pmode, tmp);
29356 tmp = gen_rtx_MEM (QImode, tmp);
29358 output_asm_insn ("jmp\t%A0", xops);
29363 if (!flag_pic || targetm.binds_local_p (function))
29364 output_asm_insn ("jmp\t%P0", xops);
29369 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29370 if (TARGET_MACHO_BRANCH_ISLANDS)
29371 sym_ref = (gen_rtx_SYMBOL_REF
29373 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29374 tmp = gen_rtx_MEM (QImode, sym_ref);
29376 output_asm_insn ("jmp\t%0", xops);
29379 #endif /* TARGET_MACHO */
29381 tmp = gen_rtx_REG (SImode, CX_REG);
29382 output_set_got (tmp, NULL_RTX);
29385 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29386 output_asm_insn ("jmp\t{*}%1", xops);
29389 final_end_function ();
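/* Editorial sketch, not part of GCC: for a simple ia32 thunk with
   DELTA = -4, no VCALL_OFFSET, and a locally bound target, the routine
   above prints roughly

	subl	$4, 4(%esp)	# adjust 'this' in its stack slot
	jmp	target		# tail call the real function

   with x86_maybe_negate_const_int turning "addl $-4" into the prettier
   "subl $4" form.  */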
29393 x86_file_start (void)
29395 default_file_start ();
29397 darwin_file_start ();
29399 if (X86_FILE_START_VERSION_DIRECTIVE)
29400 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29401 if (X86_FILE_START_FLTUSED)
29402 fputs ("\t.global\t__fltused\n", asm_out_file);
29403 if (ix86_asm_dialect == ASM_INTEL)
29404 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29408 x86_field_alignment (tree field, int computed)
29410 enum machine_mode mode;
29411 tree type = TREE_TYPE (field);
29413 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29415 mode = TYPE_MODE (strip_array_types (type));
29416 if (mode == DFmode || mode == DCmode
29417 || GET_MODE_CLASS (mode) == MODE_INT
29418 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29419 return MIN (32, computed);
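/* Editorial example, not part of GCC: on ia32 without -malign-double,
   the cap above limits a double field's alignment to 32 bits, so
   struct { char c; double d; } places d at offset 4 rather than 8,
   matching the traditional SysV ia32 struct layout.  */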
29423 /* Output assembler code to FILE to increment profiler label # LABELNO
29424 for profiling a function entry. */
29426 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29428 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29433 #ifndef NO_PROFILE_COUNTERS
29434 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29437 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29438 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29440 fprintf (file, "\tcall\t%s\n", mcount_name);
29444 #ifndef NO_PROFILE_COUNTERS
29445 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29448 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29452 #ifndef NO_PROFILE_COUNTERS
29453 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29456 fprintf (file, "\tcall\t%s\n", mcount_name);
29460 /* We don't have exact information about the insn sizes, but we may assume
29461 quite safely that we are informed about all 1 byte insns and memory
29462 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
29466 min_insn_size (rtx insn)
29470 if (!INSN_P (insn) || !active_insn_p (insn))
29473 /* Discard alignments we've emitted, and jump instructions. */
29474 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29475 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29477 if (JUMP_TABLE_DATA_P (insn))
29480 /* Important case - calls are always 5 bytes.
29481 It is common to have many calls in a row. */
29483 && symbolic_reference_mentioned_p (PATTERN (insn))
29484 && !SIBLING_CALL_P (insn))
29486 len = get_attr_length (insn);
29490 /* For normal instructions we rely on get_attr_length being exact,
29491 with a few exceptions. */
29492 if (!JUMP_P (insn))
29494 enum attr_type type = get_attr_type (insn);
29499 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29500 || asm_noperands (PATTERN (insn)) >= 0)
29507 /* Otherwise trust get_attr_length. */
29511 l = get_attr_length_address (insn);
29512 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29521 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29523 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
29527 ix86_avoid_jump_mispredicts (void)
29529 rtx insn, start = get_insns ();
29530 int nbytes = 0, njumps = 0;
29533 /* Look for all minimal intervals of instructions containing 4 jumps.
29534 The intervals are bounded by START and INSN. NBYTES is the total
29535 size of instructions in the interval including INSN and not including
29536 START. When NBYTES is smaller than 16 bytes, it is possible
29537 that the ends of START and INSN fall in the same 16-byte page.
29539 The smallest offset at which INSN can start in that page is the case where
29540 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
29541 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
29543 for (insn = start; insn; insn = NEXT_INSN (insn))
29547 if (LABEL_P (insn))
29549 int align = label_to_alignment (insn);
29550 int max_skip = label_to_max_skip (insn);
29554 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29555 already in the current 16 byte page, because otherwise
29556 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29557 bytes to reach 16 byte boundary. */
29559 || (align <= 3 && max_skip != (1 << align) - 1))
29562 fprintf (dump_file, "Label %i with max_skip %i\n",
29563 INSN_UID (insn), max_skip);
29566 while (nbytes + max_skip >= 16)
29568 start = NEXT_INSN (start);
29569 if ((JUMP_P (start)
29570 && GET_CODE (PATTERN (start)) != ADDR_VEC
29571 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29573 njumps--, isjump = 1;
29576 nbytes -= min_insn_size (start);
29582 min_size = min_insn_size (insn);
29583 nbytes += min_size;
29585 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29586 INSN_UID (insn), min_size);
29588 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29589 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29597 start = NEXT_INSN (start);
29598 if ((JUMP_P (start)
29599 && GET_CODE (PATTERN (start)) != ADDR_VEC
29600 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29602 njumps--, isjump = 1;
29605 nbytes -= min_insn_size (start);
29607 gcc_assert (njumps >= 0);
29609 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29610 INSN_UID (start), INSN_UID (insn), nbytes);
29612 if (njumps == 3 && isjump && nbytes < 16)
29614 int padsize = 15 - nbytes + min_insn_size (insn);
29617 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29618 INSN_UID (insn), padsize);
29619 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
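/* Editorial worked example, not part of GCC: suppose the sliding
   window above finds four jumps whose sizes sum to NBYTES = 12, with
   the last jump 2 bytes long.  That jump could start as early as
   offset 10 of a 16-byte page, so gen_pad requests
   padsize = 15 - 12 + 2 = 5 bytes, enough to push its end past the
   16-byte boundary and split the four jumps across two windows.  */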
29625 /* AMD Athlon works faster
29626 when RET is not the destination of a conditional jump or directly preceded
29627 by another jump instruction. We avoid the penalty by inserting a NOP just
29628 before the RET instructions in such cases. */
29630 ix86_pad_returns (void)
29635 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29637 basic_block bb = e->src;
29638 rtx ret = BB_END (bb);
29640 bool replace = false;
29642 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29643 || optimize_bb_for_size_p (bb))
29645 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29646 if (active_insn_p (prev) || LABEL_P (prev))
29648 if (prev && LABEL_P (prev))
29653 FOR_EACH_EDGE (e, ei, bb->preds)
29654 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29655 && !(e->flags & EDGE_FALLTHRU))
29660 prev = prev_active_insn (ret);
29662 && ((JUMP_P (prev) && any_condjump_p (prev))
29665 /* Empty functions get a branch mispredict even when
29666 the jump destination is not visible to us. */
29667 if (!prev && !optimize_function_for_size_p (cfun))
29672 emit_jump_insn_before (gen_return_internal_long (), ret);
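/* Editorial note, not part of GCC's comments: return_internal_long
   prints the return as "rep ret".  The REP prefix is an architectural
   no-op in front of RET but makes the return a two-byte instruction,
   which is the form AMD recommends for sidestepping the predictor
   penalty described above.  */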
29678 /* Count the minimum number of instructions in BB. Return 4 if the
29679 number of instructions >= 4. */
29682 ix86_count_insn_bb (basic_block bb)
29685 int insn_count = 0;
29687 /* Count number of instructions in this block. Return 4 if the number
29688 of instructions >= 4. */
29689 FOR_BB_INSNS (bb, insn)
29691 /* This only happens in exit blocks. */
29693 && GET_CODE (PATTERN (insn)) == RETURN)
29696 if (NONDEBUG_INSN_P (insn)
29697 && GET_CODE (PATTERN (insn)) != USE
29698 && GET_CODE (PATTERN (insn)) != CLOBBER)
29701 if (insn_count >= 4)
29710 /* Count the minimum number of instructions in the code path ending in BB.
29711 Return 4 if the number of instructions >= 4. */
29714 ix86_count_insn (basic_block bb)
29718 int min_prev_count;
29720 /* Only bother counting instructions along paths with no
29721 more than 2 basic blocks between entry and exit. Given
29722 that BB has an edge to exit, determine if a predecessor
29723 of BB has an edge from entry. If so, compute the number
29724 of instructions in the predecessor block. If there
29725 happen to be multiple such blocks, compute the minimum. */
29726 min_prev_count = 4;
29727 FOR_EACH_EDGE (e, ei, bb->preds)
29730 edge_iterator prev_ei;
29732 if (e->src == ENTRY_BLOCK_PTR)
29734 min_prev_count = 0;
29737 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29739 if (prev_e->src == ENTRY_BLOCK_PTR)
29741 int count = ix86_count_insn_bb (e->src);
29742 if (count < min_prev_count)
29743 min_prev_count = count;
29749 if (min_prev_count < 4)
29750 min_prev_count += ix86_count_insn_bb (bb);
29752 return min_prev_count;
29755 /* Pad short functions to 4 instructions. */
29758 ix86_pad_short_function (void)
29763 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29765 rtx ret = BB_END (e->src);
29766 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29768 int insn_count = ix86_count_insn (e->src);
29770 /* Pad short function. */
29771 if (insn_count < 4)
29775 /* Find epilogue. */
29778 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29779 insn = PREV_INSN (insn);
29784 /* Two NOPs count as one instruction. */
29785 insn_count = 2 * (4 - insn_count);
29786 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
29792 /* Implement machine specific optimizations. We implement padding of returns
29793 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
29797 /* We are freeing block_for_insn in the toplev to keep compatibility
29798 with old MDEP_REORGS that are not CFG based. Recompute it now. */
29799 compute_bb_for_insn ();
29801 if (optimize && optimize_function_for_speed_p (cfun))
29803 if (TARGET_PAD_SHORT_FUNCTION)
29804 ix86_pad_short_function ();
29805 else if (TARGET_PAD_RETURNS)
29806 ix86_pad_returns ();
29807 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29808 if (TARGET_FOUR_JUMP_LIMIT)
29809 ix86_avoid_jump_mispredicts ();
29813 /* Run the vzeroupper optimization if needed. */
29814 if (TARGET_VZEROUPPER)
29815 move_or_delete_vzeroupper ();
29818 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
29821 x86_extended_QIreg_mentioned_p (rtx insn)
29824 extract_insn_cached (insn);
29825 for (i = 0; i < recog_data.n_operands; i++)
29826 if (REG_P (recog_data.operand[i])
29827 && REGNO (recog_data.operand[i]) > BX_REG)
29832 /* Return nonzero when P points to a register encoded via a REX prefix.
29833 Called via for_each_rtx. */
29835 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
29837 unsigned int regno;
29840 regno = REGNO (*p);
29841 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
29844 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
29847 x86_extended_reg_mentioned_p (rtx insn)
29849 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
29850 extended_reg_mentioned_1, NULL);
29853 /* If profitable, negate (without causing overflow) integer constant
29854 of mode MODE at location LOC. Return true in this case. */
29856 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
29860 if (!CONST_INT_P (*loc))
29866 /* DImode x86_64 constants must fit in 32 bits. */
29867 gcc_assert (x86_64_immediate_operand (*loc, mode));
29878 gcc_unreachable ();
29881 /* Avoid overflows. */
29882 if (mode_signbit_p (mode, *loc))
29885 val = INTVAL (*loc);
29887 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
29888 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
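/* Worked example of the encoding rule above (illustrative, based on
   standard x86 immediate encodings): an immediate in [-128, 127] fits
   in a one-byte imm8 field, anything outside needs four bytes.  So
   `addl $-4,%eax' becomes `subl $4,%eax' purely for readability, while
   -128 is never negated (`subl $128' would need imm32) and 128 is
   negated to -128 (`addl $-128' still fits in imm8).  */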
29889 if ((val < 0 && val != -128)
29892 *loc = GEN_INT (-val);
29899 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
29900 optabs would emit if we didn't have TFmode patterns. */
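/* A minimal C sketch of the expansion below, assuming a DImode input
   and a binary64 result; "floatuns64" is a hypothetical name used only
   for illustration:

     double floatuns64 (uint64_t u)
     {
       if ((int64_t) u >= 0)
         return (double) (int64_t) u;
       else
         {
           uint64_t half = (u >> 1) | (u & 1);
           return 2.0 * (double) (int64_t) half;
         }
     }

   Halving brings the value into signed range; OR-ing the shifted-out
   low bit back in preserves the rounding of the conversion, and the
   final doubling compensates for the halving.  */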
29903 x86_emit_floatuns (rtx operands[2])
29905 rtx neglab, donelab, i0, i1, f0, in, out;
29906 enum machine_mode mode, inmode;
29908 inmode = GET_MODE (operands[1]);
29909 gcc_assert (inmode == SImode || inmode == DImode);
29912 in = force_reg (inmode, operands[1]);
29913 mode = GET_MODE (out);
29914 neglab = gen_label_rtx ();
29915 donelab = gen_label_rtx ();
29916 f0 = gen_reg_rtx (mode);
29918 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
29920 expand_float (out, in, 0);
29922 emit_jump_insn (gen_jump (donelab));
29925 emit_label (neglab);
29927 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
29929 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
29931 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
29933 expand_float (f0, i0, 0);
29935 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
29937 emit_label (donelab);
29940 /* AVX does not support 32-byte integer vector operations,
29941 thus the longest vector we are faced with is V16QImode. */
29942 #define MAX_VECT_LEN 16
29944 struct expand_vec_perm_d
29946 rtx target, op0, op1;
29947 unsigned char perm[MAX_VECT_LEN];
29948 enum machine_mode vmode;
29949 unsigned char nelt;
29953 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
29954 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
29956 /* Get a vector mode of the same size as the original but with elements
29957 twice as wide (e.g. V8HImode -> V4SImode). This is only guaranteed to apply to integral vectors. */
29959 static inline enum machine_mode
29960 get_mode_wider_vector (enum machine_mode o)
29962 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
29963 enum machine_mode n = GET_MODE_WIDER_MODE (o);
29964 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
29965 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
29969 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29970 with all elements equal to VAR. Return true if successful. */
29973 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
29974 rtx target, rtx val)
29997 /* First attempt to recognize VAL as-is. */
29998 dup = gen_rtx_VEC_DUPLICATE (mode, val);
29999 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30000 if (recog_memoized (insn) < 0)
30003 /* If that fails, force VAL into a register. */
30006 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30007 seq = get_insns ();
30010 emit_insn_before (seq, insn);
30012 ok = recog_memoized (insn) >= 0;
30021 if (TARGET_SSE || TARGET_3DNOW_A)
30025 val = gen_lowpart (SImode, val);
30026 x = gen_rtx_TRUNCATE (HImode, val);
30027 x = gen_rtx_VEC_DUPLICATE (mode, x);
30028 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30041 struct expand_vec_perm_d dperm;
30045 memset (&dperm, 0, sizeof (dperm));
30046 dperm.target = target;
30047 dperm.vmode = mode;
30048 dperm.nelt = GET_MODE_NUNITS (mode);
30049 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30051 /* Extend to SImode using a paradoxical SUBREG. */
30052 tmp1 = gen_reg_rtx (SImode);
30053 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30055 /* Insert the SImode value as low element of a V4SImode vector. */
30056 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30057 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30059 ok = (expand_vec_perm_1 (&dperm)
30060 || expand_vec_perm_broadcast_1 (&dperm));
30072 /* Replicate the value once into the next wider mode and recurse. */
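/* Sketch of the replication step below, assuming a QImode element V
   and HImode as the wider mode: the shift/IOR pair computes
       w = ((uint16_t) V << 8) | (uint16_t) V,
   i.e. an HImode scalar holding two copies of V, which the recursive
   call then broadcasts across the wider vector mode.  */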
30074 enum machine_mode smode, wsmode, wvmode;
30077 smode = GET_MODE_INNER (mode);
30078 wvmode = get_mode_wider_vector (mode);
30079 wsmode = GET_MODE_INNER (wvmode);
30081 val = convert_modes (wsmode, smode, val, true);
30082 x = expand_simple_binop (wsmode, ASHIFT, val,
30083 GEN_INT (GET_MODE_BITSIZE (smode)),
30084 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30085 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30087 x = gen_lowpart (wvmode, target);
30088 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30096 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30097 rtx x = gen_reg_rtx (hvmode);
30099 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30102 x = gen_rtx_VEC_CONCAT (mode, x, x);
30103 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30112 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30113 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
30117 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30118 rtx target, rtx var, int one_var)
30120 enum machine_mode vsimode;
30123 bool use_vector_set = false;
30128 /* For SSE4.1, we normally use vector set. But if the second
30129 element is zero and inter-unit moves are OK, we use movq instead. */
30131 use_vector_set = (TARGET_64BIT
30133 && !(TARGET_INTER_UNIT_MOVES
30139 use_vector_set = TARGET_SSE4_1;
30142 use_vector_set = TARGET_SSE2;
30145 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30152 use_vector_set = TARGET_AVX;
30155 /* Use ix86_expand_vector_set in 64bit mode only. */
30156 use_vector_set = TARGET_AVX && TARGET_64BIT;
30162 if (use_vector_set)
30164 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30165 var = force_reg (GET_MODE_INNER (mode), var);
30166 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30182 var = force_reg (GET_MODE_INNER (mode), var);
30183 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30184 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30189 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30190 new_target = gen_reg_rtx (mode);
30192 new_target = target;
30193 var = force_reg (GET_MODE_INNER (mode), var);
30194 x = gen_rtx_VEC_DUPLICATE (mode, var);
30195 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30196 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30199 /* We need to shuffle the value to the correct position, so
30200 create a new pseudo to store the intermediate result. */
30202 /* With SSE2, we can use the integer shuffle insns. */
30203 if (mode != V4SFmode && TARGET_SSE2)
30205 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30207 GEN_INT (one_var == 1 ? 0 : 1),
30208 GEN_INT (one_var == 2 ? 0 : 1),
30209 GEN_INT (one_var == 3 ? 0 : 1)));
30210 if (target != new_target)
30211 emit_move_insn (target, new_target);
30215 /* Otherwise convert the intermediate result to V4SFmode and
30216 use the SSE1 shuffle instructions. */
30217 if (mode != V4SFmode)
30219 tmp = gen_reg_rtx (V4SFmode);
30220 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30225 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30227 GEN_INT (one_var == 1 ? 0 : 1),
30228 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30229 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30231 if (mode != V4SFmode)
30232 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30233 else if (tmp != target)
30234 emit_move_insn (target, tmp);
30236 else if (target != new_target)
30237 emit_move_insn (target, new_target);
30242 vsimode = V4SImode;
30248 vsimode = V2SImode;
30254 /* Zero extend the variable element to SImode and recurse. */
30255 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30257 x = gen_reg_rtx (vsimode);
30258 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30260 gcc_unreachable ();
30262 emit_move_insn (target, gen_lowpart (mode, x));
30270 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30271 consisting of the values in VALS. It is known that all elements
30272 except ONE_VAR are constants. Return true if successful. */
30275 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30276 rtx target, rtx vals, int one_var)
30278 rtx var = XVECEXP (vals, 0, one_var);
30279 enum machine_mode wmode;
30282 const_vec = copy_rtx (vals);
30283 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30284 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30292 /* For the two element vectors, it's just as easy to use
30293 the general case. */
30297 /* Use ix86_expand_vector_set in 64bit mode only. */
30320 /* There's no way to set one QImode entry easily. Combine
30321 the variable value with its adjacent constant value, and
30322 promote to an HImode set. */
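/* Sketch of the combination below, assuming little-endian byte order,
   with V the variable byte and C its adjacent constant byte:
     one_var odd:   h = ((uint16_t) V << 8) | (C & 0xff);
     one_var even:  h = ((uint16_t) C << 8) | (V & 0xff);
   the HImode value h is then inserted at element one_var >> 1.  */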
30323 x = XVECEXP (vals, 0, one_var ^ 1);
30326 var = convert_modes (HImode, QImode, var, true);
30327 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30328 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30329 x = GEN_INT (INTVAL (x) & 0xff);
30333 var = convert_modes (HImode, QImode, var, true);
30334 x = gen_int_mode (INTVAL (x) << 8, HImode);
30336 if (x != const0_rtx)
30337 var = expand_simple_binop (HImode, IOR, var, x, var,
30338 1, OPTAB_LIB_WIDEN);
30340 x = gen_reg_rtx (wmode);
30341 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30342 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30344 emit_move_insn (target, gen_lowpart (mode, x));
30351 emit_move_insn (target, const_vec);
30352 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30356 /* A subroutine of ix86_expand_vector_init_general. Use vector
30357 concatenate to handle the most general case: all values variable,
30358 and none identical. */
30361 ix86_expand_vector_init_concat (enum machine_mode mode,
30362 rtx target, rtx *ops, int n)
30364 enum machine_mode cmode, hmode = VOIDmode;
30365 rtx first[8], second[4];
30405 gcc_unreachable ();
30408 if (!register_operand (ops[1], cmode))
30409 ops[1] = force_reg (cmode, ops[1]);
30410 if (!register_operand (ops[0], cmode))
30411 ops[0] = force_reg (cmode, ops[0]);
30412 emit_insn (gen_rtx_SET (VOIDmode, target,
30413 gen_rtx_VEC_CONCAT (mode, ops[0],
30433 gcc_unreachable ();
30449 gcc_unreachable ();
30454 /* FIXME: We process inputs backward to help RA. PR 36222. */
30457 for (; i > 0; i -= 2, j--)
30459 first[j] = gen_reg_rtx (cmode);
30460 v = gen_rtvec (2, ops[i - 1], ops[i]);
30461 ix86_expand_vector_init (false, first[j],
30462 gen_rtx_PARALLEL (cmode, v));
30468 gcc_assert (hmode != VOIDmode);
30469 for (i = j = 0; i < n; i += 2, j++)
30471 second[j] = gen_reg_rtx (hmode);
30472 ix86_expand_vector_init_concat (hmode, second [j],
30476 ix86_expand_vector_init_concat (mode, target, second, n);
30479 ix86_expand_vector_init_concat (mode, target, first, n);
30483 gcc_unreachable ();
30487 /* A subroutine of ix86_expand_vector_init_general. Use vector
30488 interleave to handle the most general case: all values variable,
30489 and none identical. */
30492 ix86_expand_vector_init_interleave (enum machine_mode mode,
30493 rtx target, rtx *ops, int n)
30495 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30498 rtx (*gen_load_even) (rtx, rtx, rtx);
30499 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30500 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30505 gen_load_even = gen_vec_setv8hi;
30506 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30507 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30508 inner_mode = HImode;
30509 first_imode = V4SImode;
30510 second_imode = V2DImode;
30511 third_imode = VOIDmode;
30514 gen_load_even = gen_vec_setv16qi;
30515 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30516 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30517 inner_mode = QImode;
30518 first_imode = V8HImode;
30519 second_imode = V4SImode;
30520 third_imode = V2DImode;
30523 gcc_unreachable ();
30526 for (i = 0; i < n; i++)
30528 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30529 op0 = gen_reg_rtx (SImode);
30530 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30532 /* Insert the SImode value as low element of V4SImode vector. */
30533 op1 = gen_reg_rtx (V4SImode);
30534 op0 = gen_rtx_VEC_MERGE (V4SImode,
30535 gen_rtx_VEC_DUPLICATE (V4SImode,
30537 CONST0_RTX (V4SImode),
30539 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30541 /* Cast the V4SImode vector back to a vector in the original mode. */
30542 op0 = gen_reg_rtx (mode);
30543 emit_move_insn (op0, gen_lowpart (mode, op1));
30545 /* Load even elements into the second position. */
30546 emit_insn (gen_load_even (op0,
30547 force_reg (inner_mode,
30551 /* Cast vector to FIRST_IMODE vector. */
30552 ops[i] = gen_reg_rtx (first_imode);
30553 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30556 /* Interleave low FIRST_IMODE vectors. */
30557 for (i = j = 0; i < n; i += 2, j++)
30559 op0 = gen_reg_rtx (first_imode);
30560 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30562 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30563 ops[j] = gen_reg_rtx (second_imode);
30564 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30567 /* Interleave low SECOND_IMODE vectors. */
30568 switch (second_imode)
30571 for (i = j = 0; i < n / 2; i += 2, j++)
30573 op0 = gen_reg_rtx (second_imode);
30574 emit_insn (gen_interleave_second_low (op0, ops[i],
30577 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30579 ops[j] = gen_reg_rtx (third_imode);
30580 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30582 second_imode = V2DImode;
30583 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30587 op0 = gen_reg_rtx (second_imode);
30588 emit_insn (gen_interleave_second_low (op0, ops[0],
30591 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
30593 emit_insn (gen_rtx_SET (VOIDmode, target,
30594 gen_lowpart (mode, op0)));
30598 gcc_unreachable ();
30602 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30603 all values variable, and none identical. */
30606 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30607 rtx target, rtx vals)
30609 rtx ops[32], op0, op1;
30610 enum machine_mode half_mode = VOIDmode;
30617 if (!mmx_ok && !TARGET_SSE)
30629 n = GET_MODE_NUNITS (mode);
30630 for (i = 0; i < n; i++)
30631 ops[i] = XVECEXP (vals, 0, i);
30632 ix86_expand_vector_init_concat (mode, target, ops, n);
30636 half_mode = V16QImode;
30640 half_mode = V8HImode;
30644 n = GET_MODE_NUNITS (mode);
30645 for (i = 0; i < n; i++)
30646 ops[i] = XVECEXP (vals, 0, i);
30647 op0 = gen_reg_rtx (half_mode);
30648 op1 = gen_reg_rtx (half_mode);
30649 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30651 ix86_expand_vector_init_interleave (half_mode, op1,
30652 &ops [n >> 1], n >> 2);
30653 emit_insn (gen_rtx_SET (VOIDmode, target,
30654 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30658 if (!TARGET_SSE4_1)
30666 /* Don't use ix86_expand_vector_init_interleave if we can't
30667 move from GPR to SSE register directly. */
30668 if (!TARGET_INTER_UNIT_MOVES)
30671 n = GET_MODE_NUNITS (mode);
30672 for (i = 0; i < n; i++)
30673 ops[i] = XVECEXP (vals, 0, i);
30674 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30682 gcc_unreachable ();
30686 int i, j, n_elts, n_words, n_elt_per_word;
30687 enum machine_mode inner_mode;
30688 rtx words[4], shift;
30690 inner_mode = GET_MODE_INNER (mode);
30691 n_elts = GET_MODE_NUNITS (mode);
30692 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30693 n_elt_per_word = n_elts / n_words;
30694 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
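/* The loop below packs n_elt_per_word elements into each word_mode
   integer, highest element first.  A sketch for two HImode elements
   per 32-bit word (illustrative):
     word = (uint32_t) elt_hi;
     word = (word << 16) | (uint16_t) elt_lo;  */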
30696 for (i = 0; i < n_words; ++i)
30698 rtx word = NULL_RTX;
30700 for (j = 0; j < n_elt_per_word; ++j)
30702 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30703 elt = convert_modes (word_mode, inner_mode, elt, true);
30709 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30710 word, 1, OPTAB_LIB_WIDEN);
30711 word = expand_simple_binop (word_mode, IOR, word, elt,
30712 word, 1, OPTAB_LIB_WIDEN);
30720 emit_move_insn (target, gen_lowpart (mode, words[0]));
30721 else if (n_words == 2)
30723 rtx tmp = gen_reg_rtx (mode);
30724 emit_clobber (tmp);
30725 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30726 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30727 emit_move_insn (target, tmp);
30729 else if (n_words == 4)
30731 rtx tmp = gen_reg_rtx (V4SImode);
30732 gcc_assert (word_mode == SImode);
30733 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30734 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30735 emit_move_insn (target, gen_lowpart (mode, tmp));
30738 gcc_unreachable ();
30742 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30743 instructions unless MMX_OK is true. */
30746 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30748 enum machine_mode mode = GET_MODE (target);
30749 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30750 int n_elts = GET_MODE_NUNITS (mode);
30751 int n_var = 0, one_var = -1;
30752 bool all_same = true, all_const_zero = true;
30756 for (i = 0; i < n_elts; ++i)
30758 x = XVECEXP (vals, 0, i);
30759 if (!(CONST_INT_P (x)
30760 || GET_CODE (x) == CONST_DOUBLE
30761 || GET_CODE (x) == CONST_FIXED))
30762 n_var++, one_var = i;
30763 else if (x != CONST0_RTX (inner_mode))
30764 all_const_zero = false;
30765 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30769 /* Constants are best loaded from the constant pool. */
30772 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30776 /* If all values are identical, broadcast the value. */
30778 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30779 XVECEXP (vals, 0, 0)))
30782 /* Values where only one field is non-constant are best loaded from
30783 the pool and overwritten via move later. */
30787 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30788 XVECEXP (vals, 0, one_var),
30792 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30796 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30800 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30802 enum machine_mode mode = GET_MODE (target);
30803 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30804 enum machine_mode half_mode;
30805 bool use_vec_merge = false;
30807 static rtx (*gen_extract[6][2]) (rtx, rtx)
30809 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30810 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30811 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30812 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30813 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
30814 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
30816 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
30818 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
30819 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
30820 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
30821 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
30822 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
30823 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
30833 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30834 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
30836 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30838 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30839 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30845 use_vec_merge = TARGET_SSE4_1;
30853 /* For the two element vectors, we implement a VEC_CONCAT with
30854 the extraction of the other element. */
30856 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
30857 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
30860 op0 = val, op1 = tmp;
30862 op0 = tmp, op1 = val;
30864 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
30865 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30870 use_vec_merge = TARGET_SSE4_1;
30877 use_vec_merge = true;
30881 /* tmp = target = A B C D */
30882 tmp = copy_to_reg (target);
30883 /* target = A A B B */
30884 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
30885 /* target = X A B B */
30886 ix86_expand_vector_set (false, target, val, 0);
30887 /* target = A X C D */
30888 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30889 const1_rtx, const0_rtx,
30890 GEN_INT (2+4), GEN_INT (3+4)));
30894 /* tmp = target = A B C D */
30895 tmp = copy_to_reg (target);
30896 /* tmp = X B C D */
30897 ix86_expand_vector_set (false, tmp, val, 0);
30898 /* target = A B X D */
30899 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30900 const0_rtx, const1_rtx,
30901 GEN_INT (0+4), GEN_INT (3+4)));
30905 /* tmp = target = A B C D */
30906 tmp = copy_to_reg (target);
30907 /* tmp = X B C D */
30908 ix86_expand_vector_set (false, tmp, val, 0);
30909 /* target = A B X D */
30910 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30911 const0_rtx, const1_rtx,
30912 GEN_INT (2+4), GEN_INT (0+4)));
30916 gcc_unreachable ();
30921 use_vec_merge = TARGET_SSE4_1;
30925 /* Element 0 handled by vec_merge below. */
30928 use_vec_merge = true;
30934 /* With SSE2, use integer shuffles to swap element 0 and ELT,
30935 store into element 0, then shuffle them back. */
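/* Example for ELT == 2: the pshufd order below becomes {2, 1, 0, 3},
   which swaps elements 0 and 2; after VAL is stored into element 0,
   applying the same (self-inverse) shuffle swaps them back, leaving
   VAL in element 2.  */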
30939 order[0] = GEN_INT (elt);
30940 order[1] = const1_rtx;
30941 order[2] = const2_rtx;
30942 order[3] = GEN_INT (3);
30943 order[elt] = const0_rtx;
30945 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30946 order[1], order[2], order[3]));
30948 ix86_expand_vector_set (false, target, val, 0);
30950 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30951 order[1], order[2], order[3]));
30955 /* For SSE1, we have to reuse the V4SF code. */
30956 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
30957 gen_lowpart (SFmode, val), elt);
30962 use_vec_merge = TARGET_SSE2;
30965 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
30969 use_vec_merge = TARGET_SSE4_1;
30976 half_mode = V16QImode;
30982 half_mode = V8HImode;
30988 half_mode = V4SImode;
30994 half_mode = V2DImode;
31000 half_mode = V4SFmode;
31006 half_mode = V2DFmode;
31012 /* Compute offset. */
31016 gcc_assert (i <= 1);
31018 /* Extract the half. */
31019 tmp = gen_reg_rtx (half_mode);
31020 emit_insn (gen_extract[j][i] (tmp, target));
31022 /* Put val in tmp at elt. */
31023 ix86_expand_vector_set (false, tmp, val, elt);
31026 emit_insn (gen_insert[j][i] (target, target, tmp));
31035 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31036 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31037 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31041 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31043 emit_move_insn (mem, target);
31045 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31046 emit_move_insn (tmp, val);
31048 emit_move_insn (target, mem);
31053 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31055 enum machine_mode mode = GET_MODE (vec);
31056 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31057 bool use_vec_extr = false;
31070 use_vec_extr = true;
31074 use_vec_extr = TARGET_SSE4_1;
31086 tmp = gen_reg_rtx (mode);
31087 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31088 GEN_INT (elt), GEN_INT (elt),
31089 GEN_INT (elt+4), GEN_INT (elt+4)));
31093 tmp = gen_reg_rtx (mode);
31094 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31098 gcc_unreachable ();
31101 use_vec_extr = true;
31106 use_vec_extr = TARGET_SSE4_1;
31120 tmp = gen_reg_rtx (mode);
31121 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31122 GEN_INT (elt), GEN_INT (elt),
31123 GEN_INT (elt), GEN_INT (elt)));
31127 tmp = gen_reg_rtx (mode);
31128 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31132 gcc_unreachable ();
31135 use_vec_extr = true;
31140 /* For SSE1, we have to reuse the V4SF code. */
31141 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31142 gen_lowpart (V4SFmode, vec), elt);
31148 use_vec_extr = TARGET_SSE2;
31151 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31155 use_vec_extr = TARGET_SSE4_1;
31159 /* ??? Could extract the appropriate HImode element and shift. */
31166 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31167 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31169 /* Let the rtl optimizers know about the zero extension performed. */
31170 if (inner_mode == QImode || inner_mode == HImode)
31172 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31173 target = gen_lowpart (SImode, target);
31176 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31180 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31182 emit_move_insn (mem, vec);
31184 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31185 emit_move_insn (target, tmp);
31189 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31190 pattern to reduce; DEST is the destination; IN is the input vector. */
31193 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31195 rtx tmp1, tmp2, tmp3;
31197 tmp1 = gen_reg_rtx (V4SFmode);
31198 tmp2 = gen_reg_rtx (V4SFmode);
31199 tmp3 = gen_reg_rtx (V4SFmode);
31201 emit_insn (gen_sse_movhlps (tmp1, in, in));
31202 emit_insn (fn (tmp2, tmp1, in));
31204 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31205 const1_rtx, const1_rtx,
31206 GEN_INT (1+4), GEN_INT (1+4)));
31207 emit_insn (fn (dest, tmp2, tmp3));
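/* A worked trace of the reduction above, assuming FN is addition and
   IN = {a, b, c, d} (element 0 first):
     tmp1 = movhlps (in, in)           -> {c, d, c, d}
     tmp2 = tmp1 + in                  -> {a+c, b+d, ...}
     tmp3 = broadcast of tmp2 elt 1    -> {b+d, b+d, b+d, b+d}
     dest = tmp2 + tmp3, whose element 0 holds (a+c) + (b+d).  */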
31210 /* Target hook for scalar_mode_supported_p. */
31212 ix86_scalar_mode_supported_p (enum machine_mode mode)
31214 if (DECIMAL_FLOAT_MODE_P (mode))
31215 return default_decimal_float_supported_p ();
31216 else if (mode == TFmode)
31219 return default_scalar_mode_supported_p (mode);
31222 /* Implements target hook vector_mode_supported_p. */
31224 ix86_vector_mode_supported_p (enum machine_mode mode)
31226 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31228 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31230 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31232 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31234 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31239 /* Target hook for c_mode_for_suffix. */
31240 static enum machine_mode
31241 ix86_c_mode_for_suffix (char suffix)
31251 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31253 We do this in the new i386 backend to maintain source compatibility
31254 with the old cc0-based compiler. */
31257 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31258 tree inputs ATTRIBUTE_UNUSED,
31261 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31263 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31268 /* Implements target vector targetm.asm.encode_section_info. This
31269 is not used by NetWare. */
31271 static void ATTRIBUTE_UNUSED
31272 ix86_encode_section_info (tree decl, rtx rtl, int first)
31274 default_encode_section_info (decl, rtl, first);
31276 if (TREE_CODE (decl) == VAR_DECL
31277 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31278 && ix86_in_large_data_p (decl))
31279 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31282 /* Worker function for REVERSE_CONDITION. */
31285 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31287 return (mode != CCFPmode && mode != CCFPUmode
31288 ? reverse_condition (code)
31289 : reverse_condition_maybe_unordered (code));
31292 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31296 output_387_reg_move (rtx insn, rtx *operands)
31298 if (REG_P (operands[0]))
31300 if (REG_P (operands[1])
31301 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31303 if (REGNO (operands[0]) == FIRST_STACK_REG)
31304 return output_387_ffreep (operands, 0);
31305 return "fstp\t%y0";
31307 if (STACK_TOP_P (operands[0]))
31308 return "fld%Z1\t%y1";
31311 else if (MEM_P (operands[0]))
31313 gcc_assert (REG_P (operands[1]));
31314 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31315 return "fstp%Z0\t%y0";
31318 /* There is no non-popping store to memory for XFmode.
31319 So if we need one, follow the store with a load. */
31320 if (GET_MODE (operands[0]) == XFmode)
31321 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31323 return "fst%Z0\t%y0";
31330 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31331 FP status register is set. */
31334 ix86_emit_fp_unordered_jump (rtx label)
31336 rtx reg = gen_reg_rtx (HImode);
31339 emit_insn (gen_x86_fnstsw_1 (reg));
31341 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31343 emit_insn (gen_x86_sahf_1 (reg));
31345 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31346 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31350 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31352 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31353 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31356 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31357 gen_rtx_LABEL_REF (VOIDmode, label),
31359 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31361 emit_jump_insn (temp);
31362 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31365 /* Output code to perform a log1p XFmode calculation. */
31367 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31369 rtx label1 = gen_label_rtx ();
31370 rtx label2 = gen_label_rtx ();
31372 rtx tmp = gen_reg_rtx (XFmode);
31373 rtx tmp2 = gen_reg_rtx (XFmode);
31376 emit_insn (gen_absxf2 (tmp, op1));
31377 test = gen_rtx_GE (VOIDmode, tmp,
31378 CONST_DOUBLE_FROM_REAL_VALUE (
31379 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31381 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31383 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31384 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31385 emit_jump (label2);
31387 emit_label (label1);
31388 emit_move_insn (tmp, CONST1_RTX (XFmode));
31389 emit_insn (gen_addxf3 (tmp, op1, tmp));
31390 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31391 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31393 emit_label (label2);
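/* The identity used above (a sketch of the math, not new code):
     log1p (x) = ln (2) * log2 (1 + x).
   fyl2xp1 computes y * log2 (1 + x) directly, but is only specified
   for |x| below 1 - sqrt(2)/2 ~= 0.2928932...; for larger |x| the code
   falls back to fyl2x on the explicitly formed 1 + x.  In both paths
   y is ln(2), the fldln2 constant loaded above.  */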
31396 /* Output code to perform a Newton-Raphson approximation of a single precision
31397 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31399 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31401 rtx x0, x1, e0, e1, two;
31403 x0 = gen_reg_rtx (mode);
31404 e0 = gen_reg_rtx (mode);
31405 e1 = gen_reg_rtx (mode);
31406 x1 = gen_reg_rtx (mode);
31408 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
31410 if (VECTOR_MODE_P (mode))
31411 two = ix86_build_const_vector (mode, true, two);
31413 two = force_reg (mode, two);
31415 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
31417 /* x0 = rcp(b) estimate */
31418 emit_insn (gen_rtx_SET (VOIDmode, x0,
31419 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31422 emit_insn (gen_rtx_SET (VOIDmode, e0,
31423 gen_rtx_MULT (mode, x0, a)));
31425 emit_insn (gen_rtx_SET (VOIDmode, e1,
31426 gen_rtx_MULT (mode, x0, b)));
31428 emit_insn (gen_rtx_SET (VOIDmode, x1,
31429 gen_rtx_MINUS (mode, two, e1)));
31430 /* res = e0 * x1 */
31431 emit_insn (gen_rtx_SET (VOIDmode, res,
31432 gen_rtx_MULT (mode, e0, x1)));
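/* Why this works (a sketch): the Newton-Raphson step for refining a
   reciprocal estimate x0 ~ 1/b is  x1 = x0 * (2 - b * x0).  The code
   folds the final multiplication by A in early:
     e0 = a * x0,  x1 = 2 - b * x0,  res = e0 * x1
   which equals a * x0 * (2 - b * x0), one refined quotient.  */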
31435 /* Output code to perform a Newton-Raphson approximation of a
31436 single precision floating point [reciprocal] square root. */
31438 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31441 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31444 x0 = gen_reg_rtx (mode);
31445 e0 = gen_reg_rtx (mode);
31446 e1 = gen_reg_rtx (mode);
31447 e2 = gen_reg_rtx (mode);
31448 e3 = gen_reg_rtx (mode);
31450 real_from_integer (&r, VOIDmode, -3, -1, 0);
31451 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31453 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31454 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31456 if (VECTOR_MODE_P (mode))
31458 mthree = ix86_build_const_vector (mode, true, mthree);
31459 mhalf = ix86_build_const_vector (mode, true, mhalf);
31462 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31463 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31465 /* x0 = rsqrt(a) estimate */
31466 emit_insn (gen_rtx_SET (VOIDmode, x0,
31467 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31470 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt(0.0). */
31475 zero = gen_reg_rtx (mode);
31476 mask = gen_reg_rtx (mode);
31478 zero = force_reg (mode, CONST0_RTX(mode));
31479 emit_insn (gen_rtx_SET (VOIDmode, mask,
31480 gen_rtx_NE (mode, zero, a)));
31482 emit_insn (gen_rtx_SET (VOIDmode, x0,
31483 gen_rtx_AND (mode, x0, mask)));
31487 emit_insn (gen_rtx_SET (VOIDmode, e0,
31488 gen_rtx_MULT (mode, x0, a)));
31490 emit_insn (gen_rtx_SET (VOIDmode, e1,
31491 gen_rtx_MULT (mode, e0, x0)));
31494 mthree = force_reg (mode, mthree);
31495 emit_insn (gen_rtx_SET (VOIDmode, e2,
31496 gen_rtx_PLUS (mode, e1, mthree)));
31498 mhalf = force_reg (mode, mhalf);
31500 /* e3 = -.5 * x0 */
31501 emit_insn (gen_rtx_SET (VOIDmode, e3,
31502 gen_rtx_MULT (mode, x0, mhalf)));
31504 /* e3 = -.5 * e0 */
31505 emit_insn (gen_rtx_SET (VOIDmode, e3,
31506 gen_rtx_MULT (mode, e0, mhalf)));
31507 /* ret = e2 * e3 */
31508 emit_insn (gen_rtx_SET (VOIDmode, res,
31509 gen_rtx_MULT (mode, e2, e3)));
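/* Sketch of the derivation: the Newton-Raphson step for 1/sqrt(a) is
     x1 = 0.5 * x0 * (3 - a * x0 * x0),
   rewritten above with negated constants as
     x1 = -0.5 * x0 * (a * x0 * x0 - 3)
   so only -3 and -0.5 need materializing.  For sqrt, e3 uses
   e0 = a * x0 (an estimate of sqrt(a)) in place of x0, which turns
   the same iteration into a refined sqrt as the comment states.  */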
31512 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31514 static void ATTRIBUTE_UNUSED
31515 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31518 /* With Binutils 2.15, the "@unwind" marker must be specified on
31519 every occurrence of the ".eh_frame" section, not just the first one. */
31522 && strcmp (name, ".eh_frame") == 0)
31524 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31525 flags & SECTION_WRITE ? "aw" : "a");
31528 default_elf_asm_named_section (name, flags, decl);
31531 /* Return the mangling of TYPE if it is an extended fundamental type. */
31533 static const char *
31534 ix86_mangle_type (const_tree type)
31536 type = TYPE_MAIN_VARIANT (type);
31538 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31539 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31542 switch (TYPE_MODE (type))
31545 /* __float128 is "g". */
31548 /* "long double" or __float80 is "e". */
31555 /* For 32-bit code we can save PIC register setup by using
31556 __stack_chk_fail_local hidden function instead of calling
31557 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
31558 register, so it is better to call __stack_chk_fail directly. */
31561 ix86_stack_protect_fail (void)
31563 return TARGET_64BIT
31564 ? default_external_stack_protect_fail ()
31565 : default_hidden_stack_protect_fail ();
31568 /* Select a format to encode pointers in exception handling data. CODE
31569 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31570 true if the symbol may be affected by dynamic relocations.
31572 ??? All x86 object file formats are capable of representing this.
31573 After all, the relocation needed is the same as for the call insn.
31574 Whether or not a particular assembler allows us to enter such, I
31575 guess we'll have to see. */
31577 asm_preferred_eh_data_format (int code, int global)
31581 int type = DW_EH_PE_sdata8;
31583 || ix86_cmodel == CM_SMALL_PIC
31584 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31585 type = DW_EH_PE_sdata4;
31586 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31588 if (ix86_cmodel == CM_SMALL
31589 || (ix86_cmodel == CM_MEDIUM && code))
31590 return DW_EH_PE_udata4;
31591 return DW_EH_PE_absptr;
31594 /* Expand copysign from SIGN to the positive value ABS_VALUE
31595 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
31598 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31600 enum machine_mode mode = GET_MODE (sign);
31601 rtx sgn = gen_reg_rtx (mode);
31602 if (mask == NULL_RTX)
31604 enum machine_mode vmode;
31606 if (mode == SFmode)
31608 else if (mode == DFmode)
31613 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31614 if (!VECTOR_MODE_P (mode))
31616 /* We need to generate a scalar mode mask in this case. */
31617 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31618 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31619 mask = gen_reg_rtx (mode);
31620 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31624 mask = gen_rtx_NOT (mode, mask);
31625 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31626 gen_rtx_AND (mode, mask, sign)));
31627 emit_insn (gen_rtx_SET (VOIDmode, result,
31628 gen_rtx_IOR (mode, abs_value, sgn)));
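/* Bitwise identity implemented above: with SGN = SIGN & SIGNBIT_MASK
   and ABS_VALUE known to have a clear sign bit,
     result = abs_value | sgn
   is copysign (abs_value, sign) computed without branches.  */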
31631 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31632 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
31635 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31637 enum machine_mode vmode, mode = GET_MODE (op0);
31640 xa = gen_reg_rtx (mode);
31641 if (mode == SFmode)
31643 else if (mode == DFmode)
31647 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31648 if (!VECTOR_MODE_P (mode))
31650 /* We need to generate a scalar mode mask in this case. */
31651 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31652 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31653 mask = gen_reg_rtx (mode);
31654 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31656 emit_insn (gen_rtx_SET (VOIDmode, xa,
31657 gen_rtx_AND (mode, op0, mask)));
31665 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31666 swapping the operands if SWAP_OPERANDS is true. The expanded
31667 code is a forward jump to a newly created label in case the
31668 comparison is true. The generated label rtx is returned. */
31670 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31671 bool swap_operands)
31682 label = gen_label_rtx ();
31683 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31684 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31685 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31686 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31687 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31688 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31689 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31690 JUMP_LABEL (tmp) = label;
31695 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31696 using comparison code CODE. Operands are swapped for the comparison if
31697 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31699 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31700 bool swap_operands)
31702 enum machine_mode mode = GET_MODE (op0);
31703 rtx mask = gen_reg_rtx (mode);
31712 if (mode == DFmode)
31713 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
31714 gen_rtx_fmt_ee (code, mode, op0, op1)));
31716 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
31717 gen_rtx_fmt_ee (code, mode, op0, op1)));
31722 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31723 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31725 ix86_gen_TWO52 (enum machine_mode mode)
31727 REAL_VALUE_TYPE TWO52r;
31730 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31731 TWO52 = const_double_from_real_value (TWO52r, mode);
31732 TWO52 = force_reg (mode, TWO52);
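/* Why 2**52 (2**23 for SFmode): binary64 has a 52-bit mantissa, so
   for 0 <= x < 2**52 the sum x + 2**52 lies in [2**52, 2**53), where
   the spacing between adjacent doubles is exactly 1.  The addition
   therefore rounds x to an integer, and subtracting 2**52 recovers
   that integer exactly; this is the trick the expanders below rely on.  */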
31737 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
31740 ix86_expand_lround (rtx op0, rtx op1)
31742 /* C code for the stuff we're doing below:
31743 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
  return (long) tmp;  */
31746 enum machine_mode mode = GET_MODE (op1);
31747 const struct real_format *fmt;
31748 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31751 /* load nextafter (0.5, 0.0) */
31752 fmt = REAL_MODE_FORMAT (mode);
31753 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31754 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
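/* Why nextafter (0.5, 0.0) instead of 0.5 (a sketch of the reasoning):
   for the largest double strictly below 0.5, adding exactly 0.5 would
   give a sum that rounds up to 1.0, so the truncation would return 1
   where lround must return 0.  Adding the predecessor of 0.5 keeps
   every value below one half from crossing it.  */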
31756 /* adj = copysign (0.5, op1) */
31757 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
31758 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31760 /* adj = op1 + adj */
31761 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31763 /* op0 = (imode)adj */
31764 expand_fix (op0, adj, 0);
31767 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
31770 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31772 /* C code for the stuff we're doing below (for do_floor):
  xi = (long) op1;
31774 xi -= (double)xi > op1 ? 1 : 0;
  return xi;  */
31777 enum machine_mode fmode = GET_MODE (op1);
31778 enum machine_mode imode = GET_MODE (op0);
31779 rtx ireg, freg, label, tmp;
31781 /* reg = (long)op1 */
31782 ireg = gen_reg_rtx (imode);
31783 expand_fix (ireg, op1, 0);
31785 /* freg = (double)reg */
31786 freg = gen_reg_rtx (fmode);
31787 expand_float (freg, ireg, 0);
31789 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31790 label = ix86_expand_sse_compare_and_jump (UNLE,
31791 freg, op1, !do_floor);
31792 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31793 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31794 emit_move_insn (ireg, tmp);
31796 emit_label (label);
31797 LABEL_NUSES (label) = 1;
31799 emit_move_insn (op0, ireg);
31802 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31803 result in OPERAND0. */
31805 ix86_expand_rint (rtx operand0, rtx operand1)
31807 /* C code for the stuff we're doing below:
31808 xa = fabs (operand1);
31809 if (!isless (xa, 2**52))
  return operand1;
31811 xa = xa + 2**52 - 2**52;
31812 return copysign (xa, operand1);  */
31814 enum machine_mode mode = GET_MODE (operand0);
31815 rtx res, xa, label, TWO52, mask;
31817 res = gen_reg_rtx (mode);
31818 emit_move_insn (res, operand1);
31820 /* xa = abs (operand1) */
31821 xa = ix86_expand_sse_fabs (res, &mask);
31823 /* if (!isless (xa, TWO52)) goto label; */
31824 TWO52 = ix86_gen_TWO52 (mode);
31825 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31827 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31828 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31830 ix86_sse_copysign_to_positive (res, xa, res, mask);
31832 emit_label (label);
31833 LABEL_NUSES (label) = 1;
31835 emit_move_insn (operand0, res);
31838 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
31841 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
31843 /* C code for the stuff we expand below.
31844 double xa = fabs (x), x2;
31845 if (!isless (xa, TWO52))
  return x;
31847 xa = xa + TWO52 - TWO52;
31848 x2 = copysign (xa, x);
  Compensate:  x2 -= do_floor ? (x2 > x) : -(x2 < x);
  return x2;  */
31857 enum machine_mode mode = GET_MODE (operand0);
31858 rtx xa, TWO52, tmp, label, one, res, mask;
31860 TWO52 = ix86_gen_TWO52 (mode);
31862 /* Temporary for holding the result, initialized to the input
31863 operand to ease control flow. */
31864 res = gen_reg_rtx (mode);
31865 emit_move_insn (res, operand1);
31867 /* xa = abs (operand1) */
31868 xa = ix86_expand_sse_fabs (res, &mask);
31870 /* if (!isless (xa, TWO52)) goto label; */
31871 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31873 /* xa = xa + TWO52 - TWO52; */
31874 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31875 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31877 /* xa = copysign (xa, operand1) */
31878 ix86_sse_copysign_to_positive (xa, xa, res, mask);
31880 /* generate 1.0 or -1.0 */
31881 one = force_reg (mode,
31882 const_double_from_real_value (do_floor
31883 ? dconst1 : dconstm1, mode));
31885 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31886 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31887 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31888 gen_rtx_AND (mode, one, tmp)));
31889 /* We always need to subtract here to preserve signed zero. */
31890 tmp = expand_simple_binop (mode, MINUS,
31891 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31892 emit_move_insn (res, tmp);
31894 emit_label (label);
31895 LABEL_NUSES (label) = 1;
31897 emit_move_insn (operand0, res);
31900 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
31903 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
31905 /* C code for the stuff we expand below.
31906 double xa = fabs (x), x2;
31907 if (!isless (xa, TWO52))
  return x;
31909 x2 = (double)(long)x;
  Compensate:  x2 -= do_floor ? (x2 > x) : -(x2 < x);
31916 if (HONOR_SIGNED_ZEROS (mode))
31917 return copysign (x2, x);
  return x2;  */
31920 enum machine_mode mode = GET_MODE (operand0);
31921 rtx xa, xi, TWO52, tmp, label, one, res, mask;
31923 TWO52 = ix86_gen_TWO52 (mode);
31925 /* Temporary for holding the result, initialized to the input
31926 operand to ease control flow. */
31927 res = gen_reg_rtx (mode);
31928 emit_move_insn (res, operand1);
31930 /* xa = abs (operand1) */
31931 xa = ix86_expand_sse_fabs (res, &mask);
31933 /* if (!isless (xa, TWO52)) goto label; */
31934 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31936 /* xa = (double)(long)x */
31937 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31938 expand_fix (xi, res, 0);
31939 expand_float (xa, xi, 0);
31942 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
31944 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31945 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31946 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31947 gen_rtx_AND (mode, one, tmp)));
31948 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
31949 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31950 emit_move_insn (res, tmp);
31952 if (HONOR_SIGNED_ZEROS (mode))
31953 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
31955 emit_label (label);
31956 LABEL_NUSES (label) = 1;
31958 emit_move_insn (operand0, res);
31961 /* Expand SSE sequence for computing round from OPERAND1 storing
31962 into OPERAND0. Sequence that works without relying on DImode truncation
31963 via cvttsd2siq that is only available on 64bit targets. */
31965 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
31967 /* C code for the stuff we expand below.
31968 double xa = fabs (x), xa2, x2;
31969 if (!isless (xa, TWO52))
  return x;
31971 Using the absolute value and copying back the sign makes
31972 -0.0 -> -0.0 correct.
31973 xa2 = xa + TWO52 - TWO52;
  Compensate:
    dxa = xa2 - xa;
    if (dxa <= -0.5)
      xa2 += 1;
31978 else if (dxa > 0.5)
      xa2 -= 1;
31980 x2 = copysign (xa2, x);
  return x2;  */
31983 enum machine_mode mode = GET_MODE (operand0);
31984 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
31986 TWO52 = ix86_gen_TWO52 (mode);
31988 /* Temporary for holding the result, initialized to the input
31989 operand to ease control flow. */
31990 res = gen_reg_rtx (mode);
31991 emit_move_insn (res, operand1);
31993 /* xa = abs (operand1) */
31994 xa = ix86_expand_sse_fabs (res, &mask);
31996 /* if (!isless (xa, TWO52)) goto label; */
31997 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31999 /* xa2 = xa + TWO52 - TWO52; */
32000 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32001 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32003 /* dxa = xa2 - xa; */
32004 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32006 /* generate 0.5, 1.0 and -0.5 */
32007 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32008 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32009 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32013 tmp = gen_reg_rtx (mode);
32014 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32015 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32016 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32017 gen_rtx_AND (mode, one, tmp)));
32018 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32019 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32020 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32021 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32022 gen_rtx_AND (mode, one, tmp)));
32023 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32025 /* res = copysign (xa2, operand1) */
32026 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32028 emit_label (label);
32029 LABEL_NUSES (label) = 1;
32031 emit_move_insn (operand0, res);
32034 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
32037 ix86_expand_trunc (rtx operand0, rtx operand1)
32039 /* C code for SSE variant we expand below.
32040 double xa = fabs (x), x2;
32041 if (!isless (xa, TWO52))
  return x;
32043 x2 = (double)(long)x;
32044 if (HONOR_SIGNED_ZEROS (mode))
32045 return copysign (x2, x);
  return x2;  */
32048 enum machine_mode mode = GET_MODE (operand0);
32049 rtx xa, xi, TWO52, label, res, mask;
32051 TWO52 = ix86_gen_TWO52 (mode);
32053 /* Temporary for holding the result, initialized to the input
32054 operand to ease control flow. */
32055 res = gen_reg_rtx (mode);
32056 emit_move_insn (res, operand1);
32058 /* xa = abs (operand1) */
32059 xa = ix86_expand_sse_fabs (res, &mask);
32061 /* if (!isless (xa, TWO52)) goto label; */
32062 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32064 /* x = (double)(long)x */
32065 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32066 expand_fix (xi, res, 0);
32067 expand_float (res, xi, 0);
32069 if (HONOR_SIGNED_ZEROS (mode))
32070 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32072 emit_label (label);
32073 LABEL_NUSES (label) = 1;
32075 emit_move_insn (operand0, res);
32078 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
32081 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32083 enum machine_mode mode = GET_MODE (operand0);
32084 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32086 /* C code for SSE variant we expand below.
32087 double xa = fabs (x), x2;
32088 if (!isless (xa, TWO52))
  return x;
32090 xa2 = xa + TWO52 - TWO52;
  Compensate:
    if (xa2 > xa)
      xa2 -= 1.0;
32094 x2 = copysign (xa2, x);
  return x2;  */
32098 TWO52 = ix86_gen_TWO52 (mode);
32100 /* Temporary for holding the result, initialized to the input
32101 operand to ease control flow. */
32102 res = gen_reg_rtx (mode);
32103 emit_move_insn (res, operand1);
32105 /* xa = abs (operand1) */
32106 xa = ix86_expand_sse_fabs (res, &smask);
32108 /* if (!isless (xa, TWO52)) goto label; */
32109 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32111 /* res = xa + TWO52 - TWO52; */
32112 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32113 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32114 emit_move_insn (res, tmp);
32117 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32119 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32120 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32121 emit_insn (gen_rtx_SET (VOIDmode, mask,
32122 gen_rtx_AND (mode, mask, one)));
32123 tmp = expand_simple_binop (mode, MINUS,
32124 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32125 emit_move_insn (res, tmp);
32127 /* res = copysign (res, operand1) */
32128 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32130 emit_label (label);
32131 LABEL_NUSES (label) = 1;
32133 emit_move_insn (operand0, res);
32136 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
32139 ix86_expand_round (rtx operand0, rtx operand1)
32141 /* C code for the stuff we're doing below:
32142 double xa = fabs (x);
32143 if (!isless (xa, TWO52))
  return x;
32145 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32146 return copysign (xa, x);  */
32148 enum machine_mode mode = GET_MODE (operand0);
32149 rtx res, TWO52, xa, label, xi, half, mask;
32150 const struct real_format *fmt;
32151 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32153 /* Temporary for holding the result, initialized to the input
32154 operand to ease control flow. */
32155 res = gen_reg_rtx (mode);
32156 emit_move_insn (res, operand1);
32158 TWO52 = ix86_gen_TWO52 (mode);
32159 xa = ix86_expand_sse_fabs (res, &mask);
32160 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32162 /* load nextafter (0.5, 0.0) */
32163 fmt = REAL_MODE_FORMAT (mode);
32164 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32165 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
32167 /* xa = xa + 0.5 */
32168 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32169 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32171 /* xa = (double)(int64_t)xa */
32172 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32173 expand_fix (xi, xa, 0);
32174 expand_float (xa, xi, 0);
32176 /* res = copysign (xa, operand1) */
32177 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32179 emit_label (label);
32180 LABEL_NUSES (label) = 1;
32182 emit_move_insn (operand0, res);
32186 /* Table of valid machine attributes. */
32187 static const struct attribute_spec ix86_attribute_table[] =
32189 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
32190 /* Stdcall attribute says callee is responsible for popping arguments
32191 if they are not variable. */
32192 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32193 /* Fastcall attribute says callee is responsible for popping arguments
32194 if they are not variable. */
32195 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32196 /* Thiscall attribute says callee is responsible for popping arguments
32197 if they are not variable. */
32198 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32199 /* Cdecl attribute says the callee is a normal C declaration */
32200 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32201 /* Regparm attribute specifies how many integer arguments are to be
32202 passed in registers. */
32203 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
32204 /* Sseregparm attribute says we are using x86_64 calling conventions
32205 for FP arguments. */
32206 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32207 /* force_align_arg_pointer says this function realigns the stack at entry. */
32208 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32209 false, true, true, ix86_handle_cconv_attribute },
32210 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32211 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
32212 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
32213 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
32215 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32216 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32217 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32218 SUBTARGET_ATTRIBUTE_TABLE,
32220 /* ms_abi and sysv_abi calling convention function attributes. */
32221 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32222 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32223 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
32225 { NULL, 0, 0, false, false, false, NULL }
32228 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32230 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32231 tree vectype ATTRIBUTE_UNUSED,
32232 int misalign ATTRIBUTE_UNUSED)
32234 switch (type_of_cost)
32236 case scalar_stmt:
32237 return ix86_cost->scalar_stmt_cost;
32239 case scalar_load:
32240 return ix86_cost->scalar_load_cost;
32242 case scalar_store:
32243 return ix86_cost->scalar_store_cost;
32245 case vector_stmt:
32246 return ix86_cost->vec_stmt_cost;
32248 case vector_load:
32249 return ix86_cost->vec_align_load_cost;
32251 case vector_store:
32252 return ix86_cost->vec_store_cost;
32254 case vec_to_scalar:
32255 return ix86_cost->vec_to_scalar_cost;
32257 case scalar_to_vec:
32258 return ix86_cost->scalar_to_vec_cost;
32260 case unaligned_load:
32261 case unaligned_store:
32262 return ix86_cost->vec_unalign_load_cost;
32264 case cond_branch_taken:
32265 return ix86_cost->cond_taken_branch_cost;
32267 case cond_branch_not_taken:
32268 return ix86_cost->cond_not_taken_branch_cost;
32273 default:
32274 gcc_unreachable ();
32279 /* Implement targetm.vectorize.builtin_vec_perm. */
32282 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32284 tree itype = TREE_TYPE (vec_type);
32285 bool u = TYPE_UNSIGNED (itype);
32286 enum machine_mode vmode = TYPE_MODE (vec_type);
32287 enum ix86_builtins fcode;
32288 bool ok = TARGET_SSE2;
32294 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32297 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32299 itype = ix86_get_builtin_type (IX86_BT_DI);
32304 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32308 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32310 itype = ix86_get_builtin_type (IX86_BT_SI);
32314 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32317 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32320 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32323 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32333 *mask_type = itype;
32334 return ix86_builtins[(int) fcode];
32337 /* Return a vector mode with twice as many elements as VMODE. */
32338 /* ??? Consider moving this to a table generated by genmodes.c. */
32340 static enum machine_mode
32341 doublesize_vector_mode (enum machine_mode vmode)
32343 switch (vmode)
32345 case V2SFmode: return V4SFmode;
32346 case V1DImode: return V2DImode;
32347 case V2SImode: return V4SImode;
32348 case V4HImode: return V8HImode;
32349 case V8QImode: return V16QImode;
32351 case V2DFmode: return V4DFmode;
32352 case V4SFmode: return V8SFmode;
32353 case V2DImode: return V4DImode;
32354 case V4SImode: return V8SImode;
32355 case V8HImode: return V16HImode;
32356 case V16QImode: return V32QImode;
32358 case V4DFmode: return V8DFmode;
32359 case V8SFmode: return V16SFmode;
32360 case V4DImode: return V8DImode;
32361 case V8SImode: return V16SImode;
32362 case V16HImode: return V32HImode;
32363 case V32QImode: return V64QImode;
32365 default:
32366 gcc_unreachable ();
32370 /* Construct (set target (vec_select op0 (parallel perm))) and
32371 return true if that's a valid instruction in the active ISA. */
32374 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32376 rtx rperm[MAX_VECT_LEN], x;
32379 for (i = 0; i < nelt; ++i)
32380 rperm[i] = GEN_INT (perm[i]);
32382 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32383 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32384 x = gen_rtx_SET (VOIDmode, target, x);
32387 if (recog_memoized (x) < 0)
32395 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32398 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32399 const unsigned char *perm, unsigned nelt)
32401 enum machine_mode v2mode;
32404 v2mode = doublesize_vector_mode (GET_MODE (op0));
32405 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32406 return expand_vselect (target, x, perm, nelt);
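/* Illustrative RTL shape (exposition only, not from the original file):
   for V4SImode operands with perm = { 0, 4, 1, 5 }, the calls above build

     (set (reg:V4SI target)
          (vec_select:V4SI
            (vec_concat:V8SI (reg:V4SI op0) (reg:V4SI op1))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   which, if recog_memoized accepts it, matches an interleave-low
   (punpckldq-style) pattern in sse.md.  */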
32409 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32410 in terms of blendp[sd] / pblendw / pblendvb. */
32413 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32415 enum machine_mode vmode = d->vmode;
32416 unsigned i, mask, nelt = d->nelt;
32417 rtx target, op0, op1, x;
32419 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32421 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32424 /* This is a blend, not a permute. Elements must stay in their
32425 respective lanes. */
32426 for (i = 0; i < nelt; ++i)
32428 unsigned e = d->perm[i];
32429 if (!(e == i || e == i + nelt))
32436 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32437 decision should be extracted elsewhere, so that we only try that
32438 sequence once all budget==3 options have been tried. */
32440 /* For bytes, see if bytes move in pairs so we can use pblendw with
32441 an immediate argument, rather than pblendvb with a vector argument. */
32442 if (vmode == V16QImode)
32444 bool pblendw_ok = true;
32445 for (i = 0; i < 16 && pblendw_ok; i += 2)
32446 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32450 rtx rperm[16], vperm;
32452 for (i = 0; i < nelt; ++i)
32453 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32455 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32456 vperm = force_reg (V16QImode, vperm);
32458 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32463 target = d->target;
32475 for (i = 0; i < nelt; ++i)
32476 mask |= (d->perm[i] >= nelt) << i;
32480 for (i = 0; i < 2; ++i)
32481 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32485 for (i = 0; i < 4; ++i)
32486 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32490 for (i = 0; i < 8; ++i)
32491 mask |= (d->perm[i * 2] >= 16) << i;
32495 target = gen_lowpart (vmode, target);
32496 op0 = gen_lowpart (vmode, op0);
32497 op1 = gen_lowpart (vmode, op1);
32501 gcc_unreachable ();
32504 /* This matches five different patterns with the different modes. */
32505 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32506 x = gen_rtx_SET (VOIDmode, target, x);
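/* Worked example (not in the original sources): for V8HImode with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, every odd element comes from
   op1 (perm[i] >= nelt), so the loop above accumulates mask = 0xaa,
   exactly the immediate operand a pblendw would use.  */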
32512 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32513 in terms of the variable form of vpermilps.
32515 Note that we will have already failed the immediate input vpermilps,
32516 which requires that the high and low part shuffle be identical; the
32517 variable form doesn't require that. */
32520 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32522 rtx rperm[8], vperm;
32525 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32528 /* We can only permute within the 128-bit lane. */
32529 for (i = 0; i < 8; ++i)
32531 unsigned e = d->perm[i];
32532 if (i < 4 ? e >= 4 : e < 4)
32539 for (i = 0; i < 8; ++i)
32541 unsigned e = d->perm[i];
32543 /* Within each 128-bit lane, the elements of op0 are numbered
32544 from 0 and the elements of op1 are numbered from 4. */
32550 rperm[i] = GEN_INT (e);
32553 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32554 vperm = force_reg (V8SImode, vperm);
32555 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32560 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32561 in terms of pshufb or vpperm. */
32564 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32566 unsigned i, nelt, eltsz;
32567 rtx rperm[16], vperm, target, op0, op1;
32569 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32571 if (GET_MODE_SIZE (d->vmode) != 16)
32578 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32580 for (i = 0; i < nelt; ++i)
32582 unsigned j, e = d->perm[i];
32583 for (j = 0; j < eltsz; ++j)
32584 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32587 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32588 vperm = force_reg (V16QImode, vperm);
32590 target = gen_lowpart (V16QImode, d->target);
32591 op0 = gen_lowpart (V16QImode, d->op0);
32592 if (d->op0 == d->op1)
32593 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32596 op1 = gen_lowpart (V16QImode, d->op1);
32597 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
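/* Worked example (illustrative): for V4SImode, eltsz == 4, so a perm
   entry e expands to the four control bytes { 4e, 4e+1, 4e+2, 4e+3 };
   selecting element 2 into lane 0 yields rperm[0..3] = { 8, 9, 10, 11 },
   and pshufb/vpperm then moves one byte per control byte.  */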
32603 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32604 in a single instruction. */
32607 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32609 unsigned i, nelt = d->nelt;
32610 unsigned char perm2[MAX_VECT_LEN];
32612 /* Check plain VEC_SELECT first, because AVX has instructions that could
32613 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32614 input where SEL+CONCAT may not. */
32615 if (d->op0 == d->op1)
32617 int mask = nelt - 1;
32619 for (i = 0; i < nelt; i++)
32620 perm2[i] = d->perm[i] & mask;
32622 if (expand_vselect (d->target, d->op0, perm2, nelt))
32625 /* There are plenty of patterns in sse.md that are written for
32626 SEL+CONCAT and are not replicated for a single op. Perhaps
32627 that should be changed, to avoid the nastiness here. */
32629 /* Recognize interleave style patterns, which means incrementing
32630 every other permutation operand. */
32631 for (i = 0; i < nelt; i += 2)
32633 perm2[i] = d->perm[i] & mask;
32634 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32636 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32639 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32642 for (i = 0; i < nelt; i += 4)
32644 perm2[i + 0] = d->perm[i + 0] & mask;
32645 perm2[i + 1] = d->perm[i + 1] & mask;
32646 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32647 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32650 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32655 /* Finally, try the fully general two operand permute. */
32656 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32659 /* Recognize interleave style patterns with reversed operands. */
32660 if (d->op0 != d->op1)
32662 for (i = 0; i < nelt; ++i)
32664 unsigned e = d->perm[i];
32672 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32676 /* Try the SSE4.1 blend variable merge instructions. */
32677 if (expand_vec_perm_blend (d))
32680 /* Try one of the AVX vpermil variable permutations. */
32681 if (expand_vec_perm_vpermil (d))
32684 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32685 if (expand_vec_perm_pshufb (d))
32691 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32692 in terms of a pair of pshuflw + pshufhw instructions. */
32695 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32697 unsigned char perm2[MAX_VECT_LEN];
32701 if (d->vmode != V8HImode || d->op0 != d->op1)
32704 /* The two permutations only operate in 64-bit lanes. */
32705 for (i = 0; i < 4; ++i)
32706 if (d->perm[i] >= 4)
32708 for (i = 4; i < 8; ++i)
32709 if (d->perm[i] < 4)
32715 /* Emit the pshuflw. */
32716 memcpy (perm2, d->perm, 4);
32717 for (i = 4; i < 8; ++i)
32719 ok = expand_vselect (d->target, d->op0, perm2, 8);
32722 /* Emit the pshufhw. */
32723 memcpy (perm2 + 4, d->perm + 4, 4);
32724 for (i = 0; i < 4; ++i)
32726 ok = expand_vselect (d->target, d->target, perm2, 8);
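/* Worked example (illustrative): for d->perm = { 2, 0, 3, 1, 5, 7, 4, 6 },
   the first expand_vselect uses { 2, 0, 3, 1, 4, 5, 6, 7 } (pshuflw,
   high half left alone) and the second uses { 0, 1, 2, 3, 5, 7, 4, 6 }
   (pshufhw, low half left alone).  */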
32732 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32733 the permutation using the SSSE3 palignr instruction. This succeeds
32734 when all of the elements in PERM fit within one vector and we merely
32735 need to shift them down so that a single vector permutation has a
32736 chance to succeed. */
32739 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
32741 unsigned i, nelt = d->nelt;
32746 /* Even with AVX, palignr only operates on 128-bit vectors. */
32747 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32750 min = nelt, max = 0;
32751 for (i = 0; i < nelt; ++i)
32753 unsigned e = d->perm[i];
32759 if (min == 0 || max - min >= nelt)
32762 /* Given that we have SSSE3, we know we'll be able to implement the
32763 single operand permutation after the palignr with pshufb. */
32767 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32768 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32769 gen_lowpart (TImode, d->op1),
32770 gen_lowpart (TImode, d->op0), shift));
32772 d->op0 = d->op1 = d->target;
32775 for (i = 0; i < nelt; ++i)
32777 unsigned e = d->perm[i] - min;
32783 /* Test for the degenerate case where the alignment by itself
32784 produces the desired permutation. */
32788 ok = expand_vec_perm_1 (d);
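/* Worked example (illustrative): for V16QImode with perm = { 3, 4, ..., 18 },
   min == 3, so the palignr above shifts the op1:op0 pair right by three
   bytes; the residual permutation becomes the identity, and the
   degenerate-case test succeeds without a further pshufb.  */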
32794 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32795 a two vector permutation into a single vector permutation by using
32796 an interleave operation to merge the vectors. */
32799 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32801 struct expand_vec_perm_d dremap, dfinal;
32802 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32803 unsigned contents, h1, h2, h3, h4;
32804 unsigned char remap[2 * MAX_VECT_LEN];
32808 if (d->op0 == d->op1)
32811 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32812 lanes. We can use similar techniques with the vperm2f128 instruction,
32813 but it requires slightly different logic. */
32814 if (GET_MODE_SIZE (d->vmode) != 16)
32817 /* Examine from whence the elements come. */
32819 for (i = 0; i < nelt; ++i)
32820 contents |= 1u << d->perm[i];
32822 /* Split the two input vectors into 4 halves. */
32823 h1 = (1u << nelt2) - 1;
32828 memset (remap, 0xff, sizeof (remap));
32831 /* If all elements come from the two low halves, use interleave low;
32832 similarly for interleave high. If the elements come from mis-matched
32833 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
32834 if ((contents & (h1 | h3)) == contents)
32836 for (i = 0; i < nelt2; ++i)
32839 remap[i + nelt] = i * 2 + 1;
32840 dremap.perm[i * 2] = i;
32841 dremap.perm[i * 2 + 1] = i + nelt;
32844 else if ((contents & (h2 | h4)) == contents)
32846 for (i = 0; i < nelt2; ++i)
32848 remap[i + nelt2] = i * 2;
32849 remap[i + nelt + nelt2] = i * 2 + 1;
32850 dremap.perm[i * 2] = i + nelt2;
32851 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
32854 else if ((contents & (h1 | h4)) == contents)
32856 for (i = 0; i < nelt2; ++i)
32859 remap[i + nelt + nelt2] = i + nelt2;
32860 dremap.perm[i] = i;
32861 dremap.perm[i + nelt2] = i + nelt + nelt2;
32865 dremap.vmode = V2DImode;
32867 dremap.perm[0] = 0;
32868 dremap.perm[1] = 3;
32871 else if ((contents & (h2 | h3)) == contents)
32873 for (i = 0; i < nelt2; ++i)
32875 remap[i + nelt2] = i;
32876 remap[i + nelt] = i + nelt2;
32877 dremap.perm[i] = i + nelt2;
32878 dremap.perm[i + nelt2] = i + nelt;
32882 dremap.vmode = V2DImode;
32884 dremap.perm[0] = 1;
32885 dremap.perm[1] = 2;
32891 /* Use the remapping array set up above to move the elements from their
32892 swizzled locations into their final destinations. */
32894 for (i = 0; i < nelt; ++i)
32896 unsigned e = remap[d->perm[i]];
32897 gcc_assert (e < nelt);
32898 dfinal.perm[i] = e;
32900 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
32901 dfinal.op1 = dfinal.op0;
32902 dremap.target = dfinal.op0;
32904 /* Test if the final remap can be done with a single insn. For V4SFmode or
32905 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
32907 ok = expand_vec_perm_1 (&dfinal);
32908 seq = get_insns ();
32914 if (dremap.vmode != dfinal.vmode)
32916 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
32917 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
32918 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
32921 ok = expand_vec_perm_1 (&dremap);
32928 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
32929 permutation with two pshufb insns and an ior. We should have already
32930 failed all two instruction sequences. */
32933 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
32935 rtx rperm[2][16], vperm, l, h, op, m128;
32936 unsigned int i, nelt, eltsz;
32938 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32940 gcc_assert (d->op0 != d->op1);
32943 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32945 /* Generate two permutation masks. If the required element is within
32946 the given vector it is shuffled into the proper lane. If the required
32947 element is in the other vector, force a zero into the lane by setting
32948 bit 7 in the permutation mask. */
32949 m128 = GEN_INT (-128);
32950 for (i = 0; i < nelt; ++i)
32952 unsigned j, e = d->perm[i];
32953 unsigned which = (e >= nelt);
32957 for (j = 0; j < eltsz; ++j)
32959 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
32960 rperm[1-which][i*eltsz + j] = m128;
32964 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
32965 vperm = force_reg (V16QImode, vperm);
32967 l = gen_reg_rtx (V16QImode);
32968 op = gen_lowpart (V16QImode, d->op0);
32969 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
32971 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
32972 vperm = force_reg (V16QImode, vperm);
32974 h = gen_reg_rtx (V16QImode);
32975 op = gen_lowpart (V16QImode, d->op1);
32976 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
32978 op = gen_lowpart (V16QImode, d->target);
32979 emit_insn (gen_iorv16qi3 (op, l, h));
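/* Worked example (illustrative): a pshufb control byte with bit 7 set
   (m128 == -128 above) forces that result byte to zero, so each of the
   two shuffles contributes only the bytes it owns and the final ior
   merges them losslessly.  */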
32984 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
32985 and extract-odd permutations. */
32988 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
32995 t1 = gen_reg_rtx (V4DFmode);
32996 t2 = gen_reg_rtx (V4DFmode);
32998 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
32999 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33000 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33002 /* Now an unpck[lh]pd will produce the result required. */
33004 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33006 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33012 int mask = odd ? 0xdd : 0x88;
33014 t1 = gen_reg_rtx (V8SFmode);
33015 t2 = gen_reg_rtx (V8SFmode);
33016 t3 = gen_reg_rtx (V8SFmode);
33018 /* Shuffle within the 128-bit lanes to produce:
33019 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33020 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33023 /* Shuffle the lanes around to produce:
33024 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33025 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33028 /* Shuffle within the 128-bit lanes to produce:
33029 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33030 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33032 /* Shuffle within the 128-bit lanes to produce:
33033 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33034 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33036 /* Shuffle the lanes around to produce:
33037 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33038 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33047 /* These are always directly implementable by expand_vec_perm_1. */
33048 gcc_unreachable ();
33052 return expand_vec_perm_pshufb2 (d);
33055 /* We need 2*log2(N)-1 operations to achieve odd/even
33056 with interleave. */
33057 t1 = gen_reg_rtx (V8HImode);
33058 t2 = gen_reg_rtx (V8HImode);
33059 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33060 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33061 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33062 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33064 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33066 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33073 return expand_vec_perm_pshufb2 (d);
33076 t1 = gen_reg_rtx (V16QImode);
33077 t2 = gen_reg_rtx (V16QImode);
33078 t3 = gen_reg_rtx (V16QImode);
33079 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33080 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33081 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33082 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33083 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33084 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33086 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33088 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33094 gcc_unreachable ();
33100 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33101 extract-even and extract-odd permutations. */
33104 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33106 unsigned i, odd, nelt = d->nelt;
33108 odd = d->perm[0];
33109 if (odd != 0 && odd != 1)
33112 for (i = 1; i < nelt; ++i)
33113 if (d->perm[i] != 2 * i + odd)
33116 return expand_vec_perm_even_odd_1 (d, odd);
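/* Worked example (illustrative): extract-odd on V4SFmode is the
   permutation { 1, 3, 5, 7 } of the concatenated operands, i.e.
   d->perm[i] == 2 * i + 1, which the loop above verifies.  */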
33119 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33120 permutations. We assume that expand_vec_perm_1 has already failed. */
33123 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33125 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33126 enum machine_mode vmode = d->vmode;
33127 unsigned char perm2[4];
33135 /* These are special-cased in sse.md so that we can optionally
33136 use the vbroadcast instruction. They expand to two insns
33137 if the input happens to be in a register. */
33138 gcc_unreachable ();
33144 /* These are always implementable using standard shuffle patterns. */
33145 gcc_unreachable ();
33149 /* These can be implemented via interleave. We save one insn by
33150 stopping once we have promoted to V4SImode and then use pshufd. */
33153 optab otab = vec_interleave_low_optab;
33157 otab = vec_interleave_high_optab;
33162 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33163 vmode = get_mode_wider_vector (vmode);
33164 op0 = gen_lowpart (vmode, op0);
33166 while (vmode != V4SImode);
33168 memset (perm2, elt, 4);
33169 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33174 gcc_unreachable ();
33178 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33179 broadcast permutations. */
33182 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33184 unsigned i, elt, nelt = d->nelt;
33186 if (d->op0 != d->op1)
33187 return false;
33189 elt = d->perm[0];
33190 for (i = 1; i < nelt; ++i)
33191 if (d->perm[i] != elt)
33194 return expand_vec_perm_broadcast_1 (d);
33197 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33198 With all of the interface bits taken care of, perform the expansion
33199 in D and return true on success. */
33202 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33204 /* Try a single instruction expansion. */
33205 if (expand_vec_perm_1 (d))
33208 /* Try sequences of two instructions. */
33210 if (expand_vec_perm_pshuflw_pshufhw (d))
33213 if (expand_vec_perm_palignr (d))
33216 if (expand_vec_perm_interleave2 (d))
33219 if (expand_vec_perm_broadcast (d))
33222 /* Try sequences of three instructions. */
33224 if (expand_vec_perm_pshufb2 (d))
33227 /* ??? Look for narrow permutations whose element orderings would
33228 allow the promotion to a wider mode. */
33230 /* ??? Look for sequences of interleave or a wider permute that place
33231 the data into the correct lanes for a half-vector shuffle like
33232 pshuf[lh]w or vpermilps. */
33234 /* ??? Look for sequences of interleave that produce the desired results.
33235 The combinatorics of punpck[lh] get pretty ugly... */
33237 if (expand_vec_perm_even_odd (d))
33243 /* Extract the values from the vector CST into the permutation array in D.
33244 Return 0 on error, 1 if all values from the permutation come from the
33245 first vector, 2 if all values from the second vector, and 3 otherwise. */
33248 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33250 tree list = TREE_VECTOR_CST_ELTS (cst);
33251 unsigned i, nelt = d->nelt;
33254 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33256 unsigned HOST_WIDE_INT e;
33258 if (!host_integerp (TREE_VALUE (list), 1))
33260 e = tree_low_cst (TREE_VALUE (list), 1);
33264 ret |= (e < nelt ? 1 : 2);
33267 gcc_assert (list == NULL);
33269 /* For all elements from second vector, fold the elements to first. */
33271 for (i = 0; i < nelt; ++i)
33272 d->perm[i] -= nelt;
33278 ix86_expand_vec_perm_builtin (tree exp)
33280 struct expand_vec_perm_d d;
33281 tree arg0, arg1, arg2;
33283 arg0 = CALL_EXPR_ARG (exp, 0);
33284 arg1 = CALL_EXPR_ARG (exp, 1);
33285 arg2 = CALL_EXPR_ARG (exp, 2);
33287 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33288 d.nelt = GET_MODE_NUNITS (d.vmode);
33289 d.testing_p = false;
33290 gcc_assert (VECTOR_MODE_P (d.vmode));
33292 if (TREE_CODE (arg2) != VECTOR_CST)
33294 error_at (EXPR_LOCATION (exp),
33295 "vector permutation requires vector constant");
33299 switch (extract_vec_perm_cst (&d, arg2))
33305 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33309 if (!operand_equal_p (arg0, arg1, 0))
33311 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33312 d.op0 = force_reg (d.vmode, d.op0);
33313 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33314 d.op1 = force_reg (d.vmode, d.op1);
33318 /* The elements of PERM do not suggest that only the first operand
33319 is used, but both operands are identical. Allow easier matching
33320 of the permutation by folding the permutation into the single
33321 input vector. */
33323 unsigned i, nelt = d.nelt;
33324 for (i = 0; i < nelt; ++i)
33325 if (d.perm[i] >= nelt)
33331 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33332 d.op0 = force_reg (d.vmode, d.op0);
33337 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33338 d.op0 = force_reg (d.vmode, d.op0);
33343 d.target = gen_reg_rtx (d.vmode);
33344 if (ix86_expand_vec_perm_builtin_1 (&d))
33347 /* For compiler generated permutations, we should never get here, because
33348 the compiler should also be checking the ok hook. But since this is a
33349 builtin the user has access to, don't abort. */
33353 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33356 sorry ("vector permutation (%d %d %d %d)",
33357 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33360 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33361 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33362 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33365 sorry ("vector permutation "
33366 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33367 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33368 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33369 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33370 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33373 gcc_unreachable ();
33376 return CONST0_RTX (d.vmode);
33379 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33382 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33384 struct expand_vec_perm_d d;
33388 d.vmode = TYPE_MODE (vec_type);
33389 d.nelt = GET_MODE_NUNITS (d.vmode);
33390 d.testing_p = true;
33392 /* Given sufficient ISA support we can just return true here
33393 for selected vector modes. */
33394 if (GET_MODE_SIZE (d.vmode) == 16)
33396 /* All implementable with a single vpperm insn. */
33399 /* All implementable with 2 pshufb + 1 ior. */
33402 /* All implementable with shufpd or unpck[lh]pd. */
33407 vec_mask = extract_vec_perm_cst (&d, mask);
33409 /* This hook cannot be called in response to something that the
33410 user does (unlike the builtin expander), so we shouldn't ever see
33411 an error generated from the extract. */
33412 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33413 one_vec = (vec_mask != 3);
33415 /* Implementable with shufps or pshufd. */
33416 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33419 /* Otherwise we have to go through the motions and see if we can
33420 figure out how to generate the requested permutation. */
33421 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33422 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33424 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33427 ret = ix86_expand_vec_perm_builtin_1 (&d);
33434 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33436 struct expand_vec_perm_d d;
33442 d.vmode = GET_MODE (targ);
33443 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33444 d.testing_p = false;
33446 for (i = 0; i < nelt; ++i)
33447 d.perm[i] = i * 2 + odd;
33449 /* We'll either be able to implement the permutation directly... */
33450 if (expand_vec_perm_1 (&d))
33453 /* ... or we use the special-case patterns. */
33454 expand_vec_perm_even_odd_1 (&d, odd);
33457 /* This function returns the calling-ABI-specific va_list type node,
33458 i.e. the va_list type matching the ABI that FNDECL uses. */
33461 ix86_fn_abi_va_list (tree fndecl)
33464 return va_list_type_node;
33465 gcc_assert (fndecl != NULL_TREE);
33467 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33468 return ms_va_list_type_node;
33470 return sysv_va_list_type_node;
33473 /* Returns the canonical va_list type specified by TYPE. If there
33474 is no valid TYPE provided, it returns NULL_TREE. */
33477 ix86_canonical_va_list_type (tree type)
33481 /* Resolve references and pointers to va_list type. */
33482 if (TREE_CODE (type) == MEM_REF)
33483 type = TREE_TYPE (type);
33484 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33485 type = TREE_TYPE (type);
33486 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33487 type = TREE_TYPE (type);
33491 wtype = va_list_type_node;
33492 gcc_assert (wtype != NULL_TREE);
33494 if (TREE_CODE (wtype) == ARRAY_TYPE)
33496 /* If va_list is an array type, the argument may have decayed
33497 to a pointer type, e.g. by being passed to another function.
33498 In that case, unwrap both types so that we can compare the
33499 underlying records. */
33500 if (TREE_CODE (htype) == ARRAY_TYPE
33501 || POINTER_TYPE_P (htype))
33503 wtype = TREE_TYPE (wtype);
33504 htype = TREE_TYPE (htype);
33507 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33508 return va_list_type_node;
33509 wtype = sysv_va_list_type_node;
33510 gcc_assert (wtype != NULL_TREE);
33512 if (TREE_CODE (wtype) == ARRAY_TYPE)
33514 /* If va_list is an array type, the argument may have decayed
33515 to a pointer type, e.g. by being passed to another function.
33516 In that case, unwrap both types so that we can compare the
33517 underlying records. */
33518 if (TREE_CODE (htype) == ARRAY_TYPE
33519 || POINTER_TYPE_P (htype))
33521 wtype = TREE_TYPE (wtype);
33522 htype = TREE_TYPE (htype);
33525 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33526 return sysv_va_list_type_node;
33527 wtype = ms_va_list_type_node;
33528 gcc_assert (wtype != NULL_TREE);
33530 if (TREE_CODE (wtype) == ARRAY_TYPE)
33532 /* If va_list is an array type, the argument may have decayed
33533 to a pointer type, e.g. by being passed to another function.
33534 In that case, unwrap both types so that we can compare the
33535 underlying records. */
33536 if (TREE_CODE (htype) == ARRAY_TYPE
33537 || POINTER_TYPE_P (htype))
33539 wtype = TREE_TYPE (wtype);
33540 htype = TREE_TYPE (htype);
33543 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33544 return ms_va_list_type_node;
33547 return std_canonical_va_list_type (type);
33550 /* Iterate through the target-specific builtin types for va_list.
33551 IDX denotes the iterator, *PTREE is set to the result type of
33552 the va_list builtin, and *PNAME to its internal type.
33553 Returns zero if there is no element for this index, otherwise
33554 IDX should be increased upon the next call.
33555 Note, do not iterate a base builtin's name like __builtin_va_list.
33556 Used from c_common_nodes_and_builtins. */
33559 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33569 *ptree = ms_va_list_type_node;
33570 *pname = "__builtin_ms_va_list";
33574 *ptree = sysv_va_list_type_node;
33575 *pname = "__builtin_sysv_va_list";
33583 #undef TARGET_SCHED_DISPATCH
33584 #define TARGET_SCHED_DISPATCH has_dispatch
33585 #undef TARGET_SCHED_DISPATCH_DO
33586 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33588 /* The size of the dispatch window is the total number of bytes of
33589 object code allowed in a window. */
33590 #define DISPATCH_WINDOW_SIZE 16
33592 /* Number of dispatch windows considered for scheduling. */
33593 #define MAX_DISPATCH_WINDOWS 3
33595 /* Maximum number of instructions in a window. */
33598 /* Maximum number of immediate operands in a window. */
33601 /* Maximum number of immediate bits allowed in a window. */
33602 #define MAX_IMM_SIZE 128
33604 /* Maximum number of 32 bit immediates allowed in a window. */
33605 #define MAX_IMM_32 4
33607 /* Maximum number of 64 bit immediates allowed in a window. */
33608 #define MAX_IMM_64 2
33610 /* Maximum total of loads or prefetches allowed in a window. */
33613 /* Maximum total of stores allowed in a window. */
33614 #define MAX_STORE 1
33616 #define BIG 100
33620 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33621 enum dispatch_group {
33622 disp_no_group = 0,
33623 disp_load,
33624 disp_store,
33625 disp_load_store,
33626 disp_prefetch,
33627 disp_imm,
33628 disp_imm_32,
33629 disp_imm_64,
33630 disp_branch,
33631 disp_cmp,
33632 disp_jcc,
33633 disp_last
33634 };
33636 /* Number of allowable groups in a dispatch window. It is an array
33637 indexed by dispatch_group enum. 100 is used as a big number,
33638 because the number of these kinds of operations does not have any
33639 effect in a dispatch window, but we need them for other reasons in
33640 the table. */
33641 static unsigned int num_allowable_groups[disp_last] = {
33642 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33645 char group_name[disp_last + 1][16] = {
33646 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33647 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33648 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33651 /* Instruction path. */
33652 enum insn_path {
33653 no_path = -1,
33654 path_single, /* Single micro op. */
33655 path_double, /* Double micro op. */
33656 path_multi, /* Instructions with more than 2 micro ops. */
33657 };
33660 /* sched_insn_info defines a window to the instructions scheduled in
33661 the basic block. It contains a pointer to the insn_info table and
33662 the instruction scheduled.
33664 Windows are allocated for each basic block and are linked
33665 together. */
33666 typedef struct sched_insn_info_s {
33667 rtx insn;
33668 enum dispatch_group group;
33669 enum insn_path path;
33670 int byte_len;
33671 int imm_bytes;
33672 } sched_insn_info;
33674 /* Linked list of dispatch windows. This is a two way list of
33675 dispatch windows of a basic block. It contains information about
33676 the number of uops in the window and the total number of
33677 instructions and of bytes in the object code for this dispatch
33678 window. */
33679 typedef struct dispatch_windows_s {
33680 int num_insn; /* Number of insn in the window. */
33681 int num_uops; /* Number of uops in the window. */
33682 int window_size; /* Number of bytes in the window. */
33683 int window_num; /* Window number, 0 or 1. */
33684 int num_imm; /* Number of immediates in an insn. */
33685 int num_imm_32; /* Number of 32 bit immediates in an insn. */
33686 int num_imm_64; /* Number of 64 bit immediates in an insn. */
33687 int imm_size; /* Total immediates in the window. */
33688 int num_loads; /* Total memory loads in the window. */
33689 int num_stores; /* Total memory stores in the window. */
33690 int violation; /* Violation exists in window. */
33691 sched_insn_info *window; /* Pointer to the window. */
33692 struct dispatch_windows_s *next;
33693 struct dispatch_windows_s *prev;
33694 } dispatch_windows;
33696 /* Immediate values used in an insn. */
33697 typedef struct imm_info_s
33699 int imm;
33700 int imm32;
33701 int imm64;
33702 } imm_info;
33704 static dispatch_windows *dispatch_window_list;
33705 static dispatch_windows *dispatch_window_list1;
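/* Illustrative layout (exposition only, not from the original file):
   at most two windows are live at a time, jointly covering up to
   48 bytes of instructions:

     dispatch_window_list (window_num 0) <-> dispatch_window_list1 (window_num 1)

   process_end_window below asserts that the two window sizes never
   exceed 48 bytes combined.  */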
33707 /* Get dispatch group of insn. */
33709 static enum dispatch_group
33710 get_mem_group (rtx insn)
33712 enum attr_memory memory;
33714 if (INSN_CODE (insn) < 0)
33715 return disp_no_group;
33716 memory = get_attr_memory (insn);
33717 if (memory == MEMORY_STORE)
33720 if (memory == MEMORY_LOAD)
33723 if (memory == MEMORY_BOTH)
33724 return disp_load_store;
33726 return disp_no_group;
33729 /* Return true if insn is a compare instruction. */
33731 static bool
33732 is_cmp (rtx insn)
33734 enum attr_type type;
33736 type = get_attr_type (insn);
33737 return (type == TYPE_TEST
33738 || type == TYPE_ICMP
33739 || type == TYPE_FCMP
33740 || GET_CODE (PATTERN (insn)) == COMPARE);
33743 /* Return true if a dispatch violation encountered. */
33746 dispatch_violation (void)
33748 if (dispatch_window_list->next)
33749 return dispatch_window_list->next->violation;
33750 return dispatch_window_list->violation;
33753 /* Return true if insn is a branch instruction. */
33756 is_branch (rtx insn)
33758 return (CALL_P (insn) || JUMP_P (insn));
33761 /* Return true if insn is a prefetch instruction. */
33764 is_prefetch (rtx insn)
33766 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33769 /* This function initializes a dispatch window and the list container holding a
33770 pointer to the window. */
33773 init_window (int window_num)
33776 dispatch_windows *new_list;
33778 if (window_num == 0)
33779 new_list = dispatch_window_list;
33781 new_list = dispatch_window_list1;
33783 new_list->num_insn = 0;
33784 new_list->num_uops = 0;
33785 new_list->window_size = 0;
33786 new_list->next = NULL;
33787 new_list->prev = NULL;
33788 new_list->window_num = window_num;
33789 new_list->num_imm = 0;
33790 new_list->num_imm_32 = 0;
33791 new_list->num_imm_64 = 0;
33792 new_list->imm_size = 0;
33793 new_list->num_loads = 0;
33794 new_list->num_stores = 0;
33795 new_list->violation = false;
33797 for (i = 0; i < MAX_INSN; i++)
33799 new_list->window[i].insn = NULL;
33800 new_list->window[i].group = disp_no_group;
33801 new_list->window[i].path = no_path;
33802 new_list->window[i].byte_len = 0;
33803 new_list->window[i].imm_bytes = 0;
33808 /* This function allocates and initializes a dispatch window and the
33809 list container holding a pointer to the window. */
33811 static dispatch_windows *
33812 allocate_window (void)
33814 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
33815 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
33820 /* This routine initializes the dispatch scheduling information. It
33821 initiates building dispatch scheduler tables and constructs the
33822 first dispatch window. */
33825 init_dispatch_sched (void)
33827 /* Allocate a dispatch list and a window. */
33828 dispatch_window_list = allocate_window ();
33829 dispatch_window_list1 = allocate_window ();
33834 /* This function returns true if a branch is detected. End of a basic block
33835 does not have to be a branch, but here we assume only branches end a
33839 is_end_basic_block (enum dispatch_group group)
33841 return group == disp_branch;
33844 /* This function is called when the end of a window processing is reached. */
33847 process_end_window (void)
33849 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
33850 if (dispatch_window_list->next)
33852 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
33853 gcc_assert (dispatch_window_list->window_size
33854 + dispatch_window_list1->window_size <= 48);
33860 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
33861 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
33862 for 48 bytes of instructions. Note that these windows are not the
33863 dispatch windows whose sizes are DISPATCH_WINDOW_SIZE. */
33865 static dispatch_windows *
33866 allocate_next_window (int window_num)
33868 if (window_num == 0)
33870 if (dispatch_window_list->next)
33873 return dispatch_window_list;
33876 dispatch_window_list->next = dispatch_window_list1;
33877 dispatch_window_list1->prev = dispatch_window_list;
33879 return dispatch_window_list1;
33882 /* Increment the number of immediate operands of an instruction. */
33885 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
33890 switch (GET_CODE (*in_rtx))
33895 (imm_values->imm)++;
33896 if (x86_64_immediate_operand (*in_rtx, SImode))
33897 (imm_values->imm32)++;
33899 (imm_values->imm64)++;
33903 (imm_values->imm)++;
33904 (imm_values->imm64)++;
33908 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
33910 (imm_values->imm)++;
33911 (imm_values->imm32)++;
33922 /* Compute number of immediate operands of an instruction. */
33925 find_constant (rtx in_rtx, imm_info *imm_values)
33927 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
33928 (rtx_function) find_constant_1, (void *) imm_values);
33931 /* Return total size of immediate operands of an instruction along with number
33932 of corresponding immediate operands. It initializes its parameters to zero
33933 before calling FIND_CONSTANT.
33934 INSN is the input instruction. IMM is the total of immediates.
33935 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
33936 bit immediates. */
33939 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
33941 imm_info imm_values = {0, 0, 0};
33943 find_constant (insn, &imm_values);
33944 *imm = imm_values.imm;
33945 *imm32 = imm_values.imm32;
33946 *imm64 = imm_values.imm64;
33947 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
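/* Worked example (illustrative): an insn with one 32-bit and one
   64-bit immediate yields *imm == 2, *imm32 == 1, *imm64 == 1 and a
   return value of 1*4 + 1*8 == 12 bytes.  */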
33950 /* This function indicates if an operand of an instruction is an
33951 immediate. */
33954 has_immediate (rtx insn)
33956 int num_imm_operand;
33957 int num_imm32_operand;
33958 int num_imm64_operand;
33961 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33962 &num_imm64_operand);
33966 /* Return single or double path for instructions. */
33968 static enum insn_path
33969 get_insn_path (rtx insn)
33971 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
33973 if ((int)path == 0)
33974 return path_single;
33976 if ((int)path == 1)
33977 return path_double;
33979 return path_multi;
33982 /* Return insn dispatch group. */
33984 static enum dispatch_group
33985 get_insn_group (rtx insn)
33987 enum dispatch_group group = get_mem_group (insn);
33991 if (is_branch (insn))
33992 return disp_branch;
33997 if (has_immediate (insn))
34000 if (is_prefetch (insn))
34001 return disp_prefetch;
34003 return disp_no_group;
34006 /* Count number of GROUP restricted instructions in a dispatch
34007 window WINDOW_LIST. */
34010 count_num_restricted (rtx insn, dispatch_windows *window_list)
34012 enum dispatch_group group = get_insn_group (insn);
34014 int num_imm_operand;
34015 int num_imm32_operand;
34016 int num_imm64_operand;
34018 if (group == disp_no_group)
34021 if (group == disp_imm)
34023 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34024 &num_imm64_operand);
34025 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34026 || num_imm_operand + window_list->num_imm > MAX_IMM
34027 || (num_imm32_operand > 0
34028 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34029 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34030 || (num_imm64_operand > 0
34031 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34032 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34033 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34034 && num_imm64_operand > 0
34035 && ((window_list->num_imm_64 > 0
34036 && window_list->num_insn >= 2)
34037 || window_list->num_insn >= 3)))
34043 if ((group == disp_load_store
34044 && (window_list->num_loads >= MAX_LOAD
34045 || window_list->num_stores >= MAX_STORE))
34046 || ((group == disp_load
34047 || group == disp_prefetch)
34048 && window_list->num_loads >= MAX_LOAD)
34049 || (group == disp_store
34050 && window_list->num_stores >= MAX_STORE))
34056 /* This function returns true if insn satisfies dispatch rules on the
34057 last window scheduled. */
34060 fits_dispatch_window (rtx insn)
34062 dispatch_windows *window_list = dispatch_window_list;
34063 dispatch_windows *window_list_next = dispatch_window_list->next;
34064 unsigned int num_restrict;
34065 enum dispatch_group group = get_insn_group (insn);
34066 enum insn_path path = get_insn_path (insn);
34069 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34070 instructions should be given the lowest priority in the
34071 scheduling process in the Haifa scheduler to make sure they will be
34072 scheduled in the same dispatch window as the reference to them. */
34073 if (group == disp_jcc || group == disp_cmp)
34076 /* Check nonrestricted. */
34077 if (group == disp_no_group || group == disp_branch)
34080 /* Get last dispatch window. */
34081 if (window_list_next)
34082 window_list = window_list_next;
34084 if (window_list->window_num == 1)
34086 sum = window_list->prev->window_size + window_list->window_size;
34088 if (sum == 32
34089 || (min_insn_size (insn) + sum) >= 48)
34090 /* Window 1 is full. Go for next window. */
34091 return true;
34094 num_restrict = count_num_restricted (insn, window_list);
34096 if (num_restrict > num_allowable_groups[group])
34099 /* See if it fits in the first window. */
34100 if (window_list->window_num == 0)
34102 /* The first window should have only single and double path
34103 uops. */
34104 if (path == path_double
34105 && (window_list->num_uops + 2) > MAX_INSN)
34107 else if (path != path_single)
34113 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34114 dispatch window WINDOW_LIST. */
34117 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34119 int byte_len = min_insn_size (insn);
34120 int num_insn = window_list->num_insn;
34122 sched_insn_info *window = window_list->window;
34123 enum dispatch_group group = get_insn_group (insn);
34124 enum insn_path path = get_insn_path (insn);
34125 int num_imm_operand;
34126 int num_imm32_operand;
34127 int num_imm64_operand;
34129 if (!window_list->violation && group != disp_cmp
34130 && !fits_dispatch_window (insn))
34131 window_list->violation = true;
34133 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34134 &num_imm64_operand);
34136 /* Initialize window with new instruction. */
34137 window[num_insn].insn = insn;
34138 window[num_insn].byte_len = byte_len;
34139 window[num_insn].group = group;
34140 window[num_insn].path = path;
34141 window[num_insn].imm_bytes = imm_size;
34143 window_list->window_size += byte_len;
34144 window_list->num_insn = num_insn + 1;
34145 window_list->num_uops = window_list->num_uops + num_uops;
34146 window_list->imm_size += imm_size;
34147 window_list->num_imm += num_imm_operand;
34148 window_list->num_imm_32 += num_imm32_operand;
34149 window_list->num_imm_64 += num_imm64_operand;
34151 if (group == disp_store)
34152 window_list->num_stores += 1;
34153 else if (group == disp_load
34154 || group == disp_prefetch)
34155 window_list->num_loads += 1;
34156 else if (group == disp_load_store)
34158 window_list->num_stores += 1;
34159 window_list->num_loads += 1;
34163 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34164 If the total bytes of instructions or the number of instructions in
34165 the window exceeds the allowable limit, it allocates a new window. */
34168 add_to_dispatch_window (rtx insn)
34171 dispatch_windows *window_list;
34172 dispatch_windows *next_list;
34173 dispatch_windows *window0_list;
34174 enum insn_path path;
34175 enum dispatch_group insn_group;
34183 if (INSN_CODE (insn) < 0)
34186 byte_len = min_insn_size (insn);
34187 window_list = dispatch_window_list;
34188 next_list = window_list->next;
34189 path = get_insn_path (insn);
34190 insn_group = get_insn_group (insn);
34192 /* Get the last dispatch window. */
34194 window_list = dispatch_window_list->next;
34196 if (path == path_single)
34198 else if (path == path_double)
34201 insn_num_uops = (int) path;
34203 /* If the current window is full, get a new window.
34204 Window number zero is full if MAX_INSN uops are scheduled in it.
34205 Window number one is full if window zero's bytes plus window
34206 one's bytes equal 32, if adding the bytes of the new instruction
34207 to the total makes it greater than 48, or if it already has
34208 MAX_INSN instructions in it. */
34209 num_insn = window_list->num_insn;
34210 num_uops = window_list->num_uops;
34211 window_num = window_list->window_num;
34212 insn_fits = fits_dispatch_window (insn);
34214 if (num_insn >= MAX_INSN
34215 || num_uops + insn_num_uops > MAX_INSN
34218 window_num = ~window_num & 1;
34219 window_list = allocate_next_window (window_num);
34222 if (window_num == 0)
34224 add_insn_window (insn, window_list, insn_num_uops);
34225 if (window_list->num_insn >= MAX_INSN
34226 && insn_group == disp_branch)
34228 process_end_window ();
34232 else if (window_num == 1)
34234 window0_list = window_list->prev;
34235 sum = window0_list->window_size + window_list->window_size;
34236 if (sum == 32
34237 || (byte_len + sum) >= 48)
34239 process_end_window ();
34240 window_list = dispatch_window_list;
34243 add_insn_window (insn, window_list, insn_num_uops);
34246 gcc_unreachable ();
34248 if (is_end_basic_block (insn_group))
34250 /* End of basic block is reached; do end-basic-block processing. */
34251 process_end_window ();
34256 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34258 DEBUG_FUNCTION static void
34259 debug_dispatch_window_file (FILE *file, int window_num)
34261 dispatch_windows *list;
34264 if (window_num == 0)
34265 list = dispatch_window_list;
34267 list = dispatch_window_list1;
34269 fprintf (file, "Window #%d:\n", list->window_num);
34270 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34271 list->num_insn, list->num_uops, list->window_size);
34272 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34273 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34275 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34277 fprintf (file, " insn info:\n");
34279 for (i = 0; i < MAX_INSN; i++)
34281 if (!list->window[i].insn)
34283 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34284 i, group_name[list->window[i].group],
34285 i, (void *)list->window[i].insn,
34286 i, list->window[i].path,
34287 i, list->window[i].byte_len,
34288 i, list->window[i].imm_bytes);
34292 /* Print to stdout a dispatch window. */
34294 DEBUG_FUNCTION void
34295 debug_dispatch_window (int window_num)
34297 debug_dispatch_window_file (stdout, window_num);
34300 /* Print INSN dispatch information to FILE. */
34302 DEBUG_FUNCTION static void
34303 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34306 enum insn_path path;
34307 enum dispatch_group group;
34309 int num_imm_operand;
34310 int num_imm32_operand;
34311 int num_imm64_operand;
34313 if (INSN_CODE (insn) < 0)
34316 byte_len = min_insn_size (insn);
34317 path = get_insn_path (insn);
34318 group = get_insn_group (insn);
34319 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34320 &num_imm64_operand);
34322 fprintf (file, " insn info:\n");
34323 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34324 group_name[group], path, byte_len);
34325 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34326 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34329 /* Print to stdout the status of the ready list with respect to
34330 dispatch windows. */
34332 DEBUG_FUNCTION void
34333 debug_ready_dispatch (void)
34336 int no_ready = number_in_ready ();
34338 fprintf (stdout, "Number of ready: %d\n", no_ready);
34340 for (i = 0; i < no_ready; i++)
34341 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34344 /* This routine is the driver of the dispatch scheduler. */
34347 do_dispatch (rtx insn, int mode)
34349 if (mode == DISPATCH_INIT)
34350 init_dispatch_sched ();
34351 else if (mode == ADD_TO_DISPATCH_WINDOW)
34352 add_to_dispatch_window (insn);
34355 /* Return TRUE if Dispatch Scheduling is supported. */
34358 has_dispatch (rtx insn, int action)
34360 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34366 case IS_DISPATCH_ON:
34371 return is_cmp (insn);
34373 case DISPATCH_VIOLATION:
34374 return dispatch_violation ();
34376 case FITS_DISPATCH_WINDOW:
34377 return fits_dispatch_window (insn);
34383 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34384 place emms and femms instructions. */
34386 static enum machine_mode
34387 ix86_preferred_simd_mode (enum machine_mode mode)
34389 /* Disable double precision vectorizer if needed. */
34390 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
34393 if (!TARGET_AVX && !TARGET_SSE)
34399 return TARGET_AVX ? V8SFmode : V4SFmode;
34401 return TARGET_AVX ? V4DFmode : V2DFmode;
34417 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
34418 vectors. */
34420 static unsigned int
34421 ix86_autovectorize_vector_sizes (void)
34423 return TARGET_AVX ? 32 | 16 : 0;
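/* Illustrative note (exposition only): the returned value is a bitmask
   of vector sizes in bytes, so 32 | 16 tells the vectorizer to try
   256-bit vectors first and fall back to 128-bit ones, while 0 leaves
   only the preferred SIMD mode's size in play.  */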
34426 /* Initialize the GCC target structure. */
34427 #undef TARGET_RETURN_IN_MEMORY
34428 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34430 #undef TARGET_LEGITIMIZE_ADDRESS
34431 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34433 #undef TARGET_ATTRIBUTE_TABLE
34434 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34435 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34436 # undef TARGET_MERGE_DECL_ATTRIBUTES
34437 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34440 #undef TARGET_COMP_TYPE_ATTRIBUTES
34441 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34443 #undef TARGET_INIT_BUILTINS
34444 #define TARGET_INIT_BUILTINS ix86_init_builtins
34445 #undef TARGET_BUILTIN_DECL
34446 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34447 #undef TARGET_EXPAND_BUILTIN
34448 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34450 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34451 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34452 ix86_builtin_vectorized_function
34454 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34455 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34457 #undef TARGET_BUILTIN_RECIPROCAL
34458 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34460 #undef TARGET_ASM_FUNCTION_EPILOGUE
34461 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34463 #undef TARGET_ENCODE_SECTION_INFO
34464 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34465 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34467 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34470 #undef TARGET_ASM_OPEN_PAREN
34471 #define TARGET_ASM_OPEN_PAREN ""
34472 #undef TARGET_ASM_CLOSE_PAREN
34473 #define TARGET_ASM_CLOSE_PAREN ""
34475 #undef TARGET_ASM_BYTE_OP
34476 #define TARGET_ASM_BYTE_OP ASM_BYTE
34478 #undef TARGET_ASM_ALIGNED_HI_OP
34479 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34480 #undef TARGET_ASM_ALIGNED_SI_OP
34481 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34483 #undef TARGET_ASM_ALIGNED_DI_OP
34484 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34487 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34488 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34490 #undef TARGET_ASM_UNALIGNED_HI_OP
34491 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34492 #undef TARGET_ASM_UNALIGNED_SI_OP
34493 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34494 #undef TARGET_ASM_UNALIGNED_DI_OP
34495 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34497 #undef TARGET_PRINT_OPERAND
34498 #define TARGET_PRINT_OPERAND ix86_print_operand
34499 #undef TARGET_PRINT_OPERAND_ADDRESS
34500 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34501 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34502 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34503 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34504 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34506 #undef TARGET_SCHED_INIT_GLOBAL
34507 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34508 #undef TARGET_SCHED_ADJUST_COST
34509 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34510 #undef TARGET_SCHED_ISSUE_RATE
34511 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34512 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34513 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34514 ia32_multipass_dfa_lookahead
34516 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34517 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34520 #undef TARGET_HAVE_TLS
34521 #define TARGET_HAVE_TLS true
34523 #undef TARGET_CANNOT_FORCE_CONST_MEM
34524 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34525 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34526 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34528 #undef TARGET_DELEGITIMIZE_ADDRESS
34529 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34531 #undef TARGET_MS_BITFIELD_LAYOUT_P
34532 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34535 #undef TARGET_BINDS_LOCAL_P
34536 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34538 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34539 #undef TARGET_BINDS_LOCAL_P
34540 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34543 #undef TARGET_ASM_OUTPUT_MI_THUNK
34544 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34545 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34546 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34548 #undef TARGET_ASM_FILE_START
34549 #define TARGET_ASM_FILE_START x86_file_start
34551 #undef TARGET_DEFAULT_TARGET_FLAGS
34552 #define TARGET_DEFAULT_TARGET_FLAGS \
34554 | TARGET_SUBTARGET_DEFAULT \
34555 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
34557 #undef TARGET_HANDLE_OPTION
34558 #define TARGET_HANDLE_OPTION ix86_handle_option
34560 #undef TARGET_OPTION_OVERRIDE
34561 #define TARGET_OPTION_OVERRIDE ix86_option_override
34562 #undef TARGET_OPTION_OPTIMIZATION_TABLE
34563 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
34564 #undef TARGET_OPTION_INIT_STRUCT
34565 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
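
/* va_list handling hooks.  */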
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
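
/* Argument passing and return value hooks.  */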
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_SUPPORTS_SPLIT_STACK
#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
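
/* Vectorizer hooks.  */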
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
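
/* Per-function target option handling hooks.  */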
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
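
/* Build the target hook vector; TARGET_INITIALIZER picks up the
   hook overrides defined above.  */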
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"