/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"

enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
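
/* Illustrative sketch (not part of the pass itself): BLOCK_INFO is the
   usual GCC "aux field" idiom.  alloc_aux_for_blocks attaches one
   zeroed block_info_def to every basic block's ->aux pointer, and
   BLOCK_INFO gives typed access to it:

     alloc_aux_for_blocks (sizeof (struct block_info_def));
     FOR_EACH_BB (bb)
       if (BLOCK_INFO (bb)->state == used)
	 ...;
     free_aux_for_blocks ();

   move_or_delete_vzeroupper below follows exactly this pattern.  */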

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
	= (enum upper_128bits_state *) data;
      *state = used;
    }
}
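
/* Usage sketch (hypothetical wrapper, shown for illustration; the pass
   calls note_stores directly below): note_stores walks every store
   destination in PAT and invokes the callback with the destination,
   the enclosing SET or CLOBBER, and an opaque data pointer, which here
   carries the tracked upper-128bit state.  */

static enum upper_128bits_state avx256_state_of_stores (rtx) ATTRIBUTE_UNUSED;

static enum upper_128bits_state
avx256_state_of_stores (rtx pat)
{
  enum upper_128bits_state state = unused;
  note_stores (pat, check_avx256_stores, &state);
  return state;
}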

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
			     enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
	fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
		 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
	     bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
	{
	  if (!vzeroupper_insn)
	    continue;

	  if (PREV_INSN (insn) != vzeroupper_insn)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Move vzeroupper after:\n");
		  print_rtl_single (dump_file, PREV_INSN (insn));
		  fprintf (dump_file, "before:\n");
		  print_rtl_single (dump_file, insn);
		}
	      reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
				  PREV_INSN (insn));
	    }
	  vzeroupper_insn = NULL_RTX;
	  continue;
	}

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
	  && XINT (pat, 1) == UNSPECV_VZEROUPPER)
	{
	  if (dump_file)
	    {
	      /* Found vzeroupper intrinsic.  */
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}
      else
	{
	  /* Check insn for vzeroall intrinsic.  */
	  if (GET_CODE (pat) == PARALLEL
	      && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
	    {
	      state = unused;
	      unchanged = false;

	      /* Delete pending vzeroupper insertion.  */
	      if (vzeroupper_insn)
		{
		  delete_insn (vzeroupper_insn);
		  vzeroupper_insn = NULL_RTX;
		}
	    }
	  else if (state != used)
	    {
	      note_stores (pat, check_avx256_stores, &state);
	      if (state == used)
		unchanged = false;
	    }
	  continue;
	}

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  if (avx256 == callee_return_avx256)
	    {
	      state = used;
	      unchanged = false;
	    }

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, insn);
	    }
	  delete_insn (insn);
	}
      else
	{
	  /* Set state to UNUSED if callee doesn't return 256bit AVX
	     register.  */
	  if (avx256 != callee_return_pass_avx256)
	    state = unused;

	  if (avx256 == callee_return_pass_avx256
	      || avx256 == callee_pass_avx256)
	    {
	      /* Must remove vzeroupper since callee passes in 256bit
		 AVX register.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
		  print_rtl_single (dump_file, insn);
		}
	      delete_insn (insn);
	    }
	  else
	    {
	      vzeroupper_insn = insn;
	      unchanged = false;
	    }
	}
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
	     bb->index, unchanged ? "unchanged" : "changed",
	     state);
}

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  */

static void
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      switch (BLOCK_INFO (e->src)->state)
	{
	case unknown:
	  if (!unknown_is_unused)
	    seen_unknown = true;
	  break;
	case unused:
	  break;
	case used:
	  state = used;
	  goto done;
	}
    }

  if (seen_unknown)
    state = unknown;

 done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state && new_state == used)
    cfun->machine->rescan_vzeroupper_p = 1;
}

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  unsigned int count = 0;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process successor blocks of all entry points.  */
  if (dump_file)
    fprintf (dump_file, "Process all entry points\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p
				   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Process all basic blocks.  */
  do
    {
      if (dump_file)
	fprintf (dump_file, "Process all basic blocks: trip %d\n",
		 count);
      cfun->machine->rescan_vzeroupper_p = 0;
      FOR_EACH_BB (bb)
	move_or_delete_vzeroupper_1 (bb, false);
    }
  while (cfun->machine->rescan_vzeroupper_p && count++ < 20);

  /* FIXME: Is 20 big enough?  */
  if (count >= 20)
    gcc_unreachable ();

  if (dump_file)
    fprintf (dump_file, "Process all basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
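
/* Usage sketch (hypothetical helper; mult_init and divide are the
   per-mode cost arrays in struct processor_costs that this macro is
   meant to index):  */

static inline int
ix86_mult_init_cost (const struct processor_costs *cost,
		     enum machine_mode mode)
{
  /* Cost of starting a multiply in MODE, e.g. cost->mult_init[2]
     for SImode.  */
  return cost->mult_init[MODE_INDEX (mode)];
}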

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
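
/* Worked example of the convention above: an add is COSTS_N_INSNS (1)
   == 4 in the speed tables, and the same add is COSTS_N_BYTES (2) == 4
   in the size table (2 bytes of encoding), so both tables share one
   scale where 4 means "one add-sized instruction".  */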

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
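
/* Sketch of how a stringop_algs table is consulted (hypothetical
   helper for illustration; the real chooser, decide_alg, lives further
   down in this file): each {max, alg} entry pairs a maximum byte count
   with an algorithm, max == -1 means "no upper bound", and
   DUMMY_STRINGOP_ALGS answers libcall for every size.  */

static inline enum stringop_alg
first_fitting_alg (const struct stringop_algs *algs, int count)
{
  size_t i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || count <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;
}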

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  0,  /* "large" insn */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
	      in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers
	      in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),  /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  1,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
	      in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
		  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
		  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
	      in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
		  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
		  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  486 has 8kB cache
	 shared for code and data, so 4kB is
	 not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
	      in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
		  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
		  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
	      in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  The {max, alg}
     pairs below are decoded in the note after this table.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
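
/* Decoding the memcpy entry above (interpretation of the {max, alg}
   pairs; -1 is the "no upper bound" sentinel): a known copy of up to
   128 bytes uses a simple loop, up to 1024 bytes an unrolled loop, up
   to 8192 bytes rep movsl, and anything larger rep movsb, while a size
   unknown at compile time goes straight to rep_prefix_4_byte.  */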

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */

  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers
	      in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers
	      in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  128,  /* size of l2 cache.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
	      in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  32,  /* size of l2 cache.  Some models
	  have integrated l2 cache, but
	  optimizing for k6 is not important
	  enough to worry about that.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
		  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
	      in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
		  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
	      in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  5,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  3,  /* vec_unalign_load_cost.  */
  3,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  2,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
		  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
	      in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
      /* On K8:
	  MOVD reg64, xmmreg Double FSTORE 4
	  MOVD reg32, xmmreg Double FSTORE 4
	 On AMDFAM10:
	  MOVD reg64, xmmreg Double FADD 3
			     1/1  1/1
	  MOVD reg32, xmmreg Double FADD 3
			     1/1  1/1 */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {5, 5, 4},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {5, 5, 12},  /* cost of loading fp registers
		  in SFmode, DFmode and XFmode */
  {4, 4, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
	      in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 4},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
      /* On K8:
	  MOVD reg64, xmmreg Double FSTORE 4
	  MOVD reg32, xmmreg Double FSTORE 4
	 On AMDFAM10:
	  MOVD reg64, xmmreg Double FADD 3
			     1/1  1/1
	  MOVD reg32, xmmreg Double FADD 3
			     1/1  1/1 */
  16,  /* size of l1 cache.  */
  2048,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,  /* scalar_stmt_cost.  */
  4,  /* scalar load_cost.  */
  4,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  4,  /* vec_align_load_cost.  */
  4,  /* vec_unalign_load_cost.  */
  4,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
		  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
	      in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
      /* On K8:
	  MOVD reg64, xmmreg Double FSTORE 4
	  MOVD reg32, xmmreg Double FSTORE 4
	 On AMDFAM10:
	  MOVD reg64, xmmreg Double FADD 3
			     1/1  1/1
	  MOVD reg32, xmmreg Double FADD 3
			     1/1  1/1 */
  32,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (3),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (4),  /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  2,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
	      in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
	      in SImode and DImode */
  12,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
		    in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  10,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (5),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  3,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
		    in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  6,  /* cost of moving MMX register */
  {12, 12},  /* cost of loading MMX registers
		in SImode and DImode */
  {12, 12},  /* cost of storing MMX registers
		in SImode and DImode */
  6,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
		    in SImode, DImode and TImode */
  {12, 12, 12},  /* cost of storing SSE registers
		    in SImode, DImode and TImode */
  8,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
		    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
	      in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
		 in QImode, HImode and SImode.
		 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
		    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
		 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
	      in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
	      in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
1610 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1613 struct processor_costs generic32_cost = {
1614 COSTS_N_INSNS (1), /* cost of an add instruction */
1615 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1616 COSTS_N_INSNS (1), /* variable shift costs */
1617 COSTS_N_INSNS (1), /* constant shift costs */
1618 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1619 COSTS_N_INSNS (4), /* HI */
1620 COSTS_N_INSNS (3), /* SI */
1621 COSTS_N_INSNS (4), /* DI */
1622 COSTS_N_INSNS (2)}, /* other */
1623 0, /* cost of multiply per each bit set */
1624 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1625 COSTS_N_INSNS (26), /* HI */
1626 COSTS_N_INSNS (42), /* SI */
1627 COSTS_N_INSNS (74), /* DI */
1628 COSTS_N_INSNS (74)}, /* other */
1629 COSTS_N_INSNS (1), /* cost of movsx */
1630 COSTS_N_INSNS (1), /* cost of movzx */
1631 8, /* "large" insn */
1632 17, /* MOVE_RATIO */
1633 4, /* cost for loading QImode using movzbl */
1634 {4, 4, 4}, /* cost of loading integer registers
1635 in QImode, HImode and SImode.
1636 Relative to reg-reg move (2). */
1637 {4, 4, 4}, /* cost of storing integer registers */
1638 4, /* cost of reg,reg fld/fst */
1639 {12, 12, 12}, /* cost of loading fp registers
1640 in SFmode, DFmode and XFmode */
1641 {6, 6, 8}, /* cost of storing fp registers
1642 in SFmode, DFmode and XFmode */
1643 2, /* cost of moving MMX register */
1644 {8, 8}, /* cost of loading MMX registers
1645 in SImode and DImode */
1646 {8, 8}, /* cost of storing MMX registers
1647 in SImode and DImode */
1648 2, /* cost of moving SSE register */
1649 {8, 8, 8}, /* cost of loading SSE registers
1650 in SImode, DImode and TImode */
1651 {8, 8, 8}, /* cost of storing SSE registers
1652 in SImode, DImode and TImode */
1653 5, /* MMX or SSE register to integer */
1654 32, /* size of l1 cache. */
1655 256, /* size of l2 cache. */
1656 64, /* size of prefetch block */
1657 6, /* number of parallel prefetches */
1658 3, /* Branch cost */
1659 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1660 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1661 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1662 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1663 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1664 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1665 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1666 DUMMY_STRINGOP_ALGS},
1667 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1668 DUMMY_STRINGOP_ALGS},
1669 1, /* scalar_stmt_cost. */
1670 1, /* scalar load_cost. */
1671 1, /* scalar_store_cost. */
1672 1, /* vec_stmt_cost. */
1673 1, /* vec_to_scalar_cost. */
1674 1, /* scalar_to_vec_cost. */
1675 1, /* vec_align_load_cost. */
1676 2, /* vec_unalign_load_cost. */
1677 1, /* vec_store_cost. */
1678 3, /* cond_taken_branch_cost. */
1679 1, /* cond_not_taken_branch_cost. */
1680 };
1682 const struct processor_costs *ix86_cost = &pentium_cost;
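/* Illustrative sketch (not part of GCC proper): how entries in the cost
   tables above are scaled. This assumes the standard rtl.h definition of
   COSTS_N_INSNS, which multiplies an instruction count by 4; the rest of
   the backend consults these values through the ix86_cost pointer.  */
#if 0
#include <stdio.h>

#define COSTS_N_INSNS(N) ((N) * 4)

int
main (void)
{
  /* In the generic tables above, lea is priced at one instruction plus
     one extra cost unit, i.e. slightly worse than a plain add.  */
  int add_cost = COSTS_N_INSNS (1);     /* 4 */
  int lea_cost = COSTS_N_INSNS (1) + 1; /* 5 */
  printf ("add=%d lea=%d\n", add_cost, lea_cost);
  return 0;
}
#endif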
1684 /* Processor feature/optimization bitmasks. */
1685 #define m_386 (1<<PROCESSOR_I386)
1686 #define m_486 (1<<PROCESSOR_I486)
1687 #define m_PENT (1<<PROCESSOR_PENTIUM)
1688 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1689 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1690 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1691 #define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
1692 #define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
1693 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1694 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1695 #define m_COREI7 (m_COREI7_32 | m_COREI7_64)
1696 #define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
1697 #define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
1698 #define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
1699 #define m_ATOM (1<<PROCESSOR_ATOM)
1701 #define m_GEODE (1<<PROCESSOR_GEODE)
1702 #define m_K6 (1<<PROCESSOR_K6)
1703 #define m_K6_GEODE (m_K6 | m_GEODE)
1704 #define m_K8 (1<<PROCESSOR_K8)
1705 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1706 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1707 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1708 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1709 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1710 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
1712 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1713 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1715 /* Generic instruction choice should be the common subset of supported CPUs
1716 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1717 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
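/* Illustrative sketch (not part of GCC proper): the m_* macros above are
   one-hot bitmasks and the composite masks (m_AMD_MULTIPLE, m_GENERIC,
   ...) are plain unions, so "does this tuning apply to the selected
   processor" is a single AND. The toy enum below is hypothetical.  */
#if 0
#include <stdio.h>

enum toy_processor { TOY_ATHLON, TOY_K8, TOY_PENT4 };

#define TOY_M_ATHLON    (1 << TOY_ATHLON)
#define TOY_M_K8        (1 << TOY_K8)
#define TOY_M_PENT4     (1 << TOY_PENT4)
#define TOY_M_ATHLON_K8 (TOY_M_ATHLON | TOY_M_K8)

int
main (void)
{
  unsigned int tune_mask = 1u << TOY_K8;  /* the selected -mtune */
  printf ("K8 in ATHLON_K8: %d\n", !!(TOY_M_ATHLON_K8 & tune_mask)); /* 1 */
  printf ("K8 in PENT4:     %d\n", !!(TOY_M_PENT4 & tune_mask));     /* 0 */
  return 0;
}
#endif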
1719 /* Feature tests against the various tunings. */
1720 unsigned char ix86_tune_features[X86_TUNE_LAST];
1722 /* Feature tests against the various tunings used to create ix86_tune_features
1723 based on the processor mask. */
1724 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1725 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1726 negatively, so enabling it for Generic64 seems like a good code-size
1727 tradeoff. We can't enable it for 32bit generic because it does not
1728 work well with PPro-based chips. */
1729 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
1731 /* X86_TUNE_PUSH_MEMORY */
1732 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1733 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1735 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1738 /* X86_TUNE_UNROLL_STRLEN */
1739 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1740 | m_CORE2I7 | m_GENERIC,
1742 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1743 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1744 | m_CORE2I7 | m_GENERIC,
1746 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1747 on simulation results. But after the P4 was made, no performance benefit
1748 was observed with branch hints; they also increase code size.
1749 As a result, icc never generates branch hints. */
1752 /* X86_TUNE_DOUBLE_WITH_ADD */
1755 /* X86_TUNE_USE_SAHF */
1756 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
1757 | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1759 /* X86_TUNE_MOVX: Enable zero extension of integer registers to avoid
1760 partial dependencies. */
1761 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1762 | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1764 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1765 register stalls on the Generic32 compilation setting as well. However,
1766 in the current implementation partial register stalls are not eliminated
1767 very well: they can be introduced via subregs synthesized by combine
1768 and can happen in caller/callee saving sequences. Because this option
1769 pays back little on PPro-based chips and conflicts with the partial
1770 register dependencies used by Athlon/P4-based chips, it is better to
1771 leave it off for generic32 for now. */
1774 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1775 m_CORE2I7 | m_GENERIC,
1777 /* X86_TUNE_USE_HIMODE_FIOP */
1778 m_386 | m_486 | m_K6_GEODE,
1780 /* X86_TUNE_USE_SIMODE_FIOP */
1781 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
1783 /* X86_TUNE_USE_MOV0 */
1786 /* X86_TUNE_USE_CLTD */
1787 ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
1789 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1792 /* X86_TUNE_SPLIT_LONG_MOVES */
1795 /* X86_TUNE_READ_MODIFY_WRITE */
1798 /* X86_TUNE_READ_MODIFY */
1801 /* X86_TUNE_PROMOTE_QIMODE */
1802 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1803 | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
1805 /* X86_TUNE_FAST_PREFIX */
1806 ~(m_PENT | m_486 | m_386),
1808 /* X86_TUNE_SINGLE_STRINGOP */
1809 m_386 | m_PENT4 | m_NOCONA,
1811 /* X86_TUNE_QIMODE_MATH */
1814 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1815 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1816 might be considered for Generic32 if our scheme for avoiding partial
1817 stalls were more effective. */
1820 /* X86_TUNE_PROMOTE_QI_REGS */
1823 /* X86_TUNE_PROMOTE_HI_REGS */
1826 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1827 over esp addition. */
1828 m_386 | m_486 | m_PENT | m_PPRO,
1830 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1831 over esp addition. */
1834 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1835 over esp subtraction. */
1836 m_386 | m_486 | m_PENT | m_K6_GEODE,
1838 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1839 over esp subtraction. */
1840 m_PENT | m_K6_GEODE,
1842 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1843 for DFmode copies */
1844 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
1845 | m_GENERIC | m_GEODE),
1847 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1848 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1850 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1851 conflict between PPro/Pentium4-based chips that treat 128bit
1852 SSE registers as single units and K8-based chips that divide SSE
1853 registers into two 64bit halves. This knob promotes all store destinations
1854 to be 128bit to allow register renaming on 128bit SSE units, but usually
1855 results in one extra microop on 64bit SSE units. Experimental results
1856 show that disabling this option on P4 brings over a 20% SPECfp regression,
1857 while enabling it on K8 brings a roughly 2.4% regression that can be partly
1858 masked by careful scheduling of moves. */
1859 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
1860 | m_AMDFAM10 | m_BDVER1,
1862 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1863 m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
1865 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1866 m_BDVER1 | m_COREI7,
1868 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1871 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1872 are resolved on SSE register parts instead of whole registers, so we may
1873 maintain just the lower part of scalar values in the proper format, leaving
1874 the upper part undefined. */
1877 /* X86_TUNE_SSE_TYPELESS_STORES */
1880 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1881 m_PPRO | m_PENT4 | m_NOCONA,
1883 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1884 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
1886 /* X86_TUNE_PROLOGUE_USING_MOVE */
1887 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1889 /* X86_TUNE_EPILOGUE_USING_MOVE */
1890 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
1892 /* X86_TUNE_SHIFT1 */
1895 /* X86_TUNE_USE_FFREEP */
1898 /* X86_TUNE_INTER_UNIT_MOVES */
1899 ~(m_AMD_MULTIPLE | m_GENERIC),
1901 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1902 ~(m_AMDFAM10 | m_BDVER1),
1904 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1905 than 4 branch instructions in the 16 byte window. */
1906 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
1909 /* X86_TUNE_SCHEDULE */
1910 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
1913 /* X86_TUNE_USE_BT */
1914 m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
1916 /* X86_TUNE_USE_INCDEC */
1917 ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
1919 /* X86_TUNE_PAD_RETURNS */
1920 m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
1922 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1925 /* X86_TUNE_EXT_80387_CONSTANTS */
1926 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1927 | m_CORE2I7 | m_GENERIC,
1929 /* X86_TUNE_SHORTEN_X87_SSE */
1932 /* X86_TUNE_AVOID_VECTOR_DECODE */
1933 m_K8 | m_CORE2I7_64 | m_GENERIC64,
1935 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1936 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1939 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
1940 the vector path on AMD machines. */
1941 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
1943 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path
1944 on AMD machines. */
1945 m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
1947 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1948 than via a MOV. */
1951 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1952 but is one byte longer. */
1955 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1956 operand that cannot be represented using a modRM byte. The XOR
1957 replacement is long decoded, so this split helps here as well. */
1960 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1961 from FP to FP. */
1962 m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
1964 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1965 from integer to FP. */
1968 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1969 with a subsequent conditional jump instruction into a single
1970 compare-and-branch uop. */
1973 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1974 will impact LEA instruction selection. */
1977 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1978 instructions. */
1982 /* Feature tests against the various architecture variations. */
1983 unsigned char ix86_arch_features[X86_ARCH_LAST];
1985 /* Feature tests against the various architecture variations, used to create
1986 ix86_arch_features based on the processor mask. */
1987 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1988 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1989 ~(m_386 | m_486 | m_PENT | m_K6),
1991 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1994 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1997 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2000 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2004 static const unsigned int x86_accumulate_outgoing_args
2005 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
2008 static const unsigned int x86_arch_always_fancy_math_387
2009 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
2010 | m_NOCONA | m_CORE2I7 | m_GENERIC;
2012 static enum stringop_alg stringop_alg = no_stringop;
2014 /* In case the average insn count for a single function invocation is
2015 lower than this constant, emit a fast (but longer) prologue and
2016 epilogue. */
2017 #define FAST_PROLOGUE_INSN_COUNT 20
2019 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2020 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2021 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2022 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2024 /* Array of the smallest class containing reg number REGNO, indexed by
2025 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2027 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2029 /* ax, dx, cx, bx */
2030 AREG, DREG, CREG, BREG,
2031 /* si, di, bp, sp */
2032 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2034 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2035 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2038 /* flags, fpsr, fpcr, frame */
2039 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2041 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2044 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2047 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2048 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2049 /* SSE REX registers */
2050 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2054 /* The "default" register map used in 32bit mode. */
2056 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2058 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2059 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2060 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2061 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2062 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2063 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2064 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2067 /* The "default" register map used in 64bit mode. */
2069 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2071 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2072 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2073 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2074 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2075 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2076 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2077 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2080 /* Define the register numbers to be used in Dwarf debugging information.
2081 The SVR4 reference port C compiler uses the following register numbers
2082 in its Dwarf output code:
2083 0 for %eax (gcc regno = 0)
2084 1 for %ecx (gcc regno = 2)
2085 2 for %edx (gcc regno = 1)
2086 3 for %ebx (gcc regno = 3)
2087 4 for %esp (gcc regno = 7)
2088 5 for %ebp (gcc regno = 6)
2089 6 for %esi (gcc regno = 4)
2090 7 for %edi (gcc regno = 5)
2091 The following three DWARF register numbers are never generated by
2092 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2093 believes these numbers have these meanings.
2094 8 for %eip (no gcc equivalent)
2095 9 for %eflags (gcc regno = 17)
2096 10 for %trapno (no gcc equivalent)
2097 It is not at all clear how we should number the FP stack registers
2098 for the x86 architecture. If the version of SDB on x86/svr4 were
2099 a bit less brain dead with respect to floating-point then we would
2100 have a precedent to follow with respect to DWARF register numbers
2101 for x86 FP registers, but the SDB on x86/svr4 is so completely
2102 broken with respect to FP registers that it is hardly worth thinking
2103 of it as something to strive for compatibility with.
2104 The version of x86/svr4 SDB I have at the moment does (partially)
2105 seem to believe that DWARF register number 11 is associated with
2106 the x86 register %st(0), but that's about all. Higher DWARF
2107 register numbers don't seem to be associated with anything in
2108 particular, and even for DWARF regno 11, SDB only seems to
2109 understand that it should say that a variable lives in %st(0) (when
2110 asked via an `=' command) if we said it was in DWARF regno 11,
2111 but SDB still prints garbage when asked for the value of the
2112 variable in question (via a `/' command).
2113 (Also note that the labels SDB prints for various FP stack regs
2114 when doing an `x' command are all wrong.)
2115 Note that these problems generally don't affect the native SVR4
2116 C compiler because it doesn't allow the use of -O with -g and
2117 because when it is *not* optimizing, it allocates a memory
2118 location for each floating-point variable, and the memory
2119 location is what gets described in the DWARF AT_location
2120 attribute for the variable in question.
2121 Regardless of the severe mental illness of the x86/svr4 SDB, we
2122 do something sensible here and we use the following DWARF
2123 register numbers. Note that these are all stack-top-relative
2124 numbers:
2125 11 for %st(0) (gcc regno = 8)
2126 12 for %st(1) (gcc regno = 9)
2127 13 for %st(2) (gcc regno = 10)
2128 14 for %st(3) (gcc regno = 11)
2129 15 for %st(4) (gcc regno = 12)
2130 16 for %st(5) (gcc regno = 13)
2131 17 for %st(6) (gcc regno = 14)
2132 18 for %st(7) (gcc regno = 15)
2134 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2136 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2137 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2138 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
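/* Illustrative sketch (not part of GCC proper): with the table above, the
   SVR4 DWARF number for a hard register is a direct array lookup. The
   first eight entries are copied here so the demo is self-contained.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* gcc regno order: ax, dx, cx, bx, si, di, bp, sp.  */
  static const int svr4_map[8] = { 0, 2, 1, 3, 6, 7, 5, 4 };
  printf ("%%ecx (gcc regno 2) -> DWARF %d\n", svr4_map[2]); /* DWARF 1 */
  printf ("%%esp (gcc regno 7) -> DWARF %d\n", svr4_map[7]); /* DWARF 4 */
  return 0;
}
#endif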
2145 /* Define parameter passing and return registers. */
2147 static int const x86_64_int_parameter_registers[6] =
2149 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2152 static int const x86_64_ms_abi_int_parameter_registers[4] =
2154 CX_REG, DX_REG, R8_REG, R9_REG
2157 static int const x86_64_int_return_registers[4] =
2159 AX_REG, DX_REG, DI_REG, SI_REG
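/* Illustrative sketch (not part of GCC proper): for a call f (a, b, c, d)
   with four integer arguments, the two ABIs above hand out registers in
   table order, so the same source-level call uses different registers.  */
#if 0
#include <stdio.h>

int
main (void)
{
  static const char *const sysv[6] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
  static const char *const ms[4]   = { "rcx", "rdx", "r8", "r9" };
  int i;
  for (i = 0; i < 4; i++)
    printf ("arg %d: SysV %%%s, MS %%%s\n", i, sysv[i], ms[i]);
  return 0;
}
#endif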
2162 /* Define the structure for the machine field in struct function. */
2164 struct GTY(()) stack_local_entry {
2165 unsigned short mode;
2168 struct stack_local_entry *next;
2171 /* Structure describing stack frame layout.
2172 Stack grows downward:
2178 saved static chain if ix86_static_chain_on_stack
2180 saved frame pointer if frame_pointer_needed
2181 <- HARD_FRAME_POINTER
2187 <- sse_regs_save_offset
2190 [va_arg registers] |
2194 [padding2] | = to_allocate
2203 int outgoing_arguments_size;
2204 HOST_WIDE_INT frame;
2206 /* The offsets relative to ARG_POINTER. */
2207 HOST_WIDE_INT frame_pointer_offset;
2208 HOST_WIDE_INT hard_frame_pointer_offset;
2209 HOST_WIDE_INT stack_pointer_offset;
2210 HOST_WIDE_INT hfp_save_offset;
2211 HOST_WIDE_INT reg_save_offset;
2212 HOST_WIDE_INT sse_reg_save_offset;
2214 /* When save_regs_using_mov is set, emit prologue using
2215 move instead of push instructions. */
2216 bool save_regs_using_mov;
2219 /* Code model option. */
2220 enum cmodel ix86_cmodel;
2222 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2224 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2226 /* Which unit we are generating floating point math for. */
2227 enum fpmath_unit ix86_fpmath;
2229 /* Which CPU we are scheduling for. */
2230 enum attr_cpu ix86_schedule;
2232 /* Which CPU we are optimizing for. */
2233 enum processor_type ix86_tune;
2235 /* Which instruction set architecture to use. */
2236 enum processor_type ix86_arch;
2238 /* TRUE if the SSE prefetch instruction is not a NOP. */
2239 int x86_prefetch_sse;
2241 /* ix86_regparm_string as a number */
2242 static int ix86_regparm;
2244 /* -mstackrealign option */
2245 static const char ix86_force_align_arg_pointer_string[]
2246 = "force_align_arg_pointer";
2248 static rtx (*ix86_gen_leave) (void);
2249 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2250 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2251 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2252 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2253 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2254 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2255 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2256 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2257 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2259 /* Preferred alignment for stack boundary in bits. */
2260 unsigned int ix86_preferred_stack_boundary;
2262 /* Alignment for incoming stack boundary in bits, specified on the
2263 command line. */
2264 static unsigned int ix86_user_incoming_stack_boundary;
2266 /* Default alignment for incoming stack boundary in bits. */
2267 static unsigned int ix86_default_incoming_stack_boundary;
2269 /* Alignment for incoming stack boundary in bits. */
2270 unsigned int ix86_incoming_stack_boundary;
2272 /* The ABI used by the target. */
2273 enum calling_abi ix86_abi;
2275 /* Values 1-5: see jump.c */
2276 int ix86_branch_cost;
2278 /* Calling abi specific va_list type nodes. */
2279 static GTY(()) tree sysv_va_list_type_node;
2280 static GTY(()) tree ms_va_list_type_node;
2282 /* Variables which are this size or smaller are put in the data/bss
2283 or ldata/lbss sections. */
2285 int ix86_section_threshold = 65536;
2287 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2288 char internal_label_prefix[16];
2289 int internal_label_prefix_len;
2291 /* Fence to use after loop using movnt. */
2294 /* Register class used for passing a given 64bit part of the argument.
2295 These represent classes as documented by the psABI, with the exception
2296 of the SSESF and SSEDF classes, which are basically SSE class; GCC will
2297 just use SF or DFmode moves instead of DImode to avoid reformatting penalties.
2299 Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode moves
2300 whenever possible (the upper half does contain padding). */
2301 enum x86_64_reg_class
2304 X86_64_INTEGER_CLASS,
2305 X86_64_INTEGERSI_CLASS,
2312 X86_64_COMPLEX_X87_CLASS,
2316 #define MAX_CLASSES 4
2318 /* Table of constants used by fldpi, fldln2, etc. */
2319 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2320 static bool ext_80387_constants_init = 0;
2323 static struct machine_function * ix86_init_machine_status (void);
2324 static rtx ix86_function_value (const_tree, const_tree, bool);
2325 static bool ix86_function_value_regno_p (const unsigned int);
2326 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2328 static rtx ix86_static_chain (const_tree, bool);
2329 static int ix86_function_regparm (const_tree, const_tree);
2330 static void ix86_compute_frame_layout (struct ix86_frame *);
2331 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2333 static void ix86_add_new_builtins (int);
2334 static rtx ix86_expand_vec_perm_builtin (tree);
2335 static tree ix86_canonical_va_list_type (tree);
2336 static void predict_jump (int);
2337 static unsigned int split_stack_prologue_scratch_regno (void);
2338 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2340 enum ix86_function_specific_strings
2342 IX86_FUNCTION_SPECIFIC_ARCH,
2343 IX86_FUNCTION_SPECIFIC_TUNE,
2344 IX86_FUNCTION_SPECIFIC_FPMATH,
2345 IX86_FUNCTION_SPECIFIC_MAX
2348 static char *ix86_target_string (int, int, const char *, const char *,
2349 const char *, bool);
2350 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2351 static void ix86_function_specific_save (struct cl_target_option *);
2352 static void ix86_function_specific_restore (struct cl_target_option *);
2353 static void ix86_function_specific_print (FILE *, int,
2354 struct cl_target_option *);
2355 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2356 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2357 static bool ix86_can_inline_p (tree, tree);
2358 static void ix86_set_current_function (tree);
2359 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2361 static enum calling_abi ix86_function_abi (const_tree);
2364 #ifndef SUBTARGET32_DEFAULT_CPU
2365 #define SUBTARGET32_DEFAULT_CPU "i386"
2368 /* The svr4 ABI for the i386 says that records and unions are returned
2369 in memory. */
2370 #ifndef DEFAULT_PCC_STRUCT_RETURN
2371 #define DEFAULT_PCC_STRUCT_RETURN 1
2374 /* Whether -mtune= or -march= were specified */
2375 static int ix86_tune_defaulted;
2376 static int ix86_arch_specified;
2378 /* A mask of ix86_isa_flags that includes bit X if X
2379 was set or cleared on the command line. */
2380 static int ix86_isa_flags_explicit;
2382 /* Define a set of ISAs which are available when a given ISA is
2383 enabled. MMX and SSE ISAs are handled separately; see the sketch after these definitions. */
2385 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2386 #define OPTION_MASK_ISA_3DNOW_SET \
2387 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2389 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2390 #define OPTION_MASK_ISA_SSE2_SET \
2391 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2392 #define OPTION_MASK_ISA_SSE3_SET \
2393 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2394 #define OPTION_MASK_ISA_SSSE3_SET \
2395 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2396 #define OPTION_MASK_ISA_SSE4_1_SET \
2397 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2398 #define OPTION_MASK_ISA_SSE4_2_SET \
2399 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2400 #define OPTION_MASK_ISA_AVX_SET \
2401 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2402 #define OPTION_MASK_ISA_FMA_SET \
2403 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2405 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2406 as -msse4.2. */
2407 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2409 #define OPTION_MASK_ISA_SSE4A_SET \
2410 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2411 #define OPTION_MASK_ISA_FMA4_SET \
2412 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2413 | OPTION_MASK_ISA_AVX_SET)
2414 #define OPTION_MASK_ISA_XOP_SET \
2415 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2416 #define OPTION_MASK_ISA_LWP_SET \
2417 OPTION_MASK_ISA_LWP
2419 /* AES and PCLMUL need SSE2 because they use xmm registers */
2420 #define OPTION_MASK_ISA_AES_SET \
2421 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2422 #define OPTION_MASK_ISA_PCLMUL_SET \
2423 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2425 #define OPTION_MASK_ISA_ABM_SET \
2426 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2428 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
2429 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
2430 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2431 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2432 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2433 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2434 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2436 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2437 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2438 #define OPTION_MASK_ISA_F16C_SET \
2439 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
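/* Illustrative sketch (not part of GCC proper): each *_SET macro ORs in
   the *_SET mask of its prerequisite, so a single -mfoo enables the whole
   dependency chain. The toy masks below are hypothetical stand-ins.  */
#if 0
#include <stdio.h>

#define TOY_SSE   0x1
#define TOY_SSE2  0x2
#define TOY_SSE3  0x4

#define TOY_SSE_SET   TOY_SSE
#define TOY_SSE2_SET  (TOY_SSE2 | TOY_SSE_SET)
#define TOY_SSE3_SET  (TOY_SSE3 | TOY_SSE2_SET)

int
main (void)
{
  /* Requesting SSE3 alone yields SSE3 | SSE2 | SSE.  */
  printf ("SSE3_SET = %#x\n", (unsigned int) TOY_SSE3_SET); /* 0x7 */
  return 0;
}
#endif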
2441 /* Define a set of ISAs which aren't available when a given ISA is
2442 disabled. MMX and SSE ISAs are handled separately; a matching sketch follows these definitions. */
2444 #define OPTION_MASK_ISA_MMX_UNSET \
2445 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2446 #define OPTION_MASK_ISA_3DNOW_UNSET \
2447 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2448 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2450 #define OPTION_MASK_ISA_SSE_UNSET \
2451 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2452 #define OPTION_MASK_ISA_SSE2_UNSET \
2453 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2454 #define OPTION_MASK_ISA_SSE3_UNSET \
2455 (OPTION_MASK_ISA_SSE3 \
2456 | OPTION_MASK_ISA_SSSE3_UNSET \
2457 | OPTION_MASK_ISA_SSE4A_UNSET )
2458 #define OPTION_MASK_ISA_SSSE3_UNSET \
2459 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2460 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2461 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2462 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2463 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2464 #define OPTION_MASK_ISA_AVX_UNSET \
2465 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2466 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2467 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2469 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2470 as -mno-sse4.1. */
2471 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2473 #define OPTION_MASK_ISA_SSE4A_UNSET \
2474 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2476 #define OPTION_MASK_ISA_FMA4_UNSET \
2477 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2478 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2479 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2481 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2482 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2483 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2484 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
2485 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
2486 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2487 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2488 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2489 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2490 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2492 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2493 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2494 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
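/* Illustrative sketch (not part of GCC proper): the *_UNSET macros cascade
   in the opposite direction, so clearing an ISA also clears everything
   that depends on it. Toy masks as in the previous sketch.  */
#if 0
#include <stdio.h>

#define TOY_SSE   0x1
#define TOY_SSE2  0x2
#define TOY_SSE3  0x4

#define TOY_SSE3_UNSET  TOY_SSE3
#define TOY_SSE2_UNSET  (TOY_SSE2 | TOY_SSE3_UNSET)

int
main (void)
{
  unsigned int flags = TOY_SSE | TOY_SSE2 | TOY_SSE3;
  flags &= ~TOY_SSE2_UNSET; /* -mno-sse2 also drops SSE3 */
  printf ("flags = %#x\n", flags); /* 0x1: only plain SSE survives */
  return 0;
}
#endif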
2496 /* Vectorization library interface and handlers. */
2497 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2499 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2500 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2502 /* Processor target table, indexed by processor number */
2503 struct ptt
2504 {
2505 const struct processor_costs *cost; /* Processor costs */
2506 const int align_loop; /* Default alignments. */
2507 const int align_loop_max_skip;
2508 const int align_jump;
2509 const int align_jump_max_skip;
2510 const int align_func;
2513 static const struct ptt processor_target_table[PROCESSOR_max] =
2514 {
2515 {&i386_cost, 4, 3, 4, 3, 4},
2516 {&i486_cost, 16, 15, 16, 15, 16},
2517 {&pentium_cost, 16, 7, 16, 7, 16},
2518 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2519 {&geode_cost, 0, 0, 0, 0, 0},
2520 {&k6_cost, 32, 7, 32, 7, 32},
2521 {&athlon_cost, 16, 7, 16, 7, 16},
2522 {&pentium4_cost, 0, 0, 0, 0, 0},
2523 {&k8_cost, 16, 7, 16, 7, 16},
2524 {&nocona_cost, 0, 0, 0, 0, 0},
2525 /* Core 2 32-bit. */
2526 {&generic32_cost, 16, 10, 16, 10, 16},
2527 /* Core 2 64-bit. */
2528 {&generic64_cost, 16, 10, 16, 10, 16},
2529 /* Core i7 32-bit. */
2530 {&generic32_cost, 16, 10, 16, 10, 16},
2531 /* Core i7 64-bit. */
2532 {&generic64_cost, 16, 10, 16, 10, 16},
2533 {&generic32_cost, 16, 7, 16, 7, 16},
2534 {&generic64_cost, 16, 10, 16, 10, 16},
2535 {&amdfam10_cost, 32, 24, 32, 7, 32},
2536 {&bdver1_cost, 32, 24, 32, 7, 32},
2537 {&btver1_cost, 32, 24, 32, 7, 32},
2538 {&atom_cost, 16, 7, 16, 7, 16}
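/* Illustrative sketch (not part of GCC proper): each row above couples a
   cost table with default alignment parameters; the k8 row, for instance,
   asks for 16-byte loop/jump/function alignment with at most 7 bytes of
   padding skipped. The helper type below is hypothetical.  */
#if 0
struct toy_ptt
{
  int align_loop;          /* preferred loop alignment, in bytes */
  int align_loop_max_skip; /* give up if more padding than this is needed */
};

static int
toy_loop_align (const struct toy_ptt *p)
{
  return p->align_loop; /* e.g. 16 for the k8 entry above */
}
#endif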
2541 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2570 /* Return true if a red-zone is in use. */
2572 static inline bool
2573 ix86_using_red_zone (void)
2574 {
2575 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2576 }
2578 /* Implement TARGET_HANDLE_OPTION. */
2581 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2588 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2589 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2593 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2594 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2601 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2602 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2606 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2607 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2617 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2622 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2623 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2630 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2635 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2643 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2644 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2648 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2656 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2657 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2661 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2662 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2669 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2670 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2674 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2675 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2682 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2683 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2687 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2688 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2695 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2696 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2700 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2701 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2708 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2709 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2713 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2714 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2719 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2720 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2724 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2725 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2731 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2732 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2736 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2737 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2744 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2745 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2749 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2750 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2757 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2758 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2762 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2763 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2770 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2771 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2775 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2776 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2783 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2784 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2788 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2789 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2796 ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
2797 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
2801 ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
2802 ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
2809 ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
2810 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
2814 ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
2815 ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
2822 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2823 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2827 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2828 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2835 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2836 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2840 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2841 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2848 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2853 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2861 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2866 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2874 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2879 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2880 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2887 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2888 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2892 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2893 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2900 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2901 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2905 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2906 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2913 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2914 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2918 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2919 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2926 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2927 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2931 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2932 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2939 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2940 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2944 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2945 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
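/* Illustrative sketch (not part of GCC proper): every ISA case above
   follows one pattern. VALUE distinguishes -mfoo from -mno-foo, and the
   explicit mask records that the user decided, so later -march defaults
   cannot silently override the choice. The helper is hypothetical.  */
#if 0
static void
toy_handle_isa (int value, unsigned int set_mask, unsigned int unset_mask,
                unsigned int *flags, unsigned int *flags_explicit)
{
  if (value)
    {
      *flags |= set_mask;           /* -mfoo: enable foo + prerequisites */
      *flags_explicit |= set_mask;
    }
  else
    {
      *flags &= ~unset_mask;        /* -mno-foo: drop foo + dependents */
      *flags_explicit |= unset_mask;
    }
}
#endif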
2954 /* Return a string that documents the current -m options. The caller is
2955 responsible for freeing the string. */
2958 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2959 const char *fpmath, bool add_nl_p)
2961 struct ix86_target_opts
2963 const char *option; /* option string */
2964 int mask; /* isa mask options */
2967 /* This table is ordered so that options like -msse4.2 that imply
2968 preceding options match those first. */
2969 static struct ix86_target_opts isa_opts[] =
2971 { "-m64", OPTION_MASK_ISA_64BIT },
2972 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2973 { "-mfma", OPTION_MASK_ISA_FMA },
2974 { "-mxop", OPTION_MASK_ISA_XOP },
2975 { "-mlwp", OPTION_MASK_ISA_LWP },
2976 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2977 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2978 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2979 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2980 { "-msse3", OPTION_MASK_ISA_SSE3 },
2981 { "-msse2", OPTION_MASK_ISA_SSE2 },
2982 { "-msse", OPTION_MASK_ISA_SSE },
2983 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2984 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2985 { "-mmmx", OPTION_MASK_ISA_MMX },
2986 { "-mabm", OPTION_MASK_ISA_ABM },
2987 { "-mbmi", OPTION_MASK_ISA_BMI },
2988 { "-mtbm", OPTION_MASK_ISA_TBM },
2989 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2990 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2991 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2992 { "-maes", OPTION_MASK_ISA_AES },
2993 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2994 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2995 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2996 { "-mf16c", OPTION_MASK_ISA_F16C },
3000 static struct ix86_target_opts flag_opts[] =
3002 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3003 { "-m80387", MASK_80387 },
3004 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3005 { "-malign-double", MASK_ALIGN_DOUBLE },
3006 { "-mcld", MASK_CLD },
3007 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3008 { "-mieee-fp", MASK_IEEE_FP },
3009 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3010 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3011 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3012 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3013 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3014 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3015 { "-mno-red-zone", MASK_NO_RED_ZONE },
3016 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3017 { "-mrecip", MASK_RECIP },
3018 { "-mrtd", MASK_RTD },
3019 { "-msseregparm", MASK_SSEREGPARM },
3020 { "-mstack-arg-probe", MASK_STACK_PROBE },
3021 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3022 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3023 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3024 { "-mvzeroupper", MASK_VZEROUPPER },
3027 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3030 char target_other[40];
3039 memset (opts, '\0', sizeof (opts));
3041 /* Add -march= option. */
3044 opts[num][0] = "-march=";
3045 opts[num++][1] = arch;
3048 /* Add -mtune= option. */
3051 opts[num][0] = "-mtune=";
3052 opts[num++][1] = tune;
3055 /* Pick out the options in isa options. */
3056 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3058 if ((isa & isa_opts[i].mask) != 0)
3060 opts[num++][0] = isa_opts[i].option;
3061 isa &= ~ isa_opts[i].mask;
3065 if (isa && add_nl_p)
3067 opts[num++][0] = isa_other;
3068 sprintf (isa_other, "(other isa: %#x)", isa);
3071 /* Add flag options. */
3072 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3074 if ((flags & flag_opts[i].mask) != 0)
3076 opts[num++][0] = flag_opts[i].option;
3077 flags &= ~ flag_opts[i].mask;
3081 if (flags && add_nl_p)
3083 opts[num++][0] = target_other;
3084 sprintf (target_other, "(other flags: %#x)", flags);
3087 /* Add -fpmath= option. */
3090 opts[num][0] = "-mfpmath=";
3091 opts[num++][1] = fpmath;
3098 gcc_assert (num < ARRAY_SIZE (opts));
3100 /* Size the string. */
3102 sep_len = (add_nl_p) ? 3 : 1;
3103 for (i = 0; i < num; i++)
3106 for (j = 0; j < 2; j++)
3108 len += strlen (opts[i][j]);
3111 /* Build the string. */
3112 ret = ptr = (char *) xmalloc (len);
3115 for (i = 0; i < num; i++)
3119 for (j = 0; j < 2; j++)
3120 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3127 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3135 for (j = 0; j < 2; j++)
3138 memcpy (ptr, opts[i][j], len2[j]);
3140 line_len += len2[j];
3145 gcc_assert (ret + len >= ptr);
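/* Illustrative sketch (not part of GCC proper): the loop above starts a
   new output line once roughly 70 columns are filled. The standalone
   joiner below mimics that wrapping rule on a fixed option list.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  static const char *const opts[] =
    { "-m64", "-msse2", "-march=core2", "-mtune=generic" };
  size_t i, line_len = 0;

  for (i = 0; i < sizeof opts / sizeof opts[0]; i++)
    {
      size_t len = strlen (opts[i]);
      if (line_len != 0 && line_len + len > 70) /* wrap like the code above */
        {
          putchar ('\n');
          line_len = 0;
        }
      printf ("%s%s", line_len ? " " : "", opts[i]);
      line_len += len + 1;
    }
  putchar ('\n');
  return 0;
}
#endif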
3150 /* Return TRUE if software prefetching is beneficial for the
3151 target. */
3153 static bool
3154 software_prefetching_beneficial_p (void)
3158 case PROCESSOR_GEODE:
3160 case PROCESSOR_ATHLON:
3162 case PROCESSOR_AMDFAM10:
3163 case PROCESSOR_BTVER1:
3171 /* Return true if profiling code should be emitted before the
3172 prologue, and false otherwise.
3173 Note: for x86, the "hotfix" case is diagnosed with sorry (). */
3175 ix86_profile_before_prologue (void)
3177 return flag_fentry != 0;
3180 /* Function that is callable from the debugger to print the current
3181 options. */
3182 void
3183 ix86_debug_options (void)
3185 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3186 ix86_arch_string, ix86_tune_string,
3187 ix86_fpmath_string, true);
3191 fprintf (stderr, "%s\n\n", opts);
3195 fputs ("<no options>\n\n", stderr);
3200 /* Override various settings based on options. If MAIN_ARGS_P, the
3201 options are from the command line, otherwise they are from
3202 attribute(target). */
3204 static void
3205 ix86_option_override_internal (bool main_args_p)
3208 unsigned int ix86_arch_mask, ix86_tune_mask;
3209 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3214 /* Comes from final.c -- no real reason to change it. */
3215 #define MAX_CODE_ALIGN 16
3223 PTA_PREFETCH_SSE = 1 << 4,
3225 PTA_3DNOW_A = 1 << 6,
3229 PTA_POPCNT = 1 << 10,
3231 PTA_SSE4A = 1 << 12,
3232 PTA_NO_SAHF = 1 << 13,
3233 PTA_SSE4_1 = 1 << 14,
3234 PTA_SSE4_2 = 1 << 15,
3236 PTA_PCLMUL = 1 << 17,
3239 PTA_MOVBE = 1 << 20,
3243 PTA_FSGSBASE = 1 << 24,
3244 PTA_RDRND = 1 << 25,
3248 /* if this reaches 32, need to widen struct pta flags below */
3253 const char *const name; /* processor name or nickname. */
3254 const enum processor_type processor;
3255 const enum attr_cpu schedule;
3256 const unsigned /*enum pta_flags*/ flags;
3257 }
3258 const processor_alias_table[] =
3259 {
3260 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3261 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3262 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3263 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3264 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3265 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3266 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3267 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3268 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3269 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3270 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3271 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3272 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3274 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3276 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3277 PTA_MMX | PTA_SSE | PTA_SSE2},
3278 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3279 PTA_MMX | PTA_SSE | PTA_SSE2},
3280 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3281 PTA_MMX | PTA_SSE | PTA_SSE2},
3282 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3283 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3284 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3285 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3286 | PTA_CX16 | PTA_NO_SAHF},
3287 {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
3288 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3289 | PTA_SSSE3 | PTA_CX16},
3290 {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
3291 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3292 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3293 {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
3294 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3295 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3296 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
3297 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3298 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3299 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3300 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3301 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3302 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3303 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3304 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3305 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3306 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3307 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3308 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3309 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3310 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3311 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3312 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3313 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3314 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3315 {"x86-64", PROCESSOR_K8, CPU_K8,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3317 {"k8", PROCESSOR_K8, CPU_K8,
3318 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3319 | PTA_SSE2 | PTA_NO_SAHF},
3320 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3321 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3322 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3323 {"opteron", PROCESSOR_K8, CPU_K8,
3324 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3325 | PTA_SSE2 | PTA_NO_SAHF},
3326 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3327 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3328 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3329 {"athlon64", PROCESSOR_K8, CPU_K8,
3330 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3331 | PTA_SSE2 | PTA_NO_SAHF},
3332 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3333 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3334 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3335 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3336 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3337 | PTA_SSE2 | PTA_NO_SAHF},
3338 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3339 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3340 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3341 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3342 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3343 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3344 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3345 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3346 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3347 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3348 | PTA_XOP | PTA_LWP},
3349 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3350 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3351 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
3352 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3353 0 /* flags are only used for -march switch. */ },
3354 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3355 PTA_64BIT /* flags are only used for -march switch. */ },
3358 int const pta_size = ARRAY_SIZE (processor_alias_table);
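/* Illustrative sketch (not part of GCC proper): -march= and -mtune=
   strings are resolved by a linear scan over processor_alias_table,
   exactly as the loops further below do; a miss is reported as a bad
   value. The types here are hypothetical stand-ins.  */
#if 0
#include <string.h>

struct toy_alias
{
  const char *name;
  int processor;
};

static int
toy_lookup (const struct toy_alias *table, int n, const char *name)
{
  int i;
  for (i = 0; i < n; i++)
    if (strcmp (name, table[i].name) == 0)
      return table[i].processor;
  return -1; /* caller emits: bad value (%s) for -march= */
}
#endif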
3360 /* Set up prefix/suffix so the error messages refer to either the command
3361 line argument, or the attribute(target). */
3370 prefix = "option(\"";
3375 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3376 SUBTARGET_OVERRIDE_OPTIONS;
3379 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3380 SUBSUBTARGET_OVERRIDE_OPTIONS;
3383 /* -fPIC is the default for x86_64. */
3384 if (TARGET_MACHO && TARGET_64BIT)
3387 /* Need to check -mtune=generic first. */
3388 if (ix86_tune_string)
3390 if (!strcmp (ix86_tune_string, "generic")
3391 || !strcmp (ix86_tune_string, "i686")
3392 /* As special support for cross compilers we read -mtune=native
3393 as -mtune=generic. With native compilers we won't see
3394 -mtune=native, as it was changed by the driver. */
3395 || !strcmp (ix86_tune_string, "native"))
3398 ix86_tune_string = "generic64";
3400 ix86_tune_string = "generic32";
3402 /* If this call is for setting the option attribute, allow the
3403 generic32/generic64 that was previously set. */
3404 else if (!main_args_p
3405 && (!strcmp (ix86_tune_string, "generic32")
3406 || !strcmp (ix86_tune_string, "generic64")))
3408 else if (!strncmp (ix86_tune_string, "generic", 7))
3409 error ("bad value (%s) for %stune=%s %s",
3410 ix86_tune_string, prefix, suffix, sw);
3411 else if (!strcmp (ix86_tune_string, "x86-64"))
3412 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3413 "%stune=k8%s or %stune=generic%s instead as appropriate",
3414 prefix, suffix, prefix, suffix, prefix, suffix);
3418 if (ix86_arch_string)
3419 ix86_tune_string = ix86_arch_string;
3420 if (!ix86_tune_string)
3422 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3423 ix86_tune_defaulted = 1;
3426 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3427 need to use a sensible tune option. */
3428 if (!strcmp (ix86_tune_string, "generic")
3429 || !strcmp (ix86_tune_string, "x86-64")
3430 || !strcmp (ix86_tune_string, "i686"))
3433 ix86_tune_string = "generic64";
3435 ix86_tune_string = "generic32";
3439 if (ix86_stringop_string)
3441 if (!strcmp (ix86_stringop_string, "rep_byte"))
3442 stringop_alg = rep_prefix_1_byte;
3443 else if (!strcmp (ix86_stringop_string, "libcall"))
3444 stringop_alg = libcall;
3445 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3446 stringop_alg = rep_prefix_4_byte;
3447 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3449 /* rep; movq isn't available in 32-bit code. */
3450 stringop_alg = rep_prefix_8_byte;
3451 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3452 stringop_alg = loop_1_byte;
3453 else if (!strcmp (ix86_stringop_string, "loop"))
3454 stringop_alg = loop;
3455 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3456 stringop_alg = unrolled_loop;
3458 error ("bad value (%s) for %sstringop-strategy=%s %s",
3459 ix86_stringop_string, prefix, suffix, sw);
3462 if (!ix86_arch_string)
3463 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3465 ix86_arch_specified = 1;
3467 /* Validate -mabi= value. */
3468 if (ix86_abi_string)
3470 if (strcmp (ix86_abi_string, "sysv") == 0)
3471 ix86_abi = SYSV_ABI;
3472 else if (strcmp (ix86_abi_string, "ms") == 0)
3475 error ("unknown ABI (%s) for %sabi=%s %s",
3476 ix86_abi_string, prefix, suffix, sw);
3479 ix86_abi = DEFAULT_ABI;
3481 if (ix86_cmodel_string != 0)
3483 if (!strcmp (ix86_cmodel_string, "small"))
3484 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3485 else if (!strcmp (ix86_cmodel_string, "medium"))
3486 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3487 else if (!strcmp (ix86_cmodel_string, "large"))
3488 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3489 else if (flag_pic)
3490 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3491 else if (!strcmp (ix86_cmodel_string, "32"))
3492 ix86_cmodel = CM_32;
3493 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3494 ix86_cmodel = CM_KERNEL;
3496 error ("bad value (%s) for %scmodel=%s %s",
3497 ix86_cmodel_string, prefix, suffix, sw);
3501 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3502 use of rip-relative addressing. This eliminates fixups that
3503 would otherwise be needed if this object is to be placed in a
3504 DLL, and is essentially just as efficient as direct addressing. */
3505 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3506 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3507 else if (TARGET_64BIT)
3508 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3510 ix86_cmodel = CM_32;
3512 if (ix86_asm_string != 0)
3515 && !strcmp (ix86_asm_string, "intel"))
3516 ix86_asm_dialect = ASM_INTEL;
3517 else if (!strcmp (ix86_asm_string, "att"))
3518 ix86_asm_dialect = ASM_ATT;
3520 error ("bad value (%s) for %sasm=%s %s",
3521 ix86_asm_string, prefix, suffix, sw);
3523 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3524 error ("code model %qs not supported in the %s bit mode",
3525 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3526 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3527 sorry ("%i-bit mode not compiled in",
3528 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3530 for (i = 0; i < pta_size; i++)
3531 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3533 ix86_schedule = processor_alias_table[i].schedule;
3534 ix86_arch = processor_alias_table[i].processor;
3535 /* Default cpu tuning to the architecture. */
3536 ix86_tune = ix86_arch;
3538 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3539 error ("CPU you selected does not support x86-64 "
3542 if (processor_alias_table[i].flags & PTA_MMX
3543 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3544 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3545 if (processor_alias_table[i].flags & PTA_3DNOW
3546 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3547 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3548 if (processor_alias_table[i].flags & PTA_3DNOW_A
3549 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3550 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3551 if (processor_alias_table[i].flags & PTA_SSE
3552 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3553 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3554 if (processor_alias_table[i].flags & PTA_SSE2
3555 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3556 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3557 if (processor_alias_table[i].flags & PTA_SSE3
3558 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3559 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3560 if (processor_alias_table[i].flags & PTA_SSSE3
3561 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3562 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3563 if (processor_alias_table[i].flags & PTA_SSE4_1
3564 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3565 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3566 if (processor_alias_table[i].flags & PTA_SSE4_2
3567 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3568 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3569 if (processor_alias_table[i].flags & PTA_AVX
3570 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3571 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3572 if (processor_alias_table[i].flags & PTA_FMA
3573 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3574 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3575 if (processor_alias_table[i].flags & PTA_SSE4A
3576 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3577 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3578 if (processor_alias_table[i].flags & PTA_FMA4
3579 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3580 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3581 if (processor_alias_table[i].flags & PTA_XOP
3582 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3583 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3584 if (processor_alias_table[i].flags & PTA_LWP
3585 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3586 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3587 if (processor_alias_table[i].flags & PTA_ABM
3588 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3589 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3590 if (processor_alias_table[i].flags & PTA_BMI
3591 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3592 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3593 if (processor_alias_table[i].flags & PTA_TBM
3594 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3595 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3596 if (processor_alias_table[i].flags & PTA_CX16
3597 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3598 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3599 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3600 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3601 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3602 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3603 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3604 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3605 if (processor_alias_table[i].flags & PTA_MOVBE
3606 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3607 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3608 if (processor_alias_table[i].flags & PTA_AES
3609 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3610 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3611 if (processor_alias_table[i].flags & PTA_PCLMUL
3612 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3613 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3614 if (processor_alias_table[i].flags & PTA_FSGSBASE
3615 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3616 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3617 if (processor_alias_table[i].flags & PTA_RDRND
3618 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3619 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3620 if (processor_alias_table[i].flags & PTA_F16C
3621 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3622 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3623 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3624 x86_prefetch_sse = true;
3629 if (!strcmp (ix86_arch_string, "generic"))
3630 error ("generic CPU can be used only for %stune=%s %s",
3631 prefix, suffix, sw);
3632 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3633 error ("bad value (%s) for %sarch=%s %s",
3634 ix86_arch_string, prefix, suffix, sw);
3636 ix86_arch_mask = 1u << ix86_arch;
3637 for (i = 0; i < X86_ARCH_LAST; ++i)
3638 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
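/* A minimal, hypothetical sketch -- not from the original source -- of the
   bitmask test above; the enumerator X86_ARCH_CMOVE is assumed to be one
   of the ix86_arch_features indices.  */
#if 0
  unsigned int mask = 1u << ix86_arch;
  /* Feature F holds for the selected -march iff bit ix86_arch is set
     in that feature's architecture mask.  */
  int has_cmove = (initial_ix86_arch_features[X86_ARCH_CMOVE] & mask) != 0;
#endif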
3640 for (i = 0; i < pta_size; i++)
3641 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3643 ix86_schedule = processor_alias_table[i].schedule;
3644 ix86_tune = processor_alias_table[i].processor;
3647 if (!(processor_alias_table[i].flags & PTA_64BIT))
3649 if (ix86_tune_defaulted)
3651 ix86_tune_string = "x86-64";
3652 for (i = 0; i < pta_size; i++)
3653 if (! strcmp (ix86_tune_string,
3654 processor_alias_table[i].name))
3656 ix86_schedule = processor_alias_table[i].schedule;
3657 ix86_tune = processor_alias_table[i].processor;
3660 error ("CPU you selected does not support x86-64 "
3666 /* Adjust tuning when compiling for 32-bit ABI. */
3667 switch (ix86_tune)
3669 case PROCESSOR_GENERIC64:
3670 ix86_tune = PROCESSOR_GENERIC32;
3671 ix86_schedule = CPU_PENTIUMPRO;
3674 case PROCESSOR_CORE2_64:
3675 ix86_tune = PROCESSOR_CORE2_32;
3678 case PROCESSOR_COREI7_64:
3679 ix86_tune = PROCESSOR_COREI7_32;
3686 /* Intel CPUs have always interpreted SSE prefetch instructions as
3687 NOPs; so, we can enable SSE prefetch instructions even when
3688 -mtune (rather than -march) points us to a processor that has them.
3689 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3690 higher processors. */
3691 if (TARGET_CMOVE
3692 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3693 x86_prefetch_sse = true;
3697 if (ix86_tune_specified && i == pta_size)
3698 error ("bad value (%s) for %stune=%s %s",
3699 ix86_tune_string, prefix, suffix, sw);
3701 ix86_tune_mask = 1u << ix86_tune;
3702 for (i = 0; i < X86_TUNE_LAST; ++i)
3703 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3705 #ifndef USE_IX86_FRAME_POINTER
3706 #define USE_IX86_FRAME_POINTER 0
3707 #endif
3709 #ifndef USE_X86_64_FRAME_POINTER
3710 #define USE_X86_64_FRAME_POINTER 0
3711 #endif
3713 /* Set the default values for switches whose default depends on TARGET_64BIT
3714 in case they weren't overridden by command-line options. */
3715 if (TARGET_64BIT)
3717 if (optimize > 1 && !global_options_set.x_flag_zee)
3718 flag_zee = 1;
3719 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3720 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3721 if (flag_asynchronous_unwind_tables == 2)
3722 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3723 if (flag_pcc_struct_return == 2)
3724 flag_pcc_struct_return = 0;
3726 else
3728 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3729 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3730 if (flag_asynchronous_unwind_tables == 2)
3731 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3732 if (flag_pcc_struct_return == 2)
3733 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3736 if (optimize_size)
3737 ix86_cost = &ix86_size_cost;
3738 else
3739 ix86_cost = processor_target_table[ix86_tune].cost;
3741 /* Arrange to set up i386_stack_locals for all functions. */
3742 init_machine_status = ix86_init_machine_status;
3744 /* Validate -mregparm= value. */
3745 if (ix86_regparm_string)
3748 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3749 i = atoi (ix86_regparm_string);
3750 if (i < 0 || i > REGPARM_MAX)
3751 error ("%sregparm=%d%s is not between 0 and %d",
3752 prefix, i, suffix, REGPARM_MAX);
3757 ix86_regparm = REGPARM_MAX;
3759 /* If the user has provided any of the -malign-* options,
3760 warn and use that value only if -falign-* is not set.
3761 Remove this code in GCC 3.2 or later. */
3762 if (ix86_align_loops_string)
3764 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3765 prefix, suffix, suffix);
3766 if (align_loops == 0)
3768 i = atoi (ix86_align_loops_string);
3769 if (i < 0 || i > MAX_CODE_ALIGN)
3770 error ("%salign-loops=%d%s is not between 0 and %d",
3771 prefix, i, suffix, MAX_CODE_ALIGN);
3773 align_loops = 1 << i;
3777 if (ix86_align_jumps_string)
3779 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3780 prefix, suffix, suffix);
3781 if (align_jumps == 0)
3783 i = atoi (ix86_align_jumps_string);
3784 if (i < 0 || i > MAX_CODE_ALIGN)
3785 error ("%salign-loops=%d%s is not between 0 and %d",
3786 prefix, i, suffix, MAX_CODE_ALIGN);
3788 align_jumps = 1 << i;
3792 if (ix86_align_funcs_string)
3794 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3795 prefix, suffix, suffix);
3796 if (align_functions == 0)
3798 i = atoi (ix86_align_funcs_string);
3799 if (i < 0 || i > MAX_CODE_ALIGN)
3800 error ("%salign-loops=%d%s is not between 0 and %d",
3801 prefix, i, suffix, MAX_CODE_ALIGN);
3803 align_functions = 1 << i;
3807 /* Default align_* from the processor table. */
3808 if (align_loops == 0)
3810 align_loops = processor_target_table[ix86_tune].align_loop;
3811 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3813 if (align_jumps == 0)
3815 align_jumps = processor_target_table[ix86_tune].align_jump;
3816 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3818 if (align_functions == 0)
3820 align_functions = processor_target_table[ix86_tune].align_func;
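/* Illustrative arithmetic, not from the original source: the -malign-*
   options take log2 values, so they map onto the byte counts used by the
   generic -falign-* options.  */
#if 0
  int log2_align = 4;              /* e.g. -malign-loops=4 */
  int bytes = 1 << log2_align;     /* align_loops == 16, like -falign-loops=16 */
#endif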
3823 /* Validate -mbranch-cost= value, or provide default. */
3824 ix86_branch_cost = ix86_cost->branch_cost;
3825 if (ix86_branch_cost_string)
3827 i = atoi (ix86_branch_cost_string);
3829 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3831 ix86_branch_cost = i;
3833 if (ix86_section_threshold_string)
3835 i = atoi (ix86_section_threshold_string);
3837 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3839 ix86_section_threshold = i;
3842 if (ix86_tls_dialect_string)
3844 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3845 ix86_tls_dialect = TLS_DIALECT_GNU;
3846 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3847 ix86_tls_dialect = TLS_DIALECT_GNU2;
3849 error ("bad value (%s) for %stls-dialect=%s %s",
3850 ix86_tls_dialect_string, prefix, suffix, sw);
3853 if (ix87_precision_string)
3855 i = atoi (ix87_precision_string);
3856 if (i != 32 && i != 64 && i != 80)
3857 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3860 if (TARGET_64BIT)
3862 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3864 /* Enable by default the SSE and MMX builtins. Do allow the user to
3865 explicitly disable any of these. In particular, disabling SSE and
3866 MMX for kernel code is extremely useful. */
3867 if (!ix86_arch_specified)
3868 ix86_isa_flags
3869 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3870 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3873 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3875 else
3877 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3879 if (!ix86_arch_specified)
3880 ix86_isa_flags
3881 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3883 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3884 when the programmer takes care to keep the stack from being destroyed. */
3885 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3886 target_flags |= MASK_NO_RED_ZONE;
3889 /* Keep nonleaf frame pointers. */
3890 if (flag_omit_frame_pointer)
3891 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3892 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3893 flag_omit_frame_pointer = 1;
3895 /* If we're doing fast math, we don't care about comparison order
3896 wrt NaNs. This lets us use a shorter comparison sequence. */
3897 if (flag_finite_math_only)
3898 target_flags &= ~MASK_IEEE_FP;
3900 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3901 since the insns won't need emulation. */
3902 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3903 target_flags &= ~MASK_NO_FANCY_MATH_387;
3905 /* Likewise, if the target doesn't have a 387, or we've specified
3906 software floating point, don't use 387 inline intrinsics. */
3907 if (!TARGET_80387)
3908 target_flags |= MASK_NO_FANCY_MATH_387;
3910 /* Turn on MMX builtins for -msse. */
3911 if (TARGET_SSE)
3913 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3914 x86_prefetch_sse = true;
3917 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3918 if (TARGET_SSE4_2 || TARGET_ABM)
3919 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3921 /* Validate -mpreferred-stack-boundary= value or default it to
3922 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3923 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3924 if (ix86_preferred_stack_boundary_string)
3926 int min = (TARGET_64BIT ? 4 : 2);
3927 int max = (TARGET_SEH ? 4 : 12);
3929 i = atoi (ix86_preferred_stack_boundary_string);
3930 if (i < min || i > max)
3933 error ("%spreferred-stack-boundary%s is not supported "
3934 "for this target", prefix, suffix);
3936 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
3937 prefix, i, suffix, min, max);
3939 else
3940 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
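/* Illustrative arithmetic, not from the original source: the option value
   is a log2 byte count, converted to bits here.  */
#if 0
  int i = 4;                              /* -mpreferred-stack-boundary=4 */
  int bits = (1 << i) * BITS_PER_UNIT;    /* 16 bytes * 8 bits == 128 */
#endif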
3943 /* Set the default value for -mstackrealign. */
3944 if (ix86_force_align_arg_pointer == -1)
3945 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3947 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3949 /* Validate -mincoming-stack-boundary= value or default it to
3950 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3951 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3952 if (ix86_incoming_stack_boundary_string)
3954 i = atoi (ix86_incoming_stack_boundary_string);
3955 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3956 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3957 i, TARGET_64BIT ? 4 : 2);
3958 else
3960 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3961 ix86_incoming_stack_boundary
3962 = ix86_user_incoming_stack_boundary;
3966 /* Accept -msseregparm only if at least SSE support is enabled. */
3967 if (TARGET_SSEREGPARM
3969 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3971 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3972 if (ix86_fpmath_string != 0)
3974 if (! strcmp (ix86_fpmath_string, "387"))
3975 ix86_fpmath = FPMATH_387;
3976 else if (! strcmp (ix86_fpmath_string, "sse"))
3980 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3981 ix86_fpmath = FPMATH_387;
3983 else
3984 ix86_fpmath = FPMATH_SSE;
3986 else if (! strcmp (ix86_fpmath_string, "387,sse")
3987 || ! strcmp (ix86_fpmath_string, "387+sse")
3988 || ! strcmp (ix86_fpmath_string, "sse,387")
3989 || ! strcmp (ix86_fpmath_string, "sse+387")
3990 || ! strcmp (ix86_fpmath_string, "both"))
3994 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3995 ix86_fpmath = FPMATH_387;
3997 else if (!TARGET_80387)
3999 warning (0, "387 instruction set disabled, using SSE arithmetics");
4000 ix86_fpmath = FPMATH_SSE;
4002 else
4003 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4006 error ("bad value (%s) for %sfpmath=%s %s",
4007 ix86_fpmath_string, prefix, suffix, sw);
4010 /* If the i387 is disabled, then do not return values in it. */
4011 if (!TARGET_80387)
4012 target_flags &= ~MASK_FLOAT_RETURNS;
4014 /* Use external vectorized library in vectorizing intrinsics. */
4015 if (ix86_veclibabi_string)
4017 if (strcmp (ix86_veclibabi_string, "svml") == 0)
4018 ix86_veclib_handler = ix86_veclibabi_svml;
4019 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
4020 ix86_veclib_handler = ix86_veclibabi_acml;
4022 error ("unknown vectorization library ABI type (%s) for "
4023 "%sveclibabi=%s %s", ix86_veclibabi_string,
4024 prefix, suffix, sw);
4027 if ((!USE_IX86_FRAME_POINTER
4028 || (x86_accumulate_outgoing_args & ix86_tune_mask))
4029 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4030 && !optimize_size)
4031 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4033 /* ??? Unwind info is not correct around the CFG unless either a frame
4034 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4035 unwind info generation to be aware of the CFG and propagating states
4036 around edges. */
4037 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
4038 || flag_exceptions || flag_non_call_exceptions)
4039 && flag_omit_frame_pointer
4040 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4042 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4043 warning (0, "unwind tables currently require either a frame pointer "
4044 "or %saccumulate-outgoing-args%s for correctness",
4046 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4049 /* If stack probes are required, the space used for large function
4050 arguments on the stack must also be probed, so enable
4051 -maccumulate-outgoing-args so this happens in the prologue. */
4052 if (TARGET_STACK_PROBE
4053 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4055 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
4056 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4057 "for correctness", prefix, suffix);
4058 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4061 /* For sane SSE instruction set generation we need fcomi instruction.
4062 It is safe to enable all CMOVE instructions. */
4066 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4069 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4070 p = strchr (internal_label_prefix, 'X');
4071 internal_label_prefix_len = p - internal_label_prefix;
4075 /* When the scheduling description is not available, disable the scheduler
4076 pass so it won't slow down compilation and make x87 code slower. */
4077 if (!TARGET_SCHEDULE)
4078 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
4080 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4081 ix86_cost->simultaneous_prefetches,
4082 global_options.x_param_values,
4083 global_options_set.x_param_values);
4084 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
4085 global_options.x_param_values,
4086 global_options_set.x_param_values);
4087 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
4088 global_options.x_param_values,
4089 global_options_set.x_param_values);
4090 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
4091 global_options.x_param_values,
4092 global_options_set.x_param_values);
4094 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4095 if (flag_prefetch_loop_arrays < 0
4096 && HAVE_prefetch
4097 && optimize >= 3
4098 && software_prefetching_beneficial_p ())
4099 flag_prefetch_loop_arrays = 1;
4101 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4102 can be optimized to ap = __builtin_next_arg (0). */
4103 if (!TARGET_64BIT && !flag_split_stack)
4104 targetm.expand_builtin_va_start = NULL;
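/* A hypothetical example -- not from the original source -- of the folding
   enabled above when va_list is plain "char *".  */
#if 0
  void
  log_args (const char *fmt, ...)
  {
    __builtin_va_list ap;
    /* With a char* va_list and no -fsplit-stack, this may be lowered
       to:  ap = __builtin_next_arg (0);  */
    __builtin_va_start (ap, fmt);
    __builtin_va_end (ap);
  }
#endif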
4108 ix86_gen_leave = gen_leave_rex64;
4109 ix86_gen_add3 = gen_adddi3;
4110 ix86_gen_sub3 = gen_subdi3;
4111 ix86_gen_sub3_carry = gen_subdi3_carry;
4112 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4113 ix86_gen_monitor = gen_sse3_monitor64;
4114 ix86_gen_andsp = gen_anddi3;
4115 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4116 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4117 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4121 ix86_gen_leave = gen_leave;
4122 ix86_gen_add3 = gen_addsi3;
4123 ix86_gen_sub3 = gen_subsi3;
4124 ix86_gen_sub3_carry = gen_subsi3_carry;
4125 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4126 ix86_gen_monitor = gen_sse3_monitor;
4127 ix86_gen_andsp = gen_andsi3;
4128 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4129 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4130 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
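/* A minimal sketch -- not from the original source -- of why these hooks
   exist: expander code that is word-mode independent can emit the right
   pattern without testing TARGET_64BIT at every use.  */
#if 0
  /* Expands to adddi3 in 64-bit mode and to addsi3 in 32-bit mode.  */
  emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (-16)));
#endif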
4134 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4135 if (!TARGET_64BIT)
4136 target_flags |= MASK_CLD & ~target_flags_explicit;
4139 if (!TARGET_64BIT && flag_pic)
4141 if (flag_fentry > 0)
4142 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4146 else if (TARGET_SEH)
4148 if (flag_fentry == 0)
4149 sorry ("-mno-fentry isn%'t compatible with SEH");
4152 else if (flag_fentry < 0)
4154 #if defined(PROFILE_BEFORE_PROLOGUE)
4155 flag_fentry = 1;
4156 #else
4157 flag_fentry = 0;
4158 #endif
4161 /* Save the initial options in case the user does function-specific options. */
4163 target_option_default_node = target_option_current_node
4164 = build_target_option_node ();
4166 if (TARGET_AVX)
4168 /* When not optimizing for size, enable vzeroupper optimization for
4169 TARGET_AVX with -fexpensive-optimizations. */
4170 if (!optimize_size
4171 && flag_expensive_optimizations
4172 && !(target_flags_explicit & MASK_VZEROUPPER))
4173 target_flags |= MASK_VZEROUPPER;
4175 else
4177 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4178 target_flags &= ~MASK_VZEROUPPER;
4182 /* Return TRUE if VAL is passed in a register with a 256bit AVX mode. */
4185 function_pass_avx256_p (const_rtx val)
4190 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4191 return true;
4193 if (GET_CODE (val) == PARALLEL)
4198 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4200 r = XVECEXP (val, 0, i);
4201 if (GET_CODE (r) == EXPR_LIST
4203 && REG_P (XEXP (r, 0))
4204 && (GET_MODE (XEXP (r, 0)) == OImode
4205 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4206 return true;
4213 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4216 ix86_option_override (void)
4218 ix86_option_override_internal (true);
4221 /* Update register usage after having seen the compiler flags. */
4224 ix86_conditional_register_usage (void)
4229 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4231 if (fixed_regs[i] > 1)
4232 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4233 if (call_used_regs[i] > 1)
4234 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4237 /* The PIC register, if it exists, is fixed. */
4238 j = PIC_OFFSET_TABLE_REGNUM;
4239 if (j != INVALID_REGNUM)
4240 fixed_regs[j] = call_used_regs[j] = 1;
4242 /* The MS_ABI changes the set of call-used registers. */
4243 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4245 call_used_regs[SI_REG] = 0;
4246 call_used_regs[DI_REG] = 0;
4247 call_used_regs[XMM6_REG] = 0;
4248 call_used_regs[XMM7_REG] = 0;
4249 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4250 call_used_regs[i] = 0;
4253 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4254 other call-clobbered regs for 64-bit. */
4256 if (TARGET_64BIT)
4257 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4259 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4260 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4261 && call_used_regs[i])
4262 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4265 /* If MMX is disabled, squash the registers. */
4266 if (! TARGET_MMX)
4267 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4268 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4269 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4271 /* If SSE is disabled, squash the registers. */
4272 if (! TARGET_SSE)
4273 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4274 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4275 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4277 /* If the FPU is disabled, squash the registers. */
4278 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4279 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4280 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4281 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4283 /* If 32-bit, squash the 64-bit registers. */
4284 if (! TARGET_64BIT)
4286 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4287 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4288 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4289 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4294 /* Save the current options */
4297 ix86_function_specific_save (struct cl_target_option *ptr)
4299 ptr->arch = ix86_arch;
4300 ptr->schedule = ix86_schedule;
4301 ptr->tune = ix86_tune;
4302 ptr->fpmath = ix86_fpmath;
4303 ptr->branch_cost = ix86_branch_cost;
4304 ptr->tune_defaulted = ix86_tune_defaulted;
4305 ptr->arch_specified = ix86_arch_specified;
4306 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4307 ptr->ix86_target_flags_explicit = target_flags_explicit;
4309 /* The fields are char but the variables are not; make sure the
4310 values fit in the fields. */
4311 gcc_assert (ptr->arch == ix86_arch);
4312 gcc_assert (ptr->schedule == ix86_schedule);
4313 gcc_assert (ptr->tune == ix86_tune);
4314 gcc_assert (ptr->fpmath == ix86_fpmath);
4315 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4318 /* Restore the current options */
4321 ix86_function_specific_restore (struct cl_target_option *ptr)
4323 enum processor_type old_tune = ix86_tune;
4324 enum processor_type old_arch = ix86_arch;
4325 unsigned int ix86_arch_mask, ix86_tune_mask;
4328 ix86_arch = (enum processor_type) ptr->arch;
4329 ix86_schedule = (enum attr_cpu) ptr->schedule;
4330 ix86_tune = (enum processor_type) ptr->tune;
4331 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4332 ix86_branch_cost = ptr->branch_cost;
4333 ix86_tune_defaulted = ptr->tune_defaulted;
4334 ix86_arch_specified = ptr->arch_specified;
4335 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4336 target_flags_explicit = ptr->ix86_target_flags_explicit;
4338 /* Recreate the arch feature tests if the arch changed */
4339 if (old_arch != ix86_arch)
4341 ix86_arch_mask = 1u << ix86_arch;
4342 for (i = 0; i < X86_ARCH_LAST; ++i)
4343 ix86_arch_features[i]
4344 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4347 /* Recreate the tune optimization tests */
4348 if (old_tune != ix86_tune)
4350 ix86_tune_mask = 1u << ix86_tune;
4351 for (i = 0; i < X86_TUNE_LAST; ++i)
4352 ix86_tune_features[i]
4353 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4357 /* Print the current options */
4360 ix86_function_specific_print (FILE *file, int indent,
4361 struct cl_target_option *ptr)
4364 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4365 NULL, NULL, NULL, false);
4367 fprintf (file, "%*sarch = %d (%s)\n",
4370 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4371 ? cpu_names[ptr->arch]
4374 fprintf (file, "%*stune = %d (%s)\n",
4377 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4378 ? cpu_names[ptr->tune]
4381 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4382 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4383 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4384 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4388 fprintf (file, "%*s%s\n", indent, "", target_string);
4389 free (target_string);
4394 /* Inner function to process the attribute((target(...))), take an argument and
4395 set the current options from the argument. If we have a list, recursively go
4396 over the list. */
4399 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4404 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4405 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4406 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4407 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4422 enum ix86_opt_type type;
4427 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4428 IX86_ATTR_ISA ("abm", OPT_mabm),
4429 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4430 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4431 IX86_ATTR_ISA ("aes", OPT_maes),
4432 IX86_ATTR_ISA ("avx", OPT_mavx),
4433 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4434 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4435 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4436 IX86_ATTR_ISA ("sse", OPT_msse),
4437 IX86_ATTR_ISA ("sse2", OPT_msse2),
4438 IX86_ATTR_ISA ("sse3", OPT_msse3),
4439 IX86_ATTR_ISA ("sse4", OPT_msse4),
4440 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4441 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4442 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4443 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4444 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4445 IX86_ATTR_ISA ("xop", OPT_mxop),
4446 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4447 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4448 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4449 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4451 /* string options */
4452 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4453 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4454 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4457 IX86_ATTR_YES ("cld",
4461 IX86_ATTR_NO ("fancy-math-387",
4462 OPT_mfancy_math_387,
4463 MASK_NO_FANCY_MATH_387),
4465 IX86_ATTR_YES ("ieee-fp",
4469 IX86_ATTR_YES ("inline-all-stringops",
4470 OPT_minline_all_stringops,
4471 MASK_INLINE_ALL_STRINGOPS),
4473 IX86_ATTR_YES ("inline-stringops-dynamically",
4474 OPT_minline_stringops_dynamically,
4475 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4477 IX86_ATTR_NO ("align-stringops",
4478 OPT_mno_align_stringops,
4479 MASK_NO_ALIGN_STRINGOPS),
4481 IX86_ATTR_YES ("recip",
4487 /* If this is a list, recurse to get the options. */
4488 if (TREE_CODE (args) == TREE_LIST)
4492 for (; args; args = TREE_CHAIN (args))
4493 if (TREE_VALUE (args)
4494 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4500 else if (TREE_CODE (args) != STRING_CST)
4503 /* Handle multiple arguments separated by commas. */
4504 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4506 while (next_optstr && *next_optstr != '\0')
4508 char *p = next_optstr;
4510 char *comma = strchr (next_optstr, ',');
4511 const char *opt_string;
4512 size_t len, opt_len;
4517 enum ix86_opt_type type = ix86_opt_unknown;
4523 len = comma - next_optstr;
4524 next_optstr = comma + 1;
4532 /* Recognize no-xxx. */
4533 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4542 /* Find the option. */
4545 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4547 type = attrs[i].type;
4548 opt_len = attrs[i].len;
4549 if (ch == attrs[i].string[0]
4550 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4551 && memcmp (p, attrs[i].string, opt_len) == 0)
4554 mask = attrs[i].mask;
4555 opt_string = attrs[i].string;
4560 /* Process the option. */
4563 error ("attribute(target(\"%s\")) is unknown", orig_p);
4567 else if (type == ix86_opt_isa)
4568 ix86_handle_option (opt, p, opt_set_p);
4570 else if (type == ix86_opt_yes || type == ix86_opt_no)
4572 if (type == ix86_opt_no)
4573 opt_set_p = !opt_set_p;
4576 target_flags |= mask;
4578 target_flags &= ~mask;
4581 else if (type == ix86_opt_str)
4585 error ("option(\"%s\") was already specified", opt_string);
4589 p_strings[opt] = xstrdup (p + opt_len);
4599 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4602 ix86_valid_target_attribute_tree (tree args)
4604 const char *orig_arch_string = ix86_arch_string;
4605 const char *orig_tune_string = ix86_tune_string;
4606 const char *orig_fpmath_string = ix86_fpmath_string;
4607 int orig_tune_defaulted = ix86_tune_defaulted;
4608 int orig_arch_specified = ix86_arch_specified;
4609 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4612 struct cl_target_option *def
4613 = TREE_TARGET_OPTION (target_option_default_node);
4615 /* Process each of the options on the chain. */
4616 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4617 return NULL_TREE;
4619 /* If the changed options are different from the default, rerun
4620 ix86_option_override_internal, and then save the options away.
4621 The string options are attribute options, and will be undone
4622 when we copy the save structure. */
4623 if (ix86_isa_flags != def->x_ix86_isa_flags
4624 || target_flags != def->x_target_flags
4625 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4626 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4627 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4629 /* If we are using the default tune= or arch=, undo the string assigned,
4630 and use the default. */
4631 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4632 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4633 else if (!orig_arch_specified)
4634 ix86_arch_string = NULL;
4636 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4637 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4638 else if (orig_tune_defaulted)
4639 ix86_tune_string = NULL;
4641 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4642 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4643 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4644 else if (!TARGET_64BIT && TARGET_SSE)
4645 ix86_fpmath_string = "sse,387";
4647 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4648 ix86_option_override_internal (false);
4650 /* Add any builtin functions with the new isa if any. */
4651 ix86_add_new_builtins (ix86_isa_flags);
4653 /* Save the current options unless we are validating options for
4654 #pragma. */
4655 t = build_target_option_node ();
4657 ix86_arch_string = orig_arch_string;
4658 ix86_tune_string = orig_tune_string;
4659 ix86_fpmath_string = orig_fpmath_string;
4661 /* Free up memory allocated to hold the strings */
4662 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4663 if (option_strings[i])
4664 free (option_strings[i]);
4670 /* Hook to validate attribute((target("string"))). */
4673 ix86_valid_target_attribute_p (tree fndecl,
4674 tree ARG_UNUSED (name),
4676 int ARG_UNUSED (flags))
4678 struct cl_target_option cur_target;
4680 tree old_optimize = build_optimization_node ();
4681 tree new_target, new_optimize;
4682 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4684 /* If the function changed the optimization levels as well as setting target
4685 options, start with the optimizations specified. */
4686 if (func_optimize && func_optimize != old_optimize)
4687 cl_optimization_restore (&global_options,
4688 TREE_OPTIMIZATION (func_optimize));
4690 /* The target attributes may also change some optimization flags, so update
4691 the optimization options if necessary. */
4692 cl_target_option_save (&cur_target, &global_options);
4693 new_target = ix86_valid_target_attribute_tree (args);
4694 new_optimize = build_optimization_node ();
4701 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4703 if (old_optimize != new_optimize)
4704 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4707 cl_target_option_restore (&global_options, &cur_target);
4709 if (old_optimize != new_optimize)
4710 cl_optimization_restore (&global_options,
4711 TREE_OPTIMIZATION (old_optimize));
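/* Hypothetical declarations -- not from the original source -- of the kind
   this hook validates.  */
#if 0
  int fast_path (int x) __attribute__ ((target ("sse4.2,arch=core2")));
  int baseline (int x) __attribute__ ((target ("no-sse3")));
#endif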
4717 /* Hook to determine if one function can safely inline another. */
4720 ix86_can_inline_p (tree caller, tree callee)
4723 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4724 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4726 /* If callee has no option attributes, then it is ok to inline. */
4730 /* If caller has no option attributes, but callee does then it is not ok to
4731 inline. */
4732 else if (!caller_tree)
4737 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4738 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4740 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4741 function can inline an SSE2 function but an SSE2 function can't inline
4742 an SSE4 function. */
4743 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4744 != callee_opts->x_ix86_isa_flags)
4747 /* See if we have the same non-isa options. */
4748 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4751 /* See if arch, tune, etc. are the same. */
4752 else if (caller_opts->arch != callee_opts->arch)
4755 else if (caller_opts->tune != callee_opts->tune)
4758 else if (caller_opts->fpmath != callee_opts->fpmath)
4761 else if (caller_opts->branch_cost != callee_opts->branch_cost)
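/* A hypothetical example -- not from the original source -- of the subset
   rule above: an SSE4.2 caller may inline an SSE2 callee, but an SSE2
   caller may not inline an SSE4.2 callee.  */
#if 0
  __attribute__ ((target ("sse2"))) static int
  callee (int x) { return x + 1; }

  __attribute__ ((target ("sse4.2"))) int
  caller (int x) { return callee (x); }   /* OK: SSE2 is a subset.  */
#endif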
4772 /* Remember the last target of ix86_set_current_function. */
4773 static GTY(()) tree ix86_previous_fndecl;
4775 /* Establish appropriate back-end context for processing the function
4776 FNDECL. The argument might be NULL to indicate processing at top
4777 level, outside of any function scope. */
4779 ix86_set_current_function (tree fndecl)
4781 /* Only change the context if the function changes. This hook is called
4782 several times in the course of compiling a function, and we don't want to
4783 slow things down too much or call target_reinit when it isn't safe. */
4784 if (fndecl && fndecl != ix86_previous_fndecl)
4786 tree old_tree = (ix86_previous_fndecl
4787 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4790 tree new_tree = (fndecl
4791 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4794 ix86_previous_fndecl = fndecl;
4795 if (old_tree == new_tree)
4800 cl_target_option_restore (&global_options,
4801 TREE_TARGET_OPTION (new_tree));
4807 struct cl_target_option *def
4808 = TREE_TARGET_OPTION (target_option_current_node);
4810 cl_target_option_restore (&global_options, def);
4817 /* Return true if this goes in large data/bss. */
4820 ix86_in_large_data_p (tree exp)
4822 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4823 return false;
4825 /* Functions are never large data. */
4826 if (TREE_CODE (exp) == FUNCTION_DECL)
4827 return false;
4829 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4831 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4832 if (strcmp (section, ".ldata") == 0
4833 || strcmp (section, ".lbss") == 0)
4839 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4841 /* If this is an incomplete type with size 0, then we can't put it
4842 in data because it might be too big when completed. */
4843 if (!size || size > ix86_section_threshold)
4844 return true;
4850 /* Switch to the appropriate section for output of DECL.
4851 DECL is either a `VAR_DECL' node or a constant of some sort.
4852 RELOC indicates whether forming the initial value of DECL requires
4853 link-time relocations. */
4855 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4856 ATTRIBUTE_UNUSED;
4859 x86_64_elf_select_section (tree decl, int reloc,
4860 unsigned HOST_WIDE_INT align)
4862 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4863 && ix86_in_large_data_p (decl))
4865 const char *sname = NULL;
4866 unsigned int flags = SECTION_WRITE;
4867 switch (categorize_decl_for_section (decl, reloc))
4872 case SECCAT_DATA_REL:
4873 sname = ".ldata.rel";
4875 case SECCAT_DATA_REL_LOCAL:
4876 sname = ".ldata.rel.local";
4878 case SECCAT_DATA_REL_RO:
4879 sname = ".ldata.rel.ro";
4881 case SECCAT_DATA_REL_RO_LOCAL:
4882 sname = ".ldata.rel.ro.local";
4884 case SECCAT_BSS:
4885 sname = ".lbss";
4886 flags |= SECTION_BSS;
4889 case SECCAT_RODATA_MERGE_STR:
4890 case SECCAT_RODATA_MERGE_STR_INIT:
4891 case SECCAT_RODATA_MERGE_CONST:
4892 sname = ".lrodata";
4893 flags = 0;
4895 case SECCAT_SRODATA:
4902 /* We don't split these for medium model. Place them into
4903 default sections and hope for the best. */
4908 /* We might get called with string constants, but get_named_section
4909 doesn't like them as they are not DECLs. Also, we need to set
4910 flags in that case. */
4911 if (!DECL_P (decl))
4912 return get_section (sname, flags, NULL);
4913 return get_named_section (decl, sname, reloc);
4916 return default_elf_select_section (decl, reloc, align);
4919 /* Build up a unique section name, expressed as a
4920 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4921 RELOC indicates whether the initial value of EXP requires
4922 link-time relocations. */
4924 static void ATTRIBUTE_UNUSED
4925 x86_64_elf_unique_section (tree decl, int reloc)
4927 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4928 && ix86_in_large_data_p (decl))
4930 const char *prefix = NULL;
4931 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4932 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4934 switch (categorize_decl_for_section (decl, reloc))
4937 case SECCAT_DATA_REL:
4938 case SECCAT_DATA_REL_LOCAL:
4939 case SECCAT_DATA_REL_RO:
4940 case SECCAT_DATA_REL_RO_LOCAL:
4941 prefix = one_only ? ".ld" : ".ldata";
4944 prefix = one_only ? ".lb" : ".lbss";
4947 case SECCAT_RODATA_MERGE_STR:
4948 case SECCAT_RODATA_MERGE_STR_INIT:
4949 case SECCAT_RODATA_MERGE_CONST:
4950 prefix = one_only ? ".lr" : ".lrodata";
4952 case SECCAT_SRODATA:
4959 /* We don't split these for medium model. Place them into
4960 default sections and hope for the best. */
4965 const char *name, *linkonce;
4968 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4969 name = targetm.strip_name_encoding (name);
4971 /* If we're using one_only, then there needs to be a .gnu.linkonce
4972 prefix to the section name. */
4973 linkonce = one_only ? ".gnu.linkonce" : "";
4975 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4977 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4981 default_unique_section (decl, reloc);
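/* Illustrative section names -- not from the original source -- for a
   hypothetical variable placed beyond the large-data threshold.  */
#if 0
  int big_table[1 << 20];
  /* -> ".ldata.big_table" for an ordinary definition, or
        ".gnu.linkonce.ld.big_table" when one-only and COMDAT groups
        are unavailable.  */
#endif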
4984 #ifdef COMMON_ASM_OP
4985 /* This says how to output assembler code to declare an
4986 uninitialized external linkage data object.
4988 For medium model x86-64 we need to use .largecomm opcode for
4989 large objects. */
4991 x86_elf_aligned_common (FILE *file,
4992 const char *name, unsigned HOST_WIDE_INT size,
4995 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4996 && size > (unsigned int)ix86_section_threshold)
4997 fputs (".largecomm\t", file);
4998 else
4999 fputs (COMMON_ASM_OP, file);
5000 assemble_name (file, name);
5001 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5002 size, align / BITS_PER_UNIT);
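/* Illustrative output -- not from the original source -- assuming
   -mcmodel=medium and a size above the large-data threshold:

	.largecomm	big_buf,131072,32

   while small objects keep the ordinary directive:

	.comm	small_buf,64,4  */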
5006 /* Utility function for targets to use in implementing
5007 ASM_OUTPUT_ALIGNED_BSS. */
5010 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5011 const char *name, unsigned HOST_WIDE_INT size,
5014 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5015 && size > (unsigned int)ix86_section_threshold)
5016 switch_to_section (get_named_section (decl, ".lbss", 0));
5017 else
5018 switch_to_section (bss_section);
5019 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5020 #ifdef ASM_DECLARE_OBJECT_NAME
5021 last_assemble_variable_decl = decl;
5022 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5024 /* The standard thing is to just output a label for the object. */
5025 ASM_OUTPUT_LABEL (file, name);
5026 #endif /* ASM_DECLARE_OBJECT_NAME */
5027 ASM_OUTPUT_SKIP (file, size ? size : 1);
5030 static const struct default_options ix86_option_optimization_table[] =
5032 /* Turn off -fschedule-insns by default. It tends to make the
5033 problem with not enough registers even worse. */
5034 #ifdef INSN_SCHEDULING
5035 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
5036 #endif
5038 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
5039 SUBTARGET_OPTIMIZATION_OPTIONS,
5040 #endif
5041 { OPT_LEVELS_NONE, 0, NULL, 0 }
5044 /* Implement TARGET_OPTION_INIT_STRUCT. */
5047 ix86_option_init_struct (struct gcc_options *opts)
5049 #if TARGET_MACHO
5050 /* The Darwin libraries never set errno, so we might as well
5051 avoid calling them when that's the only reason we would. */
5052 opts->x_flag_errno_math = 0;
5053 #endif
5054 opts->x_flag_pcc_struct_return = 2;
5055 opts->x_flag_asynchronous_unwind_tables = 2;
5056 opts->x_flag_vect_cost_model = 1;
5059 /* Decide whether we must probe the stack before any space allocation
5060 on this target. It's essentially TARGET_STACK_PROBE except when
5061 -fstack-check causes the stack to be already probed differently. */
5064 ix86_target_stack_probe (void)
5066 /* Do not probe the stack twice if static stack checking is enabled. */
5067 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5068 return false;
5070 return TARGET_STACK_PROBE;
5073 /* Decide whether we can make a sibling call to a function. DECL is the
5074 declaration of the function being targeted by the call and EXP is the
5075 CALL_EXPR representing the call. */
5078 ix86_function_ok_for_sibcall (tree decl, tree exp)
5080 tree type, decl_or_type;
5083 /* If we are generating position-independent code, we cannot sibcall
5084 optimize any indirect call, or a direct call to a global function,
5085 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5086 if (!TARGET_MACHO
5087 && !TARGET_64BIT
5088 && flag_pic
5089 && (!decl || !targetm.binds_local_p (decl)))
5090 return false;
5092 /* If we need to align the outgoing stack, then sibcalling would
5093 unalign the stack, which may break the called function. */
5094 if (ix86_minimum_incoming_stack_boundary (true)
5095 < PREFERRED_STACK_BOUNDARY)
5096 return false;
5098 if (decl)
5099 {
5100 decl_or_type = decl;
5101 type = TREE_TYPE (decl);
5102 }
5103 else
5104 {
5105 /* We're looking at the CALL_EXPR, we need the type of the function. */
5106 type = CALL_EXPR_FN (exp); /* pointer expression */
5107 type = TREE_TYPE (type); /* pointer type */
5108 type = TREE_TYPE (type); /* function type */
5109 decl_or_type = type;
5112 /* Check that the return value locations are the same. For example,
5113 if we are returning floats on the 80387 register stack, we cannot
5114 make a sibcall from a function that doesn't return a float to a
5115 function that does or, conversely, from a function that does return
5116 a float to a function that doesn't; the necessary stack adjustment
5117 would not be executed. This is also the place we notice
5118 differences in the return value ABI. Note that it is ok for one
5119 of the functions to have void return type as long as the return
5120 value of the other is passed in a register. */
5121 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5122 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5123 cfun->decl, false);
5124 if (STACK_REG_P (a) || STACK_REG_P (b))
5126 if (!rtx_equal_p (a, b))
5127 return false;
5129 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5131 /* Disable sibcall if we need to generate vzeroupper after
5132 callee returns. */
5133 if (TARGET_VZEROUPPER
5134 && cfun->machine->callee_return_avx256_p
5135 && !cfun->machine->caller_return_avx256_p)
5136 return false;
5138 else if (!rtx_equal_p (a, b))
5139 return false;
5143 /* The SYSV ABI has more call-clobbered registers;
5144 disallow sibcalls from MS to SYSV. */
5145 if (cfun->machine->call_abi == MS_ABI
5146 && ix86_function_type_abi (type) == SYSV_ABI)
5147 return false;
5151 /* If this call is indirect, we'll need to be able to use a
5152 call-clobbered register for the address of the target function.
5153 Make sure that all such registers are not used for passing
5154 parameters. Note that DLLIMPORT functions are indirect. */
5155 if (!decl
5156 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5158 if (ix86_function_regparm (type, NULL) >= 3)
5160 /* ??? Need to count the actual number of registers to be used,
5161 not the possible number of registers. Fix later. */
5167 /* Otherwise okay. That also includes certain types of indirect calls. */
5168 return true;
5171 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5172 and "sseregparm" calling convention attributes;
5173 arguments as in struct attribute_spec.handler. */
5176 ix86_handle_cconv_attribute (tree *node, tree name,
5178 int flags ATTRIBUTE_UNUSED,
5181 if (TREE_CODE (*node) != FUNCTION_TYPE
5182 && TREE_CODE (*node) != METHOD_TYPE
5183 && TREE_CODE (*node) != FIELD_DECL
5184 && TREE_CODE (*node) != TYPE_DECL)
5186 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5188 *no_add_attrs = true;
5192 /* Can combine regparm with all attributes but fastcall. */
5193 if (is_attribute_p ("regparm", name))
5197 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5199 error ("fastcall and regparm attributes are not compatible");
5202 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5204 error ("regparam and thiscall attributes are not compatible");
5207 cst = TREE_VALUE (args);
5208 if (TREE_CODE (cst) != INTEGER_CST)
5210 warning (OPT_Wattributes,
5211 "%qE attribute requires an integer constant argument",
5213 *no_add_attrs = true;
5215 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5217 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5219 *no_add_attrs = true;
5227 /* Do not warn when emulating the MS ABI. */
5228 if ((TREE_CODE (*node) != FUNCTION_TYPE
5229 && TREE_CODE (*node) != METHOD_TYPE)
5230 || ix86_function_type_abi (*node) != MS_ABI)
5231 warning (OPT_Wattributes, "%qE attribute ignored",
5233 *no_add_attrs = true;
5237 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5238 if (is_attribute_p ("fastcall", name))
5240 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5242 error ("fastcall and cdecl attributes are not compatible");
5244 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5246 error ("fastcall and stdcall attributes are not compatible");
5248 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5250 error ("fastcall and regparm attributes are not compatible");
5252 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5254 error ("fastcall and thiscall attributes are not compatible");
5258 /* Can combine stdcall with fastcall (redundant), regparm and
5260 else if (is_attribute_p ("stdcall", name))
5262 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5264 error ("stdcall and cdecl attributes are not compatible");
5266 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5268 error ("stdcall and fastcall attributes are not compatible");
5270 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5272 error ("stdcall and thiscall attributes are not compatible");
5276 /* Can combine cdecl with regparm and sseregparm. */
5277 else if (is_attribute_p ("cdecl", name))
5279 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5281 error ("stdcall and cdecl attributes are not compatible");
5283 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5285 error ("fastcall and cdecl attributes are not compatible");
5287 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5289 error ("cdecl and thiscall attributes are not compatible");
5292 else if (is_attribute_p ("thiscall", name))
5294 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5295 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5297 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5299 error ("stdcall and thiscall attributes are not compatible");
5301 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5303 error ("fastcall and thiscall attributes are not compatible");
5305 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5307 error ("cdecl and thiscall attributes are not compatible");
5311 /* Can combine sseregparm with all attributes. */
5316 /* Return 0 if the attributes for two types are incompatible, 1 if they
5317 are compatible, and 2 if they are nearly compatible (which causes a
5318 warning to be generated). */
5321 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5323 /* Check for mismatch of non-default calling convention. */
5324 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5326 if (TREE_CODE (type1) != FUNCTION_TYPE
5327 && TREE_CODE (type1) != METHOD_TYPE)
5330 /* Check for mismatched fastcall/regparm types. */
5331 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5332 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5333 || (ix86_function_regparm (type1, NULL)
5334 != ix86_function_regparm (type2, NULL)))
5337 /* Check for mismatched sseregparm types. */
5338 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5339 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5342 /* Check for mismatched thiscall types. */
5343 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5344 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5347 /* Check for mismatched return types (cdecl vs stdcall). */
5348 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5349 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
5355 /* Return the regparm value for a function with the indicated TYPE and DECL.
5356 DECL may be NULL when calling function indirectly
5357 or considering a libcall. */
5360 ix86_function_regparm (const_tree type, const_tree decl)
5365 if (TARGET_64BIT)
5366 return (ix86_function_type_abi (type) == SYSV_ABI
5367 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5369 regparm = ix86_regparm;
5370 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5371 if (attr)
5372 {
5373 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5374 return regparm;
5375 }
5377 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5380 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5383 /* Use register calling convention for local functions when possible. */
5384 if (decl
5385 && TREE_CODE (decl) == FUNCTION_DECL
5386 && optimize
5387 && !(profile_flag && !flag_fentry))
5389 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5390 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5391 if (i && i->local)
5392 {
5393 int local_regparm, globals = 0, regno;
5395 /* Make sure no regparm register is taken by a
5396 fixed register variable. */
5397 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5398 if (fixed_regs[local_regparm])
5399 break;
5401 /* We don't want to use regparm(3) for nested functions as
5402 these use a static chain pointer in the third argument. */
5403 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5404 local_regparm = 2;
5406 /* In 32-bit mode save a register for the split stack. */
5407 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5408 local_regparm = 2;
5410 /* Each fixed register usage increases register pressure,
5411 so fewer registers should be used for argument passing.
5412 This functionality can be overridden by an explicit
5413 regparm value. */
5414 for (regno = 0; regno <= DI_REG; regno++)
5415 if (fixed_regs[regno])
5416 globals++;
5418 local_regparm
5419 = globals < local_regparm ? local_regparm - globals : 0;
5421 if (local_regparm > regparm)
5422 regparm = local_regparm;
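/* A hypothetical example -- not from the original source: an explicit
   regparm attribute feeds the TREE_INT_CST_LOW read above, so the first
   three int arguments of madd travel in %eax, %edx and %ecx.  */
#if 0
  int __attribute__ ((regparm (3))) madd (int a, int b, int c);
#endif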
5429 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5430 DFmode (2) arguments in SSE registers for a function with the
5431 indicated TYPE and DECL. DECL may be NULL when calling function
5432 indirectly or considering a libcall. Otherwise return 0. */
5435 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5437 gcc_assert (!TARGET_64BIT);
5439 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5440 by the sseregparm attribute. */
5441 if (TARGET_SSEREGPARM
5442 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5449 error ("calling %qD with attribute sseregparm without "
5450 "SSE/SSE2 enabled", decl);
5452 error ("calling %qT with attribute sseregparm without "
5453 "SSE/SSE2 enabled", type);
5461 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5462 (and DFmode for SSE2) arguments in SSE registers. */
5463 if (decl && TARGET_SSE_MATH && optimize
5464 && !(profile_flag && !flag_fentry))
5466 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5467 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5468 if (i && i->local)
5469 return TARGET_SSE2 ? 2 : 1;
5475 /* Return true if EAX is live at the start of the function. Used by
5476 ix86_expand_prologue to determine if we need special help before
5477 calling allocate_stack_worker. */
5480 ix86_eax_live_at_start_p (void)
5482 /* Cheat. Don't bother working forward from ix86_function_regparm
5483 to the function type to whether an actual argument is located in
5484 eax. Instead just look at cfg info, which is still close enough
5485 to correct at this point. This gives false positives for broken
5486 functions that might use uninitialized data that happens to be
5487 allocated in eax, but who cares? */
5488 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5492 ix86_keep_aggregate_return_pointer (tree fntype)
5496 attr = lookup_attribute ("callee_pop_aggregate_return",
5497 TYPE_ATTRIBUTES (fntype));
5498 if (attr)
5499 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5501 return KEEP_AGGREGATE_RETURN_POINTER != 0;
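/* A hypothetical declaration -- not from the original source -- using the
   attribute looked up above; the argument 0 leaves the hidden return
   pointer for the caller to pop.  */
#if 0
  struct big { int v[8]; };
  struct big __attribute__ ((callee_pop_aggregate_return (0))) make_big (void);
#endif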
5504 /* Value is the number of bytes of arguments automatically
5505 popped when returning from a subroutine call.
5506 FUNDECL is the declaration node of the function (as a tree),
5507 FUNTYPE is the data type of the function (as a tree),
5508 or for a library call it is an identifier node for the subroutine name.
5509 SIZE is the number of bytes of arguments passed on the stack.
5511 On the 80386, the RTD insn may be used to pop them if the number
5512 of args is fixed, but if the number is variable then the caller
5513 must pop them all. RTD can't be used for library calls now
5514 because the library is compiled with the Unix compiler.
5515 Use of RTD is a selectable option, since it is incompatible with
5516 standard Unix calling sequences. If the option is not selected,
5517 the caller must always pop the args.
5519 The attribute stdcall is equivalent to RTD on a per module basis. */
5522 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5526 /* None of the 64-bit ABIs pop arguments. */
5527 if (TARGET_64BIT)
5528 return 0;
5530 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5532 /* Cdecl functions override -mrtd, and never pop the stack. */
5533 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5535 /* Stdcall and fastcall functions will pop the stack if not
5536 variable args. */
5537 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5538 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5539 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5542 if (rtd && ! stdarg_p (funtype))
5543 return size;
5546 /* Lose any fake structure return argument if it is passed on the stack. */
5547 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5548 && !ix86_keep_aggregate_return_pointer (funtype))
5550 int nregs = ix86_function_regparm (funtype, fundecl);
5551 if (!nregs)
5552 return GET_MODE_SIZE (Pmode);
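/* A hypothetical example -- not from the original source: a stdcall
   function with two int arguments pops 8 bytes, i.e. this function
   yields 8 and the callee returns with "ret $8".  */
#if 0
  int __attribute__ ((stdcall)) sum2 (int a, int b);
#endif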
5558 /* Argument support functions. */
5560 /* Return true when register may be used to pass function parameters. */
5562 ix86_function_arg_regno_p (int regno)
5565 const int *parm_regs;
5567 if (!TARGET_64BIT)
5569 if (TARGET_MACHO)
5570 return (regno < REGPARM_MAX
5571 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5572 else
5573 return (regno < REGPARM_MAX
5574 || (TARGET_MMX && MMX_REGNO_P (regno)
5575 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5576 || (TARGET_SSE && SSE_REGNO_P (regno)
5577 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5582 if (SSE_REGNO_P (regno) && TARGET_SSE)
5583 return true;
5587 if (TARGET_SSE && SSE_REGNO_P (regno)
5588 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5589 return true;
5592 /* TODO: The function should depend on current function ABI but
5593 builtins.c would need updating then. Therefore we use the
5594 default ABI. */
5596 /* RAX is used as hidden argument to va_arg functions. */
5597 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5600 if (ix86_abi == MS_ABI)
5601 parm_regs = x86_64_ms_abi_int_parameter_registers;
5603 parm_regs = x86_64_int_parameter_registers;
5604 for (i = 0; i < (ix86_abi == MS_ABI
5605 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5606 if (regno == parm_regs[i])
5607 return true;
5609 return false;
5611 /* Return true if we do not know how to pass TYPE solely in registers. */
5614 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5616 if (must_pass_in_stack_var_size_or_pad (mode, type))
5619 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5620 The layout_type routine is crafty and tries to trick us into passing
5621 currently unsupported vector types on the stack by using TImode. */
5622 return (!TARGET_64BIT && mode == TImode
5623 && type && TREE_CODE (type) != VECTOR_TYPE);
5626 /* Return the size, in bytes, of the area reserved for arguments passed
5627 in registers for the function represented by FNDECL, depending on the ABI used. */
5630 ix86_reg_parm_stack_space (const_tree fndecl)
5632 enum calling_abi call_abi = SYSV_ABI;
5633 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5634 call_abi = ix86_function_abi (fndecl);
5636 call_abi = ix86_function_type_abi (fndecl);
5637 if (call_abi == MS_ABI)
5642 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call ABI used. */
5645 ix86_function_type_abi (const_tree fntype)
5647 if (TARGET_64BIT && fntype != NULL)
5649 enum calling_abi abi = ix86_abi;
5650 if (abi == SYSV_ABI)
5652 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5655 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5663 ix86_function_ms_hook_prologue (const_tree fn)
5665 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5667 if (decl_function_context (fn) != NULL_TREE)
5668 error_at (DECL_SOURCE_LOCATION (fn),
5669 "ms_hook_prologue is not compatible with nested function");
5676 static enum calling_abi
5677 ix86_function_abi (const_tree fndecl)
5681 return ix86_function_type_abi (TREE_TYPE (fndecl));
5684 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the call ABI of the current function. */
5687 ix86_cfun_abi (void)
5689 if (! cfun || ! TARGET_64BIT)
5691 return cfun->machine->call_abi;
5694 /* Write the extra assembler code needed to declare a function properly. */
5697 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5700 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5704 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5705 unsigned int filler_cc = 0xcccccccc;
5707 for (i = 0; i < filler_count; i += 4)
5708 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5711 #ifdef SUBTARGET_ASM_UNWIND_INIT
5712 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5715 ASM_OUTPUT_LABEL (asm_out_file, fname);
5717 /* Output the magic byte marker if the hot-patch attribute is set. */
5722 /* leaq [%rsp + 0], %rsp */
5723 asm_fprintf (asm_out_file, ASM_BYTE
5724 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5728 /* movl.s %edi, %edi
5729 push   %ebp
5730 movl.s %esp, %ebp */
5731 asm_fprintf (asm_out_file, ASM_BYTE
5732 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5738 extern void init_regs (void);
5740 /* Implementation of the call-ABI-switching target hook. The call
5741 register sets specific to FNDECL are set up here. See also
5742 ix86_conditional_register_usage for more details. */
5744 ix86_call_abi_override (const_tree fndecl)
5746 if (fndecl == NULL_TREE)
5747 cfun->machine->call_abi = ix86_abi;
5749 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5752 /* MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5753 re-initialization of init_regs each time we switch function context since
5754 this is needed only during RTL expansion. */
5756 ix86_maybe_switch_abi (void)
5759 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5763 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5764 for a call to a function whose data type is FNTYPE.
5765 For a library call, FNTYPE is 0. */
5768 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5769 tree fntype, /* tree ptr for function decl */
5770 rtx libname, /* SYMBOL_REF of library name or 0 */
5774 struct cgraph_local_info *i;
5777 memset (cum, 0, sizeof (*cum));
5779 /* Initialize for the current callee. */
5782 cfun->machine->callee_pass_avx256_p = false;
5783 cfun->machine->callee_return_avx256_p = false;
5788 i = cgraph_local_info (fndecl);
5789 cum->call_abi = ix86_function_abi (fndecl);
5790 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5795 cum->call_abi = ix86_function_type_abi (fntype);
5797 fnret_type = TREE_TYPE (fntype);
5802 if (TARGET_VZEROUPPER && fnret_type)
5804 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5806 if (function_pass_avx256_p (fnret_value))
5808 /* The return value of this function uses 256bit AVX modes. */
5810 cfun->machine->callee_return_avx256_p = true;
5812 cfun->machine->caller_return_avx256_p = true;
5816 cum->caller = caller;
5818 /* Set up the number of registers to use for passing arguments. */
5820 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5821 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5822 "or subtarget optimization implying it");
5823 cum->nregs = ix86_regparm;
5826 cum->nregs = (cum->call_abi == SYSV_ABI
5827 ? X86_64_REGPARM_MAX
5828 : X86_64_MS_REGPARM_MAX);
5832 cum->sse_nregs = SSE_REGPARM_MAX;
5835 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5836 ? X86_64_SSE_REGPARM_MAX
5837 : X86_64_MS_SSE_REGPARM_MAX);
5841 cum->mmx_nregs = MMX_REGPARM_MAX;
5842 cum->warn_avx = true;
5843 cum->warn_sse = true;
5844 cum->warn_mmx = true;
5846 /* Because types might mismatch between caller and callee, we need to
5847 use the actual type of the function for local calls.
5848 FIXME: cgraph_analyze can be told to actually record whether a function
5849 uses va_start, so for local functions maybe_vaarg can be made aggressive.
5851 FIXME: once the type system is fixed, we won't need this code anymore. */
5853 fntype = TREE_TYPE (fndecl);
5854 cum->maybe_vaarg = (fntype
5855 ? (!prototype_p (fntype) || stdarg_p (fntype))
5860 /* If there are variable arguments, then we won't pass anything
5861 in registers in 32-bit mode. */
5862 if (stdarg_p (fntype))
5873 /* Use ecx and edx registers if function has fastcall attribute,
5874 else look for regparm information. */
5877 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5880 cum->fastcall = 1; /* Same first register as in fastcall. */
5882 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5888 cum->nregs = ix86_function_regparm (fntype, fndecl);
5891 /* Set up the number of SSE registers used for passing SFmode
5892 and DFmode arguments. Warn for mismatching ABI. */
5893 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5897 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5898 But in the case of vector types, it is some vector mode.
5900 When we have only some of our vector isa extensions enabled, then there
5901 are some modes for which vector_mode_supported_p is false. For these
5902 modes, the generic vector support in gcc will choose some non-vector mode
5903 in order to implement the type. By computing the natural mode, we'll
5904 select the proper ABI location for the operand and not depend on whatever
5905 the middle-end decides to do with these vector types.
5907 The middle-end can't deal with vector types larger than 16 bytes. In
5908 this case, we return the original mode and warn of the ABI change if CUM isn't NULL. */
5911 static enum machine_mode
5912 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5914 enum machine_mode mode = TYPE_MODE (type);
5916 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5918 HOST_WIDE_INT size = int_size_in_bytes (type);
5919 if ((size == 8 || size == 16 || size == 32)
5920 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5921 && TYPE_VECTOR_SUBPARTS (type) > 1)
5923 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5925 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5926 mode = MIN_MODE_VECTOR_FLOAT;
5928 mode = MIN_MODE_VECTOR_INT;
5930 /* Get the mode which has this inner mode and number of units. */
5931 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5932 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5933 && GET_MODE_INNER (mode) == innermode)
5935 if (size == 32 && !TARGET_AVX)
5937 static bool warnedavx;
5944 warning (0, "AVX vector argument without AVX "
5945 "enabled changes the ABI");
5947 return TYPE_MODE (type);
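/* For illustration (a sketch, not part of this file): generic vector
   types map onto their natural vector modes here even when the ISA
   extension is disabled, except for 32-byte vectors without AVX, which
   fall back to TYPE_MODE and trigger the psABI warning above.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));   /* V4SImode */
typedef float v8sf __attribute__ ((vector_size (32))); /* V8SFmode only
                                                          with -mavx */
#endif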
5960 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5961 this may not agree with the mode that the type system has chosen for the
5962 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5963 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5966 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5971 if (orig_mode != BLKmode)
5972 tmp = gen_rtx_REG (orig_mode, regno);
5975 tmp = gen_rtx_REG (mode, regno);
5976 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5977 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5983 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5984 The goal of this code is to classify each eightbyte of the incoming argument
5985 by register class and assign registers accordingly. */
5987 /* Return the union class of CLASS1 and CLASS2.
5988 See the x86-64 PS ABI for details. */
5990 static enum x86_64_reg_class
5991 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5993 /* Rule #1: If both classes are equal, this is the resulting class. */
5994 if (class1 == class2)
5997 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5999 if (class1 == X86_64_NO_CLASS)
6001 if (class2 == X86_64_NO_CLASS)
6004 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6005 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6006 return X86_64_MEMORY_CLASS;
6008 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6009 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6010 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6011 return X86_64_INTEGERSI_CLASS;
6012 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6013 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6014 return X86_64_INTEGER_CLASS;
6016 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
6018 if (class1 == X86_64_X87_CLASS
6019 || class1 == X86_64_X87UP_CLASS
6020 || class1 == X86_64_COMPLEX_X87_CLASS
6021 || class2 == X86_64_X87_CLASS
6022 || class2 == X86_64_X87UP_CLASS
6023 || class2 == X86_64_COMPLEX_X87_CLASS)
6024 return X86_64_MEMORY_CLASS;
6026 /* Rule #6: Otherwise class SSE is used. */
6027 return X86_64_SSE_CLASS;
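/* A worked example of the merge rules above (illustration only): in
   struct { int i; float f; } both fields share one eightbyte.  The int
   classifies as INTEGERSI and the float (at offset 4, not 64-bit
   aligned) as SSE; rule #4 merges the two to INTEGER, so the pair
   travels in a single integer register.  */
#if 0
struct s { int i; float f; };
/* merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSE_CLASS)
     == X86_64_INTEGER_CLASS                      (rule #4)  */
#endif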
6030 /* Classify the argument of type TYPE and mode MODE.
6031 CLASSES will be filled by the register class used to pass each word
6032 of the operand. The number of words is returned. In case the parameter
6033 should be passed in memory, 0 is returned. As a special case for zero
6034 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6036 BIT_OFFSET is used internally for handling records; it specifies the
6037 offset, in bits modulo 256, to avoid overflow cases.
6039 See the x86-64 PS ABI for details.
6043 classify_argument (enum machine_mode mode, const_tree type,
6044 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6046 HOST_WIDE_INT bytes =
6047 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6048 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6050 /* Variable sized entities are always passed/returned in memory. */
6054 if (mode != VOIDmode
6055 && targetm.calls.must_pass_in_stack (mode, type))
6058 if (type && AGGREGATE_TYPE_P (type))
6062 enum x86_64_reg_class subclasses[MAX_CLASSES];
6064 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6068 for (i = 0; i < words; i++)
6069 classes[i] = X86_64_NO_CLASS;
6071 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6072 signal the memory class, so handle this as a special case. */
6075 classes[0] = X86_64_NO_CLASS;
6079 /* Classify each field of the record and merge classes. */
6080 switch (TREE_CODE (type))
6083 /* And now merge the fields of the structure. */
6084 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6086 if (TREE_CODE (field) == FIELD_DECL)
6090 if (TREE_TYPE (field) == error_mark_node)
6093 /* Bitfields are always classified as integer. Handle them
6094 early, since later code would consider them to be
6095 misaligned integers. */
6096 if (DECL_BIT_FIELD (field))
6098 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6099 i < ((int_bit_position (field) + (bit_offset % 64))
6100 + tree_low_cst (DECL_SIZE (field), 0)
6103 merge_classes (X86_64_INTEGER_CLASS,
6110 type = TREE_TYPE (field);
6112 /* Flexible array member is ignored. */
6113 if (TYPE_MODE (type) == BLKmode
6114 && TREE_CODE (type) == ARRAY_TYPE
6115 && TYPE_SIZE (type) == NULL_TREE
6116 && TYPE_DOMAIN (type) != NULL_TREE
6117 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6122 if (!warned && warn_psabi)
6125 inform (input_location,
6126 "the ABI of passing struct with"
6127 " a flexible array member has"
6128 " changed in GCC 4.4");
6132 num = classify_argument (TYPE_MODE (type), type,
6134 (int_bit_position (field)
6135 + bit_offset) % 256);
6138 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6139 for (i = 0; i < num && (i + pos) < words; i++)
6141 merge_classes (subclasses[i], classes[i + pos]);
6148 /* Arrays are handled as small records. */
6151 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6152 TREE_TYPE (type), subclasses, bit_offset);
6156 /* The partial classes are now full classes. */
6157 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6158 subclasses[0] = X86_64_SSE_CLASS;
6159 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6160 && !((bit_offset % 64) == 0 && bytes == 4))
6161 subclasses[0] = X86_64_INTEGER_CLASS;
6163 for (i = 0; i < words; i++)
6164 classes[i] = subclasses[i % num];
6169 case QUAL_UNION_TYPE:
6170 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6172 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6174 if (TREE_CODE (field) == FIELD_DECL)
6178 if (TREE_TYPE (field) == error_mark_node)
6181 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6182 TREE_TYPE (field), subclasses,
6186 for (i = 0; i < num; i++)
6187 classes[i] = merge_classes (subclasses[i], classes[i]);
6198 /* When size > 16 bytes, if the first eightbyte isn't
6199 X86_64_SSE_CLASS or any of the other ones aren't
6200 X86_64_SSEUP_CLASS, everything should be passed in memory. */
6202 if (classes[0] != X86_64_SSE_CLASS)
6205 for (i = 1; i < words; i++)
6206 if (classes[i] != X86_64_SSEUP_CLASS)
6210 /* Final merger cleanup. */
6211 for (i = 0; i < words; i++)
6213 /* If one class is MEMORY, everything should be passed in memory. */
6215 if (classes[i] == X86_64_MEMORY_CLASS)
6218 /* The X86_64_SSEUP_CLASS should always be preceded by
6219 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6220 if (classes[i] == X86_64_SSEUP_CLASS
6221 && classes[i - 1] != X86_64_SSE_CLASS
6222 && classes[i - 1] != X86_64_SSEUP_CLASS)
6224 /* The first one should never be X86_64_SSEUP_CLASS. */
6225 gcc_assert (i != 0);
6226 classes[i] = X86_64_SSE_CLASS;
6229 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6230 everything should be passed in memory. */
6231 if (classes[i] == X86_64_X87UP_CLASS
6232 && (classes[i - 1] != X86_64_X87_CLASS))
6236 /* The first one should never be X86_64_X87UP_CLASS. */
6237 gcc_assert (i != 0);
6238 if (!warned && warn_psabi)
6241 inform (input_location,
6242 "the ABI of passing union with long double"
6243 " has changed in GCC 4.4");
6251 /* Compute the alignment needed. We align all types to natural boundaries
6252 with the exception of XFmode, which is aligned to 64bits. */
6253 if (mode != VOIDmode && mode != BLKmode)
6255 int mode_alignment = GET_MODE_BITSIZE (mode);
6258 mode_alignment = 128;
6259 else if (mode == XCmode)
6260 mode_alignment = 256;
6261 if (COMPLEX_MODE_P (mode))
6262 mode_alignment /= 2;
6263 /* Misaligned fields are always returned in memory. */
6264 if (bit_offset % mode_alignment)
6268 /* For V1xx modes, just use the base mode. */
6269 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6270 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6271 mode = GET_MODE_INNER (mode);
6273 /* Classification of atomic types. */
6278 classes[0] = X86_64_SSE_CLASS;
6281 classes[0] = X86_64_SSE_CLASS;
6282 classes[1] = X86_64_SSEUP_CLASS;
6292 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6296 classes[0] = X86_64_INTEGERSI_CLASS;
6299 else if (size <= 64)
6301 classes[0] = X86_64_INTEGER_CLASS;
6304 else if (size <= 64+32)
6306 classes[0] = X86_64_INTEGER_CLASS;
6307 classes[1] = X86_64_INTEGERSI_CLASS;
6310 else if (size <= 64+64)
6312 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6320 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6324 /* OImode shouldn't be used directly. */
6329 if (!(bit_offset % 64))
6330 classes[0] = X86_64_SSESF_CLASS;
6332 classes[0] = X86_64_SSE_CLASS;
6335 classes[0] = X86_64_SSEDF_CLASS;
6338 classes[0] = X86_64_X87_CLASS;
6339 classes[1] = X86_64_X87UP_CLASS;
6342 classes[0] = X86_64_SSE_CLASS;
6343 classes[1] = X86_64_SSEUP_CLASS;
6346 classes[0] = X86_64_SSE_CLASS;
6347 if (!(bit_offset % 64))
6353 if (!warned && warn_psabi)
6356 inform (input_location,
6357 "the ABI of passing structure with complex float"
6358 " member has changed in GCC 4.4");
6360 classes[1] = X86_64_SSESF_CLASS;
6364 classes[0] = X86_64_SSEDF_CLASS;
6365 classes[1] = X86_64_SSEDF_CLASS;
6368 classes[0] = X86_64_COMPLEX_X87_CLASS;
6371 /* These modes are larger than 16 bytes. */
6379 classes[0] = X86_64_SSE_CLASS;
6380 classes[1] = X86_64_SSEUP_CLASS;
6381 classes[2] = X86_64_SSEUP_CLASS;
6382 classes[3] = X86_64_SSEUP_CLASS;
6390 classes[0] = X86_64_SSE_CLASS;
6391 classes[1] = X86_64_SSEUP_CLASS;
6399 classes[0] = X86_64_SSE_CLASS;
6405 gcc_assert (VECTOR_MODE_P (mode));
6410 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6412 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6413 classes[0] = X86_64_INTEGERSI_CLASS;
6415 classes[0] = X86_64_INTEGER_CLASS;
6416 classes[1] = X86_64_INTEGER_CLASS;
6417 return 1 + (bytes > 8);
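/* A worked example of the classification above (illustration only):
   a 16-byte struct occupies two eightbytes, each classified on its
   own and then merged field by field.  */
#if 0
struct s { double d; long l; };
/* classify_argument yields classes[0] = X86_64_SSEDF_CLASS (the double)
   and classes[1] = X86_64_INTEGER_CLASS (the long), and returns 2, so
   the struct is passed in one SSE and one integer register.  */
#endif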
6421 /* Examine the argument and set the number of registers required in each
6422 class. Return 0 iff the parameter should be passed in memory. */
6424 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6425 int *int_nregs, int *sse_nregs)
6427 enum x86_64_reg_class regclass[MAX_CLASSES];
6428 int n = classify_argument (mode, type, regclass, 0);
6434 for (n--; n >= 0; n--)
6435 switch (regclass[n])
6437 case X86_64_INTEGER_CLASS:
6438 case X86_64_INTEGERSI_CLASS:
6441 case X86_64_SSE_CLASS:
6442 case X86_64_SSESF_CLASS:
6443 case X86_64_SSEDF_CLASS:
6446 case X86_64_NO_CLASS:
6447 case X86_64_SSEUP_CLASS:
6449 case X86_64_X87_CLASS:
6450 case X86_64_X87UP_CLASS:
6454 case X86_64_COMPLEX_X87_CLASS:
6455 return in_return ? 2 : 0;
6456 case X86_64_MEMORY_CLASS:
6462 /* Construct a container for the argument used by the GCC interface. See
6463 FUNCTION_ARG for the detailed description. */
6466 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6467 const_tree type, int in_return, int nintregs, int nsseregs,
6468 const int *intreg, int sse_regno)
6470 /* The following variables hold the static issued_error state. */
6471 static bool issued_sse_arg_error;
6472 static bool issued_sse_ret_error;
6473 static bool issued_x87_ret_error;
6475 enum machine_mode tmpmode;
6477 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6478 enum x86_64_reg_class regclass[MAX_CLASSES];
6482 int needed_sseregs, needed_intregs;
6483 rtx exp[MAX_CLASSES];
6486 n = classify_argument (mode, type, regclass, 0);
6489 if (!examine_argument (mode, type, in_return, &needed_intregs,
6492 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6495 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6496 some less clueful developer tries to use floating-point anyway. */
6497 if (needed_sseregs && !TARGET_SSE)
6501 if (!issued_sse_ret_error)
6503 error ("SSE register return with SSE disabled");
6504 issued_sse_ret_error = true;
6507 else if (!issued_sse_arg_error)
6509 error ("SSE register argument with SSE disabled");
6510 issued_sse_arg_error = true;
6515 /* Likewise, error if the ABI requires us to return values in the
6516 x87 registers and the user specified -mno-80387. */
6517 if (!TARGET_80387 && in_return)
6518 for (i = 0; i < n; i++)
6519 if (regclass[i] == X86_64_X87_CLASS
6520 || regclass[i] == X86_64_X87UP_CLASS
6521 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6523 if (!issued_x87_ret_error)
6525 error ("x87 register return with x87 disabled");
6526 issued_x87_ret_error = true;
6531 /* First construct simple cases. Avoid SCmode, since we want to use a
6532 single register to pass this type. */
6533 if (n == 1 && mode != SCmode)
6534 switch (regclass[0])
6536 case X86_64_INTEGER_CLASS:
6537 case X86_64_INTEGERSI_CLASS:
6538 return gen_rtx_REG (mode, intreg[0]);
6539 case X86_64_SSE_CLASS:
6540 case X86_64_SSESF_CLASS:
6541 case X86_64_SSEDF_CLASS:
6542 if (mode != BLKmode)
6543 return gen_reg_or_parallel (mode, orig_mode,
6544 SSE_REGNO (sse_regno));
6546 case X86_64_X87_CLASS:
6547 case X86_64_COMPLEX_X87_CLASS:
6548 return gen_rtx_REG (mode, FIRST_STACK_REG);
6549 case X86_64_NO_CLASS:
6550 /* Zero sized array, struct or class. */
6555 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6556 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6557 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6559 && regclass[0] == X86_64_SSE_CLASS
6560 && regclass[1] == X86_64_SSEUP_CLASS
6561 && regclass[2] == X86_64_SSEUP_CLASS
6562 && regclass[3] == X86_64_SSEUP_CLASS
6564 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6567 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6568 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6569 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6570 && regclass[1] == X86_64_INTEGER_CLASS
6571 && (mode == CDImode || mode == TImode || mode == TFmode)
6572 && intreg[0] + 1 == intreg[1])
6573 return gen_rtx_REG (mode, intreg[0]);
6575 /* Otherwise figure out the entries of the PARALLEL. */
6576 for (i = 0; i < n; i++)
6580 switch (regclass[i])
6582 case X86_64_NO_CLASS:
6584 case X86_64_INTEGER_CLASS:
6585 case X86_64_INTEGERSI_CLASS:
6586 /* Merge TImodes on aligned occasions here too. */
6587 if (i * 8 + 8 > bytes)
6588 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6589 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6593 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6594 if (tmpmode == BLKmode)
6596 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6597 gen_rtx_REG (tmpmode, *intreg),
6601 case X86_64_SSESF_CLASS:
6602 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6603 gen_rtx_REG (SFmode,
6604 SSE_REGNO (sse_regno)),
6608 case X86_64_SSEDF_CLASS:
6609 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6610 gen_rtx_REG (DFmode,
6611 SSE_REGNO (sse_regno)),
6615 case X86_64_SSE_CLASS:
6623 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6633 && regclass[1] == X86_64_SSEUP_CLASS
6634 && regclass[2] == X86_64_SSEUP_CLASS
6635 && regclass[3] == X86_64_SSEUP_CLASS);
6642 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6643 gen_rtx_REG (tmpmode,
6644 SSE_REGNO (sse_regno)),
6653 /* Empty aligned struct, union or class. */
6657 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6658 for (i = 0; i < nexps; i++)
6659 XVECEXP (ret, 0, i) = exp [i];
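/* For the struct classified in the previous example, construct_container
   builds roughly the following PARALLEL (a sketch; the actual register
   numbers depend on how many arguments precede this one):

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                    (expr_list (reg:DI di) (const_int 8))])  */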
6663 /* Update the data in CUM to advance over an argument of mode MODE
6664 and data type TYPE. (TYPE is null for libcalls where that information
6665 may not be available.) */
6668 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6669 const_tree type, HOST_WIDE_INT bytes,
6670 HOST_WIDE_INT words)
6686 cum->words += words;
6687 cum->nregs -= words;
6688 cum->regno += words;
6690 if (cum->nregs <= 0)
6698 /* OImode shouldn't be used directly. */
6702 if (cum->float_in_sse < 2)
6705 if (cum->float_in_sse < 1)
6722 if (!type || !AGGREGATE_TYPE_P (type))
6724 cum->sse_words += words;
6725 cum->sse_nregs -= 1;
6726 cum->sse_regno += 1;
6727 if (cum->sse_nregs <= 0)
6741 if (!type || !AGGREGATE_TYPE_P (type))
6743 cum->mmx_words += words;
6744 cum->mmx_nregs -= 1;
6745 cum->mmx_regno += 1;
6746 if (cum->mmx_nregs <= 0)
6757 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6758 const_tree type, HOST_WIDE_INT words, bool named)
6760 int int_nregs, sse_nregs;
6762 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6763 if (!named && VALID_AVX256_REG_MODE (mode))
6766 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6767 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6769 cum->nregs -= int_nregs;
6770 cum->sse_nregs -= sse_nregs;
6771 cum->regno += int_nregs;
6772 cum->sse_regno += sse_nregs;
6776 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6777 cum->words = (cum->words + align - 1) & ~(align - 1);
6778 cum->words += words;
6783 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6784 HOST_WIDE_INT words)
6786 /* Otherwise, this should be passed indirectly. */
6787 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6789 cum->words += words;
6797 /* Update the data in CUM to advance over an argument of mode MODE and
6798 data type TYPE. (TYPE is null for libcalls where that information
6799 may not be available.) */
6802 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6803 const_tree type, bool named)
6805 HOST_WIDE_INT bytes, words;
6807 if (mode == BLKmode)
6808 bytes = int_size_in_bytes (type);
6810 bytes = GET_MODE_SIZE (mode);
6811 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6814 mode = type_natural_mode (type, NULL);
6816 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6817 function_arg_advance_ms_64 (cum, bytes, words);
6818 else if (TARGET_64BIT)
6819 function_arg_advance_64 (cum, mode, type, words, named);
6821 function_arg_advance_32 (cum, mode, type, bytes, words);
6824 /* Define where to put the arguments to a function.
6825 Value is zero to push the argument on the stack,
6826 or a hard register in which to store the argument.
6828 MODE is the argument's machine mode.
6829 TYPE is the data type of the argument (as a tree).
6830 This is null for libcalls where that information may
6832 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6833 the preceding args and about the function being called.
6834 NAMED is nonzero if this argument is a named parameter
6835 (otherwise it is an extra parameter matching an ellipsis). */
6838 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6839 enum machine_mode orig_mode, const_tree type,
6840 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6842 static bool warnedsse, warnedmmx;
6844 /* Avoid the AL settings for the Unix64 ABI. */
6845 if (mode == VOIDmode)
6861 if (words <= cum->nregs)
6863 int regno = cum->regno;
6865 /* Fastcall allocates the first two DWORD (SImode) or
6866 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6872 || (type && AGGREGATE_TYPE_P (type)))
6875 /* ECX, not EAX, is the first allocated register. */
6876 if (regno == AX_REG)
6879 return gen_rtx_REG (mode, regno);
6884 if (cum->float_in_sse < 2)
6887 if (cum->float_in_sse < 1)
6891 /* In 32bit, we pass TImode in xmm registers. */
6898 if (!type || !AGGREGATE_TYPE_P (type))
6900 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6903 warning (0, "SSE vector argument without SSE enabled "
6907 return gen_reg_or_parallel (mode, orig_mode,
6908 cum->sse_regno + FIRST_SSE_REG);
6913 /* OImode shouldn't be used directly. */
6922 if (!type || !AGGREGATE_TYPE_P (type))
6925 return gen_reg_or_parallel (mode, orig_mode,
6926 cum->sse_regno + FIRST_SSE_REG);
6936 if (!type || !AGGREGATE_TYPE_P (type))
6938 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6941 warning (0, "MMX vector argument without MMX enabled "
6945 return gen_reg_or_parallel (mode, orig_mode,
6946 cum->mmx_regno + FIRST_MMX_REG);
6955 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6956 enum machine_mode orig_mode, const_tree type, bool named)
6958 /* Handle a hidden AL argument containing the number of registers
6959 for varargs x86-64 functions. */
6960 if (mode == VOIDmode)
6961 return GEN_INT (cum->maybe_vaarg
6962 ? (cum->sse_nregs < 0
6963 ? X86_64_SSE_REGPARM_MAX
6978 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6984 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6986 &x86_64_int_parameter_registers [cum->regno],
6991 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6992 enum machine_mode orig_mode, bool named,
6993 HOST_WIDE_INT bytes)
6997 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6998 We use a value of -2 to specify that the current function call is MS_ABI. */
6999 if (mode == VOIDmode)
7000 return GEN_INT (-2);
7002 /* If we've run out of registers, it goes on the stack. */
7003 if (cum->nregs == 0)
7006 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7008 /* Only floating point modes are passed in anything but integer regs. */
7009 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7012 regno = cum->regno + FIRST_SSE_REG;
7017 /* Unnamed floating parameters are passed in both the
7018 SSE and integer registers. */
7019 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7020 t2 = gen_rtx_REG (mode, regno);
7021 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7022 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7023 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7026 /* Handle aggregate types passed in registers. */
7027 if (orig_mode == BLKmode)
7029 if (bytes > 0 && bytes <= 8)
7030 mode = (bytes > 4 ? DImode : SImode);
7031 if (mode == BLKmode)
7035 return gen_reg_or_parallel (mode, orig_mode, regno);
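/* An illustrative sketch (not part of this file) of the MS ABI
   convention implemented above: argument registers are positional,
   so a float in the third slot uses XMM2 and the corresponding
   integer register stays allocated but unused.  */
#if 0
void f (int a, int b, float c, int d);
/* a -> ECX, b -> EDX, c -> XMM2, d -> R9D; for an unnamed float in the
   third slot the code above builds a PARALLEL so the value is passed
   in both XMM2 and R8.  */
#endif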
7038 /* Return where to put the arguments to a function.
7039 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7041 MODE is the argument's machine mode. TYPE is the data type of the
7042 argument. It is null for libcalls where that information may not be
7043 available. CUM gives information about the preceding args and about
7044 the function being called. NAMED is nonzero if this argument is a
7045 named parameter (otherwise it is an extra parameter matching an
7049 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
7050 const_tree type, bool named)
7052 enum machine_mode mode = omode;
7053 HOST_WIDE_INT bytes, words;
7056 if (mode == BLKmode)
7057 bytes = int_size_in_bytes (type);
7059 bytes = GET_MODE_SIZE (mode);
7060 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7062 /* To simplify the code below, represent vector types with a vector mode
7063 even if MMX/SSE are not active. */
7064 if (type && TREE_CODE (type) == VECTOR_TYPE)
7065 mode = type_natural_mode (type, cum);
7067 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7068 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7069 else if (TARGET_64BIT)
7070 arg = function_arg_64 (cum, mode, omode, type, named);
7072 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7074 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
7076 /* This argument uses 256bit AVX modes. */
7078 cfun->machine->callee_pass_avx256_p = true;
7080 cfun->machine->caller_pass_avx256_p = true;
7086 /* A C expression that indicates when an argument must be passed by
7087 reference. If nonzero for an argument, a copy of that argument is
7088 made in memory and a pointer to the argument is passed instead of
7089 the argument itself. The pointer is passed in whatever way is
7090 appropriate for passing a pointer to that type. */
7093 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
7094 enum machine_mode mode ATTRIBUTE_UNUSED,
7095 const_tree type, bool named ATTRIBUTE_UNUSED)
7097 /* See Windows x64 Software Convention. */
7098 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7100 int msize = (int) GET_MODE_SIZE (mode);
7103 /* Arrays are passed by reference. */
7104 if (TREE_CODE (type) == ARRAY_TYPE)
7107 if (AGGREGATE_TYPE_P (type))
7109 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7110 are passed by reference. */
7111 msize = int_size_in_bytes (type);
7115 /* __m128 is passed by reference. */
7117 case 1: case 2: case 4: case 8:
7123 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
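/* Examples of the MS ABI rule above (illustration only):  */
#if 0
struct s8  { int a, b; };      /*  8 bytes: passed by value          */
struct s12 { int a, b, c; };   /* 12 bytes: passed by reference      */
typedef float v4f __attribute__ ((vector_size (16)));
                               /* 16 bytes (__m128): by reference    */
#endif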
7129 /* Return true when TYPE should be 128bit aligned for the 32bit argument
7130 passing ABI. XXX: This function is obsolete and is only used for
7131 checking psABI compatibility with previous versions of GCC. */
7134 ix86_compat_aligned_value_p (const_tree type)
7136 enum machine_mode mode = TYPE_MODE (type);
7137 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7141 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7143 if (TYPE_ALIGN (type) < 128)
7146 if (AGGREGATE_TYPE_P (type))
7148 /* Walk the aggregates recursively. */
7149 switch (TREE_CODE (type))
7153 case QUAL_UNION_TYPE:
7157 /* Walk all the structure fields. */
7158 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7160 if (TREE_CODE (field) == FIELD_DECL
7161 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7168 /* Just for use if some language passes arrays by value. */
7169 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7180 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7181 XXX: This function is obsolete and is only used for checking psABI
7182 compatibility with previous versions of GCC. */
7185 ix86_compat_function_arg_boundary (enum machine_mode mode,
7186 const_tree type, unsigned int align)
7188 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7189 natural boundaries. */
7190 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7192 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7193 make an exception for SSE modes since these require 128bit alignment.
7196 The handling here differs from field_alignment. ICC aligns MMX
7197 arguments to 4 byte boundaries, while structure fields are aligned
7198 to 8 byte boundaries. */
7201 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7202 align = PARM_BOUNDARY;
7206 if (!ix86_compat_aligned_value_p (type))
7207 align = PARM_BOUNDARY;
7210 if (align > BIGGEST_ALIGNMENT)
7211 align = BIGGEST_ALIGNMENT;
7215 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI. */
7219 ix86_contains_aligned_value_p (const_tree type)
7221 enum machine_mode mode = TYPE_MODE (type);
7223 if (mode == XFmode || mode == XCmode)
7226 if (TYPE_ALIGN (type) < 128)
7229 if (AGGREGATE_TYPE_P (type))
7231 /* Walk the aggregates recursively. */
7232 switch (TREE_CODE (type))
7236 case QUAL_UNION_TYPE:
7240 /* Walk all the structure fields. */
7241 for (field = TYPE_FIELDS (type);
7243 field = DECL_CHAIN (field))
7245 if (TREE_CODE (field) == FIELD_DECL
7246 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7253 /* Just for use if some language passes arrays by value. */
7254 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7263 return TYPE_ALIGN (type) >= 128;
7268 /* Return the alignment boundary, in bits, of an argument with the
7269 specified mode and type. */
7272 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7277 /* Since the main variant type is used for the call, convert TYPE
7278 to its main variant. */
7279 type = TYPE_MAIN_VARIANT (type);
7280 align = TYPE_ALIGN (type);
7283 align = GET_MODE_ALIGNMENT (mode);
7284 if (align < PARM_BOUNDARY)
7285 align = PARM_BOUNDARY;
7289 unsigned int saved_align = align;
7293 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7296 if (mode == XFmode || mode == XCmode)
7297 align = PARM_BOUNDARY;
7299 else if (!ix86_contains_aligned_value_p (type))
7300 align = PARM_BOUNDARY;
7303 align = PARM_BOUNDARY;
7308 && align != ix86_compat_function_arg_boundary (mode, type,
7312 inform (input_location,
7313 "The ABI for passing parameters with %d-byte"
7314 " alignment has changed in GCC 4.6",
7315 align / BITS_PER_UNIT);
7322 /* Return true if N is a possible register number for a function value. */
7325 ix86_function_value_regno_p (const unsigned int regno)
7332 case FIRST_FLOAT_REG:
7333 /* TODO: The function should depend on current function ABI but
7334 builtins.c would need updating then. Therefore we use the default ABI. */
7336 if (TARGET_64BIT && ix86_abi == MS_ABI)
7338 return TARGET_FLOAT_RETURNS_IN_80387;
7344 if (TARGET_MACHO || TARGET_64BIT)
7352 /* Define how to find the value returned by a function.
7353 VALTYPE is the data type of the value (as a tree).
7354 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7355 otherwise, FUNC is 0. */
7358 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7359 const_tree fntype, const_tree fn)
7363 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7364 we normally prevent this case when mmx is not available. However
7365 some ABIs may require the result to be returned like DImode. */
7366 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7367 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7369 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7370 we prevent this case when sse is not available. However some ABIs
7371 may require the result to be returned like integer TImode. */
7372 else if (mode == TImode
7373 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7374 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7376 /* 32-byte vector modes in %ymm0. */
7377 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7378 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7380 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7381 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7382 regno = FIRST_FLOAT_REG;
7384 /* Most things go in %eax. */
7387 /* Override FP return register with %xmm0 for local functions when
7388 SSE math is enabled or for functions with sseregparm attribute. */
7389 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7391 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7392 if ((sse_level >= 1 && mode == SFmode)
7393 || (sse_level == 2 && mode == DFmode))
7394 regno = FIRST_SSE_REG;
7397 /* OImode shouldn't be used directly. */
7398 gcc_assert (mode != OImode);
7400 return gen_rtx_REG (orig_mode, regno);
7404 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7409 /* Handle libcalls, which don't provide a type node. */
7410 if (valtype == NULL)
7422 return gen_rtx_REG (mode, FIRST_SSE_REG);
7425 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7429 return gen_rtx_REG (mode, AX_REG);
7433 ret = construct_container (mode, orig_mode, valtype, 1,
7434 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7435 x86_64_int_return_registers, 0);
7437 /* For zero sized structures, construct_container returns NULL, but we
7438 need to keep the rest of the compiler happy by returning a meaningful value. */
7440 ret = gen_rtx_REG (orig_mode, AX_REG);
7446 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7448 unsigned int regno = AX_REG;
7452 switch (GET_MODE_SIZE (mode))
7455 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7456 && !COMPLEX_MODE_P (mode))
7457 regno = FIRST_SSE_REG;
7461 if (mode == SFmode || mode == DFmode)
7462 regno = FIRST_SSE_REG;
7468 return gen_rtx_REG (orig_mode, regno);
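/* Sketch of the effect of the above (not part of this file): under the
   MS ABI, 16-byte vectors and SFmode/DFmode values come back in XMM0,
   while integer-sized values come back in RAX.  */
#if 0
typedef float v4f __attribute__ ((vector_size (16)));
v4f    ret_vec (void);   /* returned in XMM0 */
double ret_dbl (void);   /* returned in XMM0 */
long   ret_int (void);   /* returned in RAX  */
#endif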
7472 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7473 enum machine_mode orig_mode, enum machine_mode mode)
7475 const_tree fn, fntype;
7478 if (fntype_or_decl && DECL_P (fntype_or_decl))
7479 fn = fntype_or_decl;
7480 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7482 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7483 return function_value_ms_64 (orig_mode, mode);
7484 else if (TARGET_64BIT)
7485 return function_value_64 (orig_mode, mode, valtype);
7487 return function_value_32 (orig_mode, mode, fntype, fn);
7491 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7492 bool outgoing ATTRIBUTE_UNUSED)
7494 enum machine_mode mode, orig_mode;
7496 orig_mode = TYPE_MODE (valtype);
7497 mode = type_natural_mode (valtype, NULL);
7498 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7502 ix86_libcall_value (enum machine_mode mode)
7504 return ix86_function_value_1 (NULL, NULL, mode, mode);
7507 /* Return true iff type is returned in memory. */
7509 static bool ATTRIBUTE_UNUSED
7510 return_in_memory_32 (const_tree type, enum machine_mode mode)
7514 if (mode == BLKmode)
7517 size = int_size_in_bytes (type);
7519 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7522 if (VECTOR_MODE_P (mode) || mode == TImode)
7524 /* User-created vectors small enough to fit in EAX. */
7528 /* MMX/3dNow values are returned in MM0,
7529 except when it doesn't exist or the ABI prescribes otherwise. */
7531 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7533 /* SSE values are returned in XMM0, except when it doesn't exist. */
7537 /* AVX values are returned in YMM0, except when it doesn't exist. */
7548 /* OImode shouldn't be used directly. */
7549 gcc_assert (mode != OImode);
7554 static bool ATTRIBUTE_UNUSED
7555 return_in_memory_64 (const_tree type, enum machine_mode mode)
7557 int needed_intregs, needed_sseregs;
7558 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7561 static bool ATTRIBUTE_UNUSED
7562 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7564 HOST_WIDE_INT size = int_size_in_bytes (type);
7566 /* __m128 is returned in xmm0. */
7567 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7568 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7571 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7572 return size != 1 && size != 2 && size != 4 && size != 8;
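/* Example of the size rule above (illustration only):  */
#if 0
struct s8  { int a, b; };      /*  8 bytes: returned in RAX             */
struct s12 { int a, b, c; };   /* 12 bytes: returned via hidden pointer */
#endif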
7576 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7578 #ifdef SUBTARGET_RETURN_IN_MEMORY
7579 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7581 const enum machine_mode mode = type_natural_mode (type, NULL);
7585 if (ix86_function_type_abi (fntype) == MS_ABI)
7586 return return_in_memory_ms_64 (type, mode);
7588 return return_in_memory_64 (type, mode);
7591 return return_in_memory_32 (type, mode);
7595 /* When returning SSE vector types, we have a choice of either
7596 (1) being abi incompatible with a -march switch, or
7597 (2) generating an error.
7598 Given no good solution, I think the safest thing is one warning.
7599 The user won't be able to use -Werror, but....
7601 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7602 called in response to actually generating a caller or callee that
7603 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7604 via aggregate_value_p for general type probing from tree-ssa. */
7607 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7609 static bool warnedsse, warnedmmx;
7611 if (!TARGET_64BIT && type)
7613 /* Look at the return type of the function, not the function type. */
7614 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7616 if (!TARGET_SSE && !warnedsse)
7619 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7622 warning (0, "SSE vector return without SSE enabled "
7627 if (!TARGET_MMX && !warnedmmx)
7629 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7632 warning (0, "MMX vector return without MMX enabled "
7642 /* Create the va_list data type. */
7644 /* Return the calling-convention-specific va_list data type.
7645 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7648 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7650 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7652 /* For i386 we use a plain pointer to the argument area. */
7653 if (!TARGET_64BIT || abi == MS_ABI)
7654 return build_pointer_type (char_type_node);
7656 record = lang_hooks.types.make_type (RECORD_TYPE);
7657 type_decl = build_decl (BUILTINS_LOCATION,
7658 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7660 f_gpr = build_decl (BUILTINS_LOCATION,
7661 FIELD_DECL, get_identifier ("gp_offset"),
7662 unsigned_type_node);
7663 f_fpr = build_decl (BUILTINS_LOCATION,
7664 FIELD_DECL, get_identifier ("fp_offset"),
7665 unsigned_type_node);
7666 f_ovf = build_decl (BUILTINS_LOCATION,
7667 FIELD_DECL, get_identifier ("overflow_arg_area"),
7669 f_sav = build_decl (BUILTINS_LOCATION,
7670 FIELD_DECL, get_identifier ("reg_save_area"),
7673 va_list_gpr_counter_field = f_gpr;
7674 va_list_fpr_counter_field = f_fpr;
7676 DECL_FIELD_CONTEXT (f_gpr) = record;
7677 DECL_FIELD_CONTEXT (f_fpr) = record;
7678 DECL_FIELD_CONTEXT (f_ovf) = record;
7679 DECL_FIELD_CONTEXT (f_sav) = record;
7681 TYPE_STUB_DECL (record) = type_decl;
7682 TYPE_NAME (record) = type_decl;
7683 TYPE_FIELDS (record) = f_gpr;
7684 DECL_CHAIN (f_gpr) = f_fpr;
7685 DECL_CHAIN (f_fpr) = f_ovf;
7686 DECL_CHAIN (f_ovf) = f_sav;
7688 layout_type (record);
7690 /* The correct type is an array type of one element. */
7691 return build_array_type (record, build_index_type (size_zero_node));
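/* For reference, the record built above is the familiar SysV x86-64
   va_list, which at the C level looks like this (a sketch):  */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;    /* byte offset into reg_save_area for GPRs */
  unsigned int fp_offset;    /* byte offset into reg_save_area for SSE  */
  void *overflow_arg_area;   /* next argument passed on the stack       */
  void *reg_save_area;       /* where the prologue dumped the registers */
} __va_list_tag, va_list[1]; /* an array type of one element            */
#endif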
7694 /* Set up the builtin va_list data type and, for 64-bit, the additional
7695 calling-convention-specific va_list data types. */
7698 ix86_build_builtin_va_list (void)
7700 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7702 /* Initialize the ABI-specific va_list builtin types. */
7706 if (ix86_abi == MS_ABI)
7708 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7709 if (TREE_CODE (t) != RECORD_TYPE)
7710 t = build_variant_type_copy (t);
7711 sysv_va_list_type_node = t;
7716 if (TREE_CODE (t) != RECORD_TYPE)
7717 t = build_variant_type_copy (t);
7718 sysv_va_list_type_node = t;
7720 if (ix86_abi != MS_ABI)
7722 t = ix86_build_builtin_va_list_abi (MS_ABI);
7723 if (TREE_CODE (t) != RECORD_TYPE)
7724 t = build_variant_type_copy (t);
7725 ms_va_list_type_node = t;
7730 if (TREE_CODE (t) != RECORD_TYPE)
7731 t = build_variant_type_copy (t);
7732 ms_va_list_type_node = t;
7739 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7742 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7748 /* GPR size of varargs save area. */
7749 if (cfun->va_list_gpr_size)
7750 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7752 ix86_varargs_gpr_size = 0;
7754 /* FPR size of varargs save area. We don't need it if we don't pass
7755 anything in SSE registers. */
7756 if (TARGET_SSE && cfun->va_list_fpr_size)
7757 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7759 ix86_varargs_fpr_size = 0;
7761 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7764 save_area = frame_pointer_rtx;
7765 set = get_varargs_alias_set ();
7767 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7768 if (max > X86_64_REGPARM_MAX)
7769 max = X86_64_REGPARM_MAX;
7771 for (i = cum->regno; i < max; i++)
7773 mem = gen_rtx_MEM (Pmode,
7774 plus_constant (save_area, i * UNITS_PER_WORD));
7775 MEM_NOTRAP_P (mem) = 1;
7776 set_mem_alias_set (mem, set);
7777 emit_move_insn (mem, gen_rtx_REG (Pmode,
7778 x86_64_int_parameter_registers[i]));
7781 if (ix86_varargs_fpr_size)
7783 enum machine_mode smode;
7786 /* Now emit code to save SSE registers. The AX parameter contains the
7787 number of SSE parameter registers used to call this function, though
7788 all we actually check here is the zero/non-zero status. */
7790 label = gen_label_rtx ();
7791 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7792 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7795 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7796 we used movdqa (i.e. TImode) instead? Perhaps even better would
7797 be if we could determine the real mode of the data, via a hook
7798 into pass_stdarg. Ignore all that for now. */
7800 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7801 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7803 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7804 if (max > X86_64_SSE_REGPARM_MAX)
7805 max = X86_64_SSE_REGPARM_MAX;
7807 for (i = cum->sse_regno; i < max; ++i)
7809 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7810 mem = gen_rtx_MEM (smode, mem);
7811 MEM_NOTRAP_P (mem) = 1;
7812 set_mem_alias_set (mem, set);
7813 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7815 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7823 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7825 alias_set_type set = get_varargs_alias_set ();
7828 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7832 mem = gen_rtx_MEM (Pmode,
7833 plus_constant (virtual_incoming_args_rtx,
7834 i * UNITS_PER_WORD));
7835 MEM_NOTRAP_P (mem) = 1;
7836 set_mem_alias_set (mem, set);
7838 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7839 emit_move_insn (mem, reg);
7844 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7845 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7848 CUMULATIVE_ARGS next_cum;
7851 /* This argument doesn't appear to be used anymore. Which is good,
7852 because the old code here didn't suppress rtl generation. */
7853 gcc_assert (!no_rtl);
7858 fntype = TREE_TYPE (current_function_decl);
7860 /* For varargs, we do not want to skip the dummy va_dcl argument.
7861 For stdargs, we do want to skip the last named argument. */
7863 if (stdarg_p (fntype))
7864 ix86_function_arg_advance (&next_cum, mode, type, true);
7866 if (cum->call_abi == MS_ABI)
7867 setup_incoming_varargs_ms_64 (&next_cum);
7869 setup_incoming_varargs_64 (&next_cum);
7872 /* Check whether TYPE is a va_list of kind char *. */
7875 is_va_list_char_pointer (tree type)
7879 /* For 32-bit it is always true. */
7882 canonic = ix86_canonical_va_list_type (type);
7883 return (canonic == ms_va_list_type_node
7884 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7887 /* Implement va_start. */
7890 ix86_va_start (tree valist, rtx nextarg)
7892 HOST_WIDE_INT words, n_gpr, n_fpr;
7893 tree f_gpr, f_fpr, f_ovf, f_sav;
7894 tree gpr, fpr, ovf, sav, t;
7898 if (flag_split_stack
7899 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7901 unsigned int scratch_regno;
7903 /* When we are splitting the stack, we can't refer to the stack
7904 arguments using internal_arg_pointer, because they may be on
7905 the old stack. The split stack prologue will arrange to
7906 leave a pointer to the old stack arguments in a scratch
7907 register, which we here copy to a pseudo-register. The split
7908 stack prologue can't set the pseudo-register directly because
7909 it (the prologue) runs before any registers have been saved. */
7911 scratch_regno = split_stack_prologue_scratch_regno ();
7912 if (scratch_regno != INVALID_REGNUM)
7916 reg = gen_reg_rtx (Pmode);
7917 cfun->machine->split_stack_varargs_pointer = reg;
7920 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7924 push_topmost_sequence ();
7925 emit_insn_after (seq, entry_of_function ());
7926 pop_topmost_sequence ();
7930 /* Only the 64-bit target needs something special. */
7931 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7933 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7934 std_expand_builtin_va_start (valist, nextarg);
7939 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7940 next = expand_binop (ptr_mode, add_optab,
7941 cfun->machine->split_stack_varargs_pointer,
7942 crtl->args.arg_offset_rtx,
7943 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7944 convert_move (va_r, next, 0);
7949 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7950 f_fpr = DECL_CHAIN (f_gpr);
7951 f_ovf = DECL_CHAIN (f_fpr);
7952 f_sav = DECL_CHAIN (f_ovf);
7954 valist = build_simple_mem_ref (valist);
7955 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7956 /* The following should be folded into the MEM_REF offset. */
7957 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7959 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7961 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7963 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7966 /* Count the number of gp and fp argument registers used. */
7967 words = crtl->args.info.words;
7968 n_gpr = crtl->args.info.regno;
7969 n_fpr = crtl->args.info.sse_regno;
7971 if (cfun->va_list_gpr_size)
7973 type = TREE_TYPE (gpr);
7974 t = build2 (MODIFY_EXPR, type,
7975 gpr, build_int_cst (type, n_gpr * 8));
7976 TREE_SIDE_EFFECTS (t) = 1;
7977 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7980 if (TARGET_SSE && cfun->va_list_fpr_size)
7982 type = TREE_TYPE (fpr);
7983 t = build2 (MODIFY_EXPR, type, fpr,
7984 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7985 TREE_SIDE_EFFECTS (t) = 1;
7986 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7989 /* Find the overflow area. */
7990 type = TREE_TYPE (ovf);
7991 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7992 ovf_rtx = crtl->args.internal_arg_pointer;
7994 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7995 t = make_tree (type, ovf_rtx);
7997 t = build2 (POINTER_PLUS_EXPR, type, t,
7998 size_int (words * UNITS_PER_WORD));
7999 t = build2 (MODIFY_EXPR, type, ovf, t);
8000 TREE_SIDE_EFFECTS (t) = 1;
8001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8003 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8005 /* Find the register save area.
8006 The function prologue saves it right above the stack frame. */
8007 type = TREE_TYPE (sav);
8008 t = make_tree (type, frame_pointer_rtx);
8009 if (!ix86_varargs_gpr_size)
8010 t = build2 (POINTER_PLUS_EXPR, type, t,
8011 size_int (-8 * X86_64_REGPARM_MAX));
8012 t = build2 (MODIFY_EXPR, type, sav, t);
8013 TREE_SIDE_EFFECTS (t) = 1;
8014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
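/* A worked example of the offsets set up above (illustration only):
   for  int f (int a, double b, ...)  the named arguments consume one
   GPR and one SSE register, so va_start stores

     gp_offset = 1 * 8           =  8
     fp_offset = 1 * 16 + 8 * 6  = 64   (X86_64_REGPARM_MAX being 6)

   and reg_save_area + gp_offset is the first unnamed GPR slot.  */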
8018 /* Implement va_arg. */
8021 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8024 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8025 tree f_gpr, f_fpr, f_ovf, f_sav;
8026 tree gpr, fpr, ovf, sav, t;
8028 tree lab_false, lab_over = NULL_TREE;
8033 enum machine_mode nat_mode;
8034 unsigned int arg_boundary;
8036 /* Only the 64-bit target needs something special. */
8037 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8038 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8040 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8041 f_fpr = DECL_CHAIN (f_gpr);
8042 f_ovf = DECL_CHAIN (f_fpr);
8043 f_sav = DECL_CHAIN (f_ovf);
8045 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8046 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8047 valist = build_va_arg_indirect_ref (valist);
8048 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8049 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8050 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8052 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8054 type = build_pointer_type (type);
8055 size = int_size_in_bytes (type);
8056 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8058 nat_mode = type_natural_mode (type, NULL);
8067 /* Unnamed 256bit vector mode parameters are passed on the stack. */
8068 if (ix86_cfun_abi () == SYSV_ABI)
8075 container = construct_container (nat_mode, TYPE_MODE (type),
8076 type, 0, X86_64_REGPARM_MAX,
8077 X86_64_SSE_REGPARM_MAX, intreg,
8082 /* Pull the value out of the saved registers. */
8084 addr = create_tmp_var (ptr_type_node, "addr");
8088 int needed_intregs, needed_sseregs;
8090 tree int_addr, sse_addr;
8092 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8093 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8095 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8097 need_temp = (!REG_P (container)
8098 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8099 || TYPE_ALIGN (type) > 128));
8101 /* In case we are passing a structure, verify that it occupies a consecutive
8102 block of the register save area. If not, we need to do moves. */
8103 if (!need_temp && !REG_P (container))
8105 /* Verify that all registers are strictly consecutive. */
8106 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8110 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8112 rtx slot = XVECEXP (container, 0, i);
8113 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8114 || INTVAL (XEXP (slot, 1)) != i * 16)
8122 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8124 rtx slot = XVECEXP (container, 0, i);
8125 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8126 || INTVAL (XEXP (slot, 1)) != i * 8)
8138 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8139 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8142 /* First ensure that we fit completely in registers. */
8145 t = build_int_cst (TREE_TYPE (gpr),
8146 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8147 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8148 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8149 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8150 gimplify_and_add (t, pre_p);
8154 t = build_int_cst (TREE_TYPE (fpr),
8155 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8156 + X86_64_REGPARM_MAX * 8);
8157 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8158 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8159 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8160 gimplify_and_add (t, pre_p);
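/* Worked example (illustrative, not in the original sources): with
   X86_64_REGPARM_MAX == 6, GPR slots are 8 bytes, so GPR holds a byte
   offset in 0, 8, ..., 48.  If needed_intregs == 2, the test above
   branches to lab_false once gpr >= (6 - 2 + 1) * 8 == 40, i.e. exactly
   when fewer than two 8-byte slots remain.  The FPR test is analogous
   with 16-byte slots, biased by the 48-byte GPR area.  */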
8163 /* Compute index to start of area used for integer regs. */
8166 /* int_addr = gpr + sav; */
8167 t = fold_convert (sizetype, gpr);
8168 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8169 gimplify_assign (int_addr, t, pre_p);
8173 /* sse_addr = fpr + sav; */
8174 t = fold_convert (sizetype, fpr);
8175 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8176 gimplify_assign (sse_addr, t, pre_p);
8180 int i, prev_size = 0;
8181 tree temp = create_tmp_var (type, "va_arg_tmp");
8184 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8185 gimplify_assign (addr, t, pre_p);
8187 for (i = 0; i < XVECLEN (container, 0); i++)
8189 rtx slot = XVECEXP (container, 0, i);
8190 rtx reg = XEXP (slot, 0);
8191 enum machine_mode mode = GET_MODE (reg);
8197 tree dest_addr, dest;
8198 int cur_size = GET_MODE_SIZE (mode);
8200 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8201 prev_size = INTVAL (XEXP (slot, 1));
8202 if (prev_size + cur_size > size)
8204 cur_size = size - prev_size;
8205 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8206 if (mode == BLKmode)
8209 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8210 if (mode == GET_MODE (reg))
8211 addr_type = build_pointer_type (piece_type);
8213 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8215 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8218 if (SSE_REGNO_P (REGNO (reg)))
8220 src_addr = sse_addr;
8221 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8225 src_addr = int_addr;
8226 src_offset = REGNO (reg) * 8;
8228 src_addr = fold_convert (addr_type, src_addr);
8229 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8230 size_int (src_offset));
8232 dest_addr = fold_convert (daddr_type, addr);
8233 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8234 size_int (prev_size));
8235 if (cur_size == GET_MODE_SIZE (mode))
8237 src = build_va_arg_indirect_ref (src_addr);
8238 dest = build_va_arg_indirect_ref (dest_addr);
8240 gimplify_assign (dest, src, pre_p);
8245 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8246 3, dest_addr, src_addr,
8247 size_int (cur_size));
8248 gimplify_and_add (copy, pre_p);
8250 prev_size += cur_size;
8256 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8257 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8258 gimplify_assign (gpr, t, pre_p);
8263 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8264 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8265 gimplify_assign (fpr, t, pre_p);
8268 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8270 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8273 /* ... otherwise out of the overflow area. */
8275 /* When we align a parameter on the stack for the caller, if the parameter
8276 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8277 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Here we match the callee
8278 with the caller. */
8279 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8280 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8281 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8283 /* Care for on-stack alignment if needed. */
8284 if (arg_boundary <= 64 || size == 0)
8288 HOST_WIDE_INT align = arg_boundary / 8;
8289 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8290 size_int (align - 1));
8291 t = fold_convert (sizetype, t);
8292 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8293 size_int (-align));
8294 t = fold_convert (TREE_TYPE (ovf), t);
8297 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8298 gimplify_assign (addr, t, pre_p);
8300 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8301 size_int (rsize * UNITS_PER_WORD));
8302 gimplify_assign (unshare_expr (ovf), t, pre_p);
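/* The rounding above is the usual align-up idiom; e.g. with
   arg_boundary == 128 (align == 16 bytes) it computes

	addr = (ovf + 15) & -16;

   so an overflow pointer of 0x1004 yields 0x1010, while one already at
   0x1010 is unchanged.  (Illustrative numbers, not from the sources.)  */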
8305 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8307 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8308 addr = fold_convert (ptrtype, addr);
8311 addr = build_va_arg_indirect_ref (addr);
8312 return build_va_arg_indirect_ref (addr);
8315 /* Return true if OPNUM's MEM should be matched
8316 in movabs* patterns. */
8319 ix86_check_movabs (rtx insn, int opnum)
8323 set = PATTERN (insn);
8324 if (GET_CODE (set) == PARALLEL)
8325 set = XVECEXP (set, 0, 0);
8326 gcc_assert (GET_CODE (set) == SET);
8327 mem = XEXP (set, opnum);
8328 while (GET_CODE (mem) == SUBREG)
8329 mem = SUBREG_REG (mem);
8330 gcc_assert (MEM_P (mem));
8331 return volatile_ok || !MEM_VOLATILE_P (mem);
8334 /* Initialize the table of extra 80387 mathematical constants. */
8337 init_ext_80387_constants (void)
8339 static const char * cst[5] =
8341 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8342 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8343 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8344 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8345 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8349 for (i = 0; i < 5; i++)
8351 real_from_string (&ext_80387_constants_table[i], cst[i]);
8352 /* Ensure each constant is rounded to XFmode precision. */
8353 real_convert (&ext_80387_constants_table[i],
8354 XFmode, &ext_80387_constants_table[i]);
8357 ext_80387_constants_init = 1;
8360 /* Return non-zero if the constant is something that
8361 can be loaded with a special instruction. */
8364 standard_80387_constant_p (rtx x)
8366 enum machine_mode mode = GET_MODE (x);
8370 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8373 if (x == CONST0_RTX (mode))
8375 if (x == CONST1_RTX (mode))
8378 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8380 /* For XFmode constants, try to find a special 80387 instruction when
8381 optimizing for size or on those CPUs that benefit from them. */
8383 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8387 if (! ext_80387_constants_init)
8388 init_ext_80387_constants ();
8390 for (i = 0; i < 5; i++)
8391 if (real_identical (&r, &ext_80387_constants_table[i]))
8395 /* A load of the constant -0.0 or -1.0 will be split into a
8396 fldz;fchs or fld1;fchs sequence. */
8397 if (real_isnegzero (&r))
8399 if (real_identical (&r, &dconstm1))
8405 /* Return the opcode of the special instruction to be used to load
8406 the constant X. */
8409 standard_80387_constant_opcode (rtx x)
8411 switch (standard_80387_constant_p (x))
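/* A sketch of the elided cases, keyed to the return values of
   standard_80387_constant_p above: 1 -> "fldz", 2 -> "fld1",
   3..7 -> "fldlg2", "fldln2", "fldl2e", "fldl2t", "fldpi" (following
   the ext_80387_constants_table order), while 8 and 9 (the negated
   constants) have no single opcode and rely on the fldz;fchs /
   fld1;fchs split noted above.  */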
8435 /* Return the CONST_DOUBLE representing the 80387 constant that is
8436 loaded by the specified special instruction. The argument IDX
8437 matches the return value from standard_80387_constant_p. */
8440 standard_80387_constant_rtx (int idx)
8444 if (! ext_80387_constants_init)
8445 init_ext_80387_constants ();
8461 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8465 /* Return 1 if X is all 0s and 2 if X is all 1s
8466 in a supported SSE vector mode. */
8469 standard_sse_constant_p (rtx x)
8471 enum machine_mode mode = GET_MODE (x);
8473 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8475 if (vector_all_ones_operand (x, mode))
8491 /* Return the opcode of the special instruction to be used to load
8492 the constant X. */
8495 standard_sse_constant_opcode (rtx insn, rtx x)
8497 switch (standard_sse_constant_p (x))
8500 switch (get_attr_mode (insn))
8503 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8505 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8506 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8508 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8510 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8511 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8513 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8515 return "vxorps\t%x0, %x0, %x0";
8517 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8518 return "vxorps\t%x0, %x0, %x0";
8520 return "vxorpd\t%x0, %x0, %x0";
8522 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8523 return "vxorps\t%x0, %x0, %x0";
8525 return "vpxor\t%x0, %x0, %x0";
8530 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8537 /* Return true if OP contains a symbol reference. */
8540 symbolic_reference_mentioned_p (rtx op)
8545 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8548 fmt = GET_RTX_FORMAT (GET_CODE (op));
8549 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8555 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8556 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8560 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8567 /* Return true if it is appropriate to emit `ret' instructions in the
8568 body of a function. Do this only if the epilogue is simple, needing a
8569 couple of insns. Prior to reloading, we can't tell how many registers
8570 must be saved, so return false then. Return false if there is no frame
8571 marker to de-allocate. */
8574 ix86_can_use_return_insn_p (void)
8576 struct ix86_frame frame;
8578 if (! reload_completed || frame_pointer_needed)
8581 /* Don't allow more than 32k pop, since that's all we can do
8582 with one instruction. */
8583 if (crtl->args.pops_args && crtl->args.size >= 32768)
8586 ix86_compute_frame_layout (&frame);
8587 return (frame.stack_pointer_offset == UNITS_PER_WORD
8588 && (frame.nregs + frame.nsseregs) == 0);
8591 /* Value should be nonzero if functions must have frame pointers.
8592 Zero means the frame pointer need not be set up (and parms may
8593 be accessed via the stack pointer) in functions that seem suitable. */
8596 ix86_frame_pointer_required (void)
8598 /* If we accessed previous frames, then the generated code expects
8599 to be able to access the saved ebp value in our frame. */
8600 if (cfun->machine->accesses_prev_frame)
8603 /* Several x86 OSes need a frame pointer for other reasons,
8604 usually pertaining to setjmp. */
8605 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8608 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8609 turns off the frame pointer by default. Turn it back on now if
8610 we've not got a leaf function. */
8611 if (TARGET_OMIT_LEAF_FRAME_POINTER
8612 && (!current_function_is_leaf
8613 || ix86_current_function_calls_tls_descriptor))
8616 if (crtl->profile && !flag_fentry)
8622 /* Record that the current function accesses previous call frames. */
8625 ix86_setup_frame_addresses (void)
8627 cfun->machine->accesses_prev_frame = 1;
8630 #ifndef USE_HIDDEN_LINKONCE
8631 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8632 # define USE_HIDDEN_LINKONCE 1
8634 # define USE_HIDDEN_LINKONCE 0
8638 static int pic_labels_used;
8640 /* Fills in the label name that should be used for a pc thunk for
8641 the given register. */
8644 get_pc_thunk_name (char name[32], unsigned int regno)
8646 gcc_assert (!TARGET_64BIT);
8648 if (USE_HIDDEN_LINKONCE)
8649 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8651 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
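/* For example (illustrative), with USE_HIDDEN_LINKONCE and
   regno == BX_REG this produces "__i686.get_pc_thunk.bx", whose body
   emitted below is simply

	movl	(%esp), %ebx
	ret

   i.e. it copies the return address into the PIC register.  */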
8655 /* This function generates code for -fpic that loads %ebx with
8656 the return address of the caller and then returns. */
8659 ix86_code_end (void)
8664 for (regno = AX_REG; regno <= SP_REG; regno++)
8669 if (!(pic_labels_used & (1 << regno)))
8672 get_pc_thunk_name (name, regno);
8674 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8675 get_identifier (name),
8676 build_function_type (void_type_node, void_list_node));
8677 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8678 NULL_TREE, void_type_node);
8679 TREE_PUBLIC (decl) = 1;
8680 TREE_STATIC (decl) = 1;
8685 switch_to_section (darwin_sections[text_coal_section]);
8686 fputs ("\t.weak_definition\t", asm_out_file);
8687 assemble_name (asm_out_file, name);
8688 fputs ("\n\t.private_extern\t", asm_out_file);
8689 assemble_name (asm_out_file, name);
8690 putc ('\n', asm_out_file);
8691 ASM_OUTPUT_LABEL (asm_out_file, name);
8692 DECL_WEAK (decl) = 1;
8696 if (USE_HIDDEN_LINKONCE)
8698 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8700 targetm.asm_out.unique_section (decl, 0);
8701 switch_to_section (get_named_section (decl, NULL, 0));
8703 targetm.asm_out.globalize_label (asm_out_file, name);
8704 fputs ("\t.hidden\t", asm_out_file);
8705 assemble_name (asm_out_file, name);
8706 putc ('\n', asm_out_file);
8707 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8711 switch_to_section (text_section);
8712 ASM_OUTPUT_LABEL (asm_out_file, name);
8715 DECL_INITIAL (decl) = make_node (BLOCK);
8716 current_function_decl = decl;
8717 init_function_start (decl);
8718 first_function_block_is_cold = false;
8719 /* Make sure unwind info is emitted for the thunk if needed. */
8720 final_start_function (emit_barrier (), asm_out_file, 1);
8722 /* Pad stack IP move with 4 instructions (two NOPs count
8723 as one instruction). */
8724 if (TARGET_PAD_SHORT_FUNCTION)
8729 fputs ("\tnop\n", asm_out_file);
8732 xops[0] = gen_rtx_REG (Pmode, regno);
8733 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8734 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8735 fputs ("\tret\n", asm_out_file);
8736 final_end_function ();
8737 init_insn_lengths ();
8738 free_after_compilation (cfun);
8740 current_function_decl = NULL;
8743 if (flag_split_stack)
8744 file_end_indicate_split_stack ();
8747 /* Emit code for the SET_GOT patterns. */
8750 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8756 if (TARGET_VXWORKS_RTP && flag_pic)
8758 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8759 xops[2] = gen_rtx_MEM (Pmode,
8760 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8761 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8763 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8764 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8765 an unadorned address. */
8766 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8767 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8768 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8772 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8774 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8776 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8779 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8782 output_asm_insn ("call\t%a2", xops);
8783 #ifdef DWARF2_UNWIND_INFO
8784 /* The call to next label acts as a push. */
8785 if (dwarf2out_do_frame ())
8789 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8790 gen_rtx_PLUS (Pmode,
8793 RTX_FRAME_RELATED_P (insn) = 1;
8794 dwarf2out_frame_debug (insn, true);
8801 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8802 is what will be referenced by the Mach-O PIC subsystem. */
8804 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8807 targetm.asm_out.internal_label (asm_out_file, "L",
8808 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8812 output_asm_insn ("pop%z0\t%0", xops);
8813 #ifdef DWARF2_UNWIND_INFO
8814 /* The pop is a pop and clobbers dest, but doesn't restore it
8815 for unwind info purposes. */
8816 if (dwarf2out_do_frame ())
8820 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8821 dwarf2out_frame_debug (insn, true);
8822 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8823 gen_rtx_PLUS (Pmode,
8826 RTX_FRAME_RELATED_P (insn) = 1;
8827 dwarf2out_frame_debug (insn, true);
8836 get_pc_thunk_name (name, REGNO (dest));
8837 pic_labels_used |= 1 << REGNO (dest);
8839 #ifdef DWARF2_UNWIND_INFO
8840 /* Ensure all queued register saves are flushed before the
8841 call. */
8842 if (dwarf2out_do_frame ())
8843 dwarf2out_flush_queued_reg_saves ();
8845 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8846 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8847 output_asm_insn ("call\t%X2", xops);
8848 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8849 is what will be referenced by the Mach-O PIC subsystem. */
8852 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8854 targetm.asm_out.internal_label (asm_out_file, "L",
8855 CODE_LABEL_NUMBER (label));
8862 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8863 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8865 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
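/* For the classic ELF -fpic case without the thunk (no deep branch
   prediction), the sequence emitted above looks like (illustrative):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */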
8870 /* Generate a "push" pattern for input ARG. */
8875 struct machine_function *m = cfun->machine;
8877 if (m->fs.cfa_reg == stack_pointer_rtx)
8878 m->fs.cfa_offset += UNITS_PER_WORD;
8879 m->fs.sp_offset += UNITS_PER_WORD;
8881 return gen_rtx_SET (VOIDmode,
8883 gen_rtx_PRE_DEC (Pmode,
8884 stack_pointer_rtx)),
8888 /* Generate a "pop" pattern for input ARG. */
8893 return gen_rtx_SET (VOIDmode,
8896 gen_rtx_POST_INC (Pmode,
8897 stack_pointer_rtx)));
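/* Illustrative RTL for the two patterns above on a 64-bit target:

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))	;; push
	(set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))	;; pop  */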
8900 /* Return >= 0 if there is an unused call-clobbered register available
8901 for the entire function. */
8904 ix86_select_alt_pic_regnum (void)
8906 if (current_function_is_leaf
8908 && !ix86_current_function_calls_tls_descriptor)
8911 /* Can't use the same register for both PIC and DRAP. */
8913 drap = REGNO (crtl->drap_reg);
8916 for (i = 2; i >= 0; --i)
8917 if (i != drap && !df_regs_ever_live_p (i))
8921 return INVALID_REGNUM;
8924 /* Return 1 if we need to save REGNO. */
8926 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8928 if (pic_offset_table_rtx
8929 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8930 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8932 || crtl->calls_eh_return
8933 || crtl->uses_const_pool))
8935 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8940 if (crtl->calls_eh_return && maybe_eh_return)
8945 unsigned test = EH_RETURN_DATA_REGNO (i);
8946 if (test == INVALID_REGNUM)
8953 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8956 return (df_regs_ever_live_p (regno)
8957 && !call_used_regs[regno]
8958 && !fixed_regs[regno]
8959 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8962 /* Return the number of saved general-purpose registers. */
8965 ix86_nsaved_regs (void)
8970 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8971 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8976 /* Return the number of saved SSE registers. */
8979 ix86_nsaved_sseregs (void)
8984 if (ix86_cfun_abi () != MS_ABI)
8986 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8987 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8992 /* Given FROM and TO register numbers, say whether this elimination is
8993 allowed. If stack alignment is needed, we can only replace argument
8994 pointer with hard frame pointer, or replace frame pointer with stack
8995 pointer. Otherwise, frame pointer elimination is automatically
8996 handled and all other eliminations are valid. */
8999 ix86_can_eliminate (const int from, const int to)
9001 if (stack_realign_fp)
9002 return ((from == ARG_POINTER_REGNUM
9003 && to == HARD_FRAME_POINTER_REGNUM)
9004 || (from == FRAME_POINTER_REGNUM
9005 && to == STACK_POINTER_REGNUM));
9007 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9010 /* Return the offset between two registers, one to be eliminated, and the other
9011 its replacement, at the start of a routine. */
9014 ix86_initial_elimination_offset (int from, int to)
9016 struct ix86_frame frame;
9017 ix86_compute_frame_layout (&frame);
9019 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9020 return frame.hard_frame_pointer_offset;
9021 else if (from == FRAME_POINTER_REGNUM
9022 && to == HARD_FRAME_POINTER_REGNUM)
9023 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9026 gcc_assert (to == STACK_POINTER_REGNUM);
9028 if (from == ARG_POINTER_REGNUM)
9029 return frame.stack_pointer_offset;
9031 gcc_assert (from == FRAME_POINTER_REGNUM);
9032 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9036 /* In a dynamically-aligned function, we can't know the offset from
9037 stack pointer to frame pointer, so we must ensure that setjmp
9038 eliminates fp against the hard fp (%ebp) rather than trying to
9039 index from %esp up to the top of the frame across a gap that is
9040 of unknown (at compile-time) size. */
9042 ix86_builtin_setjmp_frame_value (void)
9044 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9047 /* On the x86, -fsplit-stack and -fstack-protector both use the same
9048 field in the TCB, so they cannot be used together. */
9051 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
9052 struct gcc_options *opts ATTRIBUTE_UNUSED)
9056 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
9058 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
9061 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
9064 error ("%<-fsplit-stack%> requires "
9065 "assembler support for CFI directives");
9073 /* When using -fsplit-stack, the allocation routines set a field in
9074 the TCB to the bottom of the stack plus this much space, measured
9075 in bytes. */
9077 #define SPLIT_STACK_AVAILABLE 256
9079 /* Fill the structure ix86_frame with info about the current function's frame. */
9082 ix86_compute_frame_layout (struct ix86_frame *frame)
9084 unsigned int stack_alignment_needed;
9085 HOST_WIDE_INT offset;
9086 unsigned int preferred_alignment;
9087 HOST_WIDE_INT size = get_frame_size ();
9088 HOST_WIDE_INT to_allocate;
9090 frame->nregs = ix86_nsaved_regs ();
9091 frame->nsseregs = ix86_nsaved_sseregs ();
9093 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9094 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9096 /* The MS ABI seems to require the stack alignment to always be 16, except
9097 for function prologues and leaf functions. */
9098 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
9099 && (!current_function_is_leaf || cfun->calls_alloca != 0
9100 || ix86_current_function_calls_tls_descriptor))
9102 preferred_alignment = 16;
9103 stack_alignment_needed = 16;
9104 crtl->preferred_stack_boundary = 128;
9105 crtl->stack_alignment_needed = 128;
9108 gcc_assert (!size || stack_alignment_needed);
9109 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9110 gcc_assert (preferred_alignment <= stack_alignment_needed);
9112 /* For SEH we have to limit the amount of code movement into the prologue.
9113 At present we do this via a BLOCKAGE, at which point there's very little
9114 scheduling that can be done, which means that there's very little point
9115 in doing anything except PUSHs. */
9117 cfun->machine->use_fast_prologue_epilogue = false;
9119 /* During the reload iteration the number of registers saved can change.
9120 Recompute the value as needed. Do not recompute when the number of
9121 registers didn't change, as reload makes multiple calls to the function
9122 and does not expect the decision to change within a single iteration. */
9123 else if (!optimize_function_for_size_p (cfun)
9124 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9126 int count = frame->nregs;
9127 struct cgraph_node *node = cgraph_node (current_function_decl);
9129 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9131 /* The fast prologue uses move instead of push to save registers. This
9132 is significantly longer, but also executes faster, as modern hardware
9133 can execute the moves in parallel but can't do that for push/pop.
9135 Be careful about choosing which prologue to emit: when the function takes
9136 many instructions to execute, we may use the slow version, as well as
9137 when the function is known to be outside a hot spot (this is known with
9138 feedback only). Weight the size of the function by the number of registers
9139 to save, as it is cheap to use one or two push instructions but very
9140 slow to use many of them. */
9142 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9143 if (node->frequency < NODE_FREQUENCY_NORMAL
9144 || (flag_branch_probabilities
9145 && node->frequency < NODE_FREQUENCY_HOT))
9146 cfun->machine->use_fast_prologue_epilogue = false;
9148 cfun->machine->use_fast_prologue_epilogue
9149 = !expensive_function_p (count);
9151 if (TARGET_PROLOGUE_USING_MOVE
9152 && cfun->machine->use_fast_prologue_epilogue)
9153 frame->save_regs_using_mov = true;
9155 frame->save_regs_using_mov = false;
9157 /* If static stack checking is enabled and done with probes, the registers
9158 need to be saved before allocating the frame. */
9159 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9160 frame->save_regs_using_mov = false;
9162 /* Skip return address. */
9163 offset = UNITS_PER_WORD;
9165 /* Skip pushed static chain. */
9166 if (ix86_static_chain_on_stack)
9167 offset += UNITS_PER_WORD;
9169 /* Skip saved base pointer. */
9170 if (frame_pointer_needed)
9171 offset += UNITS_PER_WORD;
9172 frame->hfp_save_offset = offset;
9174 /* The traditional frame pointer location is at the top of the frame. */
9175 frame->hard_frame_pointer_offset = offset;
9177 /* Register save area */
9178 offset += frame->nregs * UNITS_PER_WORD;
9179 frame->reg_save_offset = offset;
9181 /* Align and set SSE register save area. */
9182 if (frame->nsseregs)
9184 /* The only ABI that has saved SSE registers (Win64) also has a
9185 16-byte aligned default stack, and thus we don't need to be
9186 within the re-aligned local stack frame to save them. */
9187 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9188 offset = (offset + 16 - 1) & -16;
9189 offset += frame->nsseregs * 16;
9191 frame->sse_reg_save_offset = offset;
9193 /* The re-aligned stack starts here. Values before this point are not
9194 directly comparable with values below this point. In order to make
9195 sure that no value happens to be the same before and after, force
9196 the alignment computation below to add a non-zero value. */
9197 if (stack_realign_fp)
9198 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9201 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9202 offset += frame->va_arg_size;
9204 /* Align start of frame for local function. */
9205 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9207 /* Frame pointer points here. */
9208 frame->frame_pointer_offset = offset;
9212 /* Add the outgoing arguments area. It can be skipped if we eliminated
9213 all the function calls as dead code.
9214 Skipping is however impossible when the function calls alloca. The alloca
9215 expander assumes that the last crtl->outgoing_args_size bytes
9216 of the stack frame are unused. */
9217 if (ACCUMULATE_OUTGOING_ARGS
9218 && (!current_function_is_leaf || cfun->calls_alloca
9219 || ix86_current_function_calls_tls_descriptor))
9221 offset += crtl->outgoing_args_size;
9222 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9225 frame->outgoing_arguments_size = 0;
9227 /* Align the stack boundary. Only needed if we're calling another function
9228 or using alloca. */
9229 if (!current_function_is_leaf || cfun->calls_alloca
9230 || ix86_current_function_calls_tls_descriptor)
9231 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9233 /* We've reached end of stack frame. */
9234 frame->stack_pointer_offset = offset;
9236 /* The size the prologue needs to allocate. */
9237 to_allocate = offset - frame->sse_reg_save_offset;
9239 if ((!to_allocate && frame->nregs <= 1)
9240 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9241 frame->save_regs_using_mov = false;
9243 if (ix86_using_red_zone ()
9244 && current_function_sp_is_unchanging
9245 && current_function_is_leaf
9246 && !ix86_current_function_calls_tls_descriptor)
9248 frame->red_zone_size = to_allocate;
9249 if (frame->save_regs_using_mov)
9250 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9251 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9252 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9255 frame->red_zone_size = 0;
9256 frame->stack_pointer_offset -= frame->red_zone_size;
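/* Example (illustrative): a 64-bit leaf function with 40 bytes of locals
   and no saved registers gets red_zone_size == 40, so the prologue
   allocates nothing and the locals live below the stack pointer, inside
   the 128-byte red zone the SysV ABI guarantees.  */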
9258 /* The SEH frame pointer location is near the bottom of the frame.
9259 This is enforced by the fact that the difference between the
9260 stack pointer and the frame pointer is limited to 240 bytes in
9261 the unwind data structure. */
9266 /* If we can leave the frame pointer where it is, do so. */
9267 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9268 if (diff > 240 || (diff & 15) != 0)
9270 /* Ideally we'd determine what portion of the local stack frame
9271 (within the constraint of the lowest 240) is most heavily used.
9272 But without that complication, simply bias the frame pointer
9273 by 128 bytes so as to maximize the amount of the local stack
9274 frame that is addressable with 8-bit offsets. */
9275 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
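/* A rough sketch of the resulting 64-bit frame, from high to low
   addresses (illustrative; each offset field records the distance
   from the CFA):

	return address
	saved %rbp (if frame_pointer_needed)	hard_frame_pointer_offset
	GP register save area			reg_save_offset
	16-byte aligned SSE save area		sse_reg_save_offset
	va_arg area and aligned locals		frame_pointer_offset
	outgoing arguments			stack_pointer_offset  */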
9280 /* This is semi-inlined memory_address_length, but simplified
9281 since we know that we're always dealing with reg+offset, and
9282 to avoid having to create and discard all that rtl. */
9285 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9291 /* EBP and R13 cannot be encoded without an offset. */
9292 len = (regno == BP_REG || regno == R13_REG);
9294 else if (IN_RANGE (offset, -128, 127))
9297 /* ESP and R12 must be encoded with a SIB byte. */
9298 if (regno == SP_REG || regno == R12_REG)
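/* Worked examples (illustrative): 0(%ebp) still needs a disp8, so the
   extra length is 1; 8(%esp) needs both a SIB byte and a disp8, so 2;
   and 1024(%ebx) needs a full disp32, so 4.  */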
9304 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9305 The valid base registers are taken from CFUN->MACHINE->FS. */
9308 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9310 const struct machine_function *m = cfun->machine;
9311 rtx base_reg = NULL;
9312 HOST_WIDE_INT base_offset = 0;
9314 if (m->use_fast_prologue_epilogue)
9316 /* Choose the base register most likely to allow the most scheduling
9317 opportunities. Generally FP is valid throughout the function,
9318 while DRAP must be reloaded within the epilogue. But choose either
9319 over the SP due to increased encoding size. */
9323 base_reg = hard_frame_pointer_rtx;
9324 base_offset = m->fs.fp_offset - cfa_offset;
9326 else if (m->fs.drap_valid)
9328 base_reg = crtl->drap_reg;
9329 base_offset = 0 - cfa_offset;
9331 else if (m->fs.sp_valid)
9333 base_reg = stack_pointer_rtx;
9334 base_offset = m->fs.sp_offset - cfa_offset;
9339 HOST_WIDE_INT toffset;
9342 /* Choose the base register with the smallest address encoding.
9343 With a tie, choose FP > DRAP > SP. */
9346 base_reg = stack_pointer_rtx;
9347 base_offset = m->fs.sp_offset - cfa_offset;
9348 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9350 if (m->fs.drap_valid)
9352 toffset = 0 - cfa_offset;
9353 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9356 base_reg = crtl->drap_reg;
9357 base_offset = toffset;
9363 toffset = m->fs.fp_offset - cfa_offset;
9364 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9367 base_reg = hard_frame_pointer_rtx;
9368 base_offset = toffset;
9373 gcc_assert (base_reg != NULL);
9375 return plus_constant (base_reg, base_offset);
9378 /* Emit code to save registers in the prologue. */
9381 ix86_emit_save_regs (void)
9386 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9387 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9389 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9390 RTX_FRAME_RELATED_P (insn) = 1;
9394 /* Emit a single register save at CFA - CFA_OFFSET. */
9397 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9398 HOST_WIDE_INT cfa_offset)
9400 struct machine_function *m = cfun->machine;
9401 rtx reg = gen_rtx_REG (mode, regno);
9402 rtx mem, addr, base, insn;
9404 addr = choose_baseaddr (cfa_offset);
9405 mem = gen_frame_mem (mode, addr);
9407 /* For SSE saves, we need to indicate the 128-bit alignment. */
9408 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9410 insn = emit_move_insn (mem, reg);
9411 RTX_FRAME_RELATED_P (insn) = 1;
9414 if (GET_CODE (base) == PLUS)
9415 base = XEXP (base, 0);
9416 gcc_checking_assert (REG_P (base));
9418 /* When saving registers into a re-aligned local stack frame, avoid
9419 any tricky guessing by dwarf2out. */
9420 if (m->fs.realigned)
9422 gcc_checking_assert (stack_realign_drap);
9424 if (regno == REGNO (crtl->drap_reg))
9426 /* A bit of a hack. We force the DRAP register to be saved in
9427 the re-aligned stack frame, which provides us with a copy
9428 of the CFA that will last past the prologue. Install it. */
9429 gcc_checking_assert (cfun->machine->fs.fp_valid);
9430 addr = plus_constant (hard_frame_pointer_rtx,
9431 cfun->machine->fs.fp_offset - cfa_offset);
9432 mem = gen_rtx_MEM (mode, addr);
9433 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9437 /* The frame pointer is a stable reference within the
9438 aligned frame. Use it. */
9439 gcc_checking_assert (cfun->machine->fs.fp_valid);
9440 addr = plus_constant (hard_frame_pointer_rtx,
9441 cfun->machine->fs.fp_offset - cfa_offset);
9442 mem = gen_rtx_MEM (mode, addr);
9443 add_reg_note (insn, REG_CFA_EXPRESSION,
9444 gen_rtx_SET (VOIDmode, mem, reg));
9448 /* The memory may not be relative to the current CFA register,
9449 which means that we may need to generate a new pattern for
9450 use by the unwind info. */
9451 else if (base != m->fs.cfa_reg)
9453 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9454 mem = gen_rtx_MEM (mode, addr);
9455 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9459 /* Emit code to save registers using MOV insns.
9460 First register is stored at CFA - CFA_OFFSET. */
9462 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9466 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9467 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9469 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9470 cfa_offset -= UNITS_PER_WORD;
9474 /* Emit code to save SSE registers using MOV insns.
9475 First register is stored at CFA - CFA_OFFSET. */
9477 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9481 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9482 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9484 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9489 static GTY(()) rtx queued_cfa_restores;
9491 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9492 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9493 Don't add the note if the previously saved value will be left untouched
9494 within the stack red zone till return, as unwinders can find the same
9495 value in the register and on the stack. */
9498 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9500 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9505 add_reg_note (insn, REG_CFA_RESTORE, reg);
9506 RTX_FRAME_RELATED_P (insn) = 1;
9510 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9513 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9516 ix86_add_queued_cfa_restore_notes (rtx insn)
9519 if (!queued_cfa_restores)
9521 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9523 XEXP (last, 1) = REG_NOTES (insn);
9524 REG_NOTES (insn) = queued_cfa_restores;
9525 queued_cfa_restores = NULL_RTX;
9526 RTX_FRAME_RELATED_P (insn) = 1;
9529 /* Expand a prologue or epilogue stack adjustment.
9530 The pattern exists to put a dependency on all ebp-based memory accesses.
9531 STYLE should be negative if instructions should be marked as frame related,
9532 zero if the %r11 register is live and cannot be freely used, and positive
9533 otherwise. */
9536 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9537 int style, bool set_cfa)
9539 struct machine_function *m = cfun->machine;
9541 bool add_frame_related_expr = false;
9544 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9545 else if (x86_64_immediate_operand (offset, DImode))
9546 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9550 /* r11 is used by indirect sibcall return as well, set before the
9551 epilogue and used after the epilogue. */
9553 tmp = gen_rtx_REG (DImode, R11_REG);
9556 gcc_assert (src != hard_frame_pointer_rtx
9557 && dest != hard_frame_pointer_rtx);
9558 tmp = hard_frame_pointer_rtx;
9560 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9562 add_frame_related_expr = true;
9564 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9567 insn = emit_insn (insn);
9569 ix86_add_queued_cfa_restore_notes (insn);
9575 gcc_assert (m->fs.cfa_reg == src);
9576 m->fs.cfa_offset += INTVAL (offset);
9577 m->fs.cfa_reg = dest;
9579 r = gen_rtx_PLUS (Pmode, src, offset);
9580 r = gen_rtx_SET (VOIDmode, dest, r);
9581 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9582 RTX_FRAME_RELATED_P (insn) = 1;
9586 RTX_FRAME_RELATED_P (insn) = 1;
9587 if (add_frame_related_expr)
9589 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9590 r = gen_rtx_SET (VOIDmode, dest, r);
9591 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9595 if (dest == stack_pointer_rtx)
9597 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9598 bool valid = m->fs.sp_valid;
9600 if (src == hard_frame_pointer_rtx)
9602 valid = m->fs.fp_valid;
9603 ooffset = m->fs.fp_offset;
9605 else if (src == crtl->drap_reg)
9607 valid = m->fs.drap_valid;
9612 /* Else there are two possibilities: SP itself, which we set
9613 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9614 taken care of by hand along the eh_return path. */
9615 gcc_checking_assert (src == stack_pointer_rtx
9616 || offset == const0_rtx);
9619 m->fs.sp_offset = ooffset - INTVAL (offset);
9620 m->fs.sp_valid = valid;
9624 /* Find an available register to be used as a dynamic realign argument
9625 pointer register. Such a register will be written in the prologue and
9626 used at the beginning of the body, so it must not be
9627 1. a parameter passing register.
9628 2. the GOT pointer.
9629 We reuse the static-chain register if it is available. Otherwise, we
9630 use DI for i386 and R13 for x86-64. We chose R13 since it has a
9631 longer encoding.
9633 Return: the regno of the chosen register. */
9636 find_drap_reg (void)
9638 tree decl = cfun->decl;
9642 /* Use R13 for a nested function or a function that needs a static
9643 chain. Since a function with a tail call may use any caller-saved
9644 registers in the epilogue, DRAP must not use a caller-saved
9645 register in such a case. */
9646 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9653 /* Use DI for a nested function or a function that needs a static
9654 chain. Since a function with a tail call may use any caller-saved
9655 registers in the epilogue, DRAP must not use a caller-saved
9656 register in such a case. */
9657 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9660 /* Reuse the static chain register if it isn't used for parameter
9661 passing. */
9662 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9663 && !lookup_attribute ("fastcall",
9664 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9665 && !lookup_attribute ("thiscall",
9666 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9673 /* Return minimum incoming stack alignment. */
9676 ix86_minimum_incoming_stack_boundary (bool sibcall)
9678 unsigned int incoming_stack_boundary;
9680 /* Prefer the one specified at command line. */
9681 if (ix86_user_incoming_stack_boundary)
9682 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9683 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9684 if -mstackrealign is used, this isn't a sibcall check, and the
9685 estimated stack alignment is 128 bits. */
9688 && ix86_force_align_arg_pointer
9689 && crtl->stack_alignment_estimated == 128)
9690 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9692 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9694 /* Incoming stack alignment can be changed on individual functions
9695 via force_align_arg_pointer attribute. We use the smallest
9696 incoming stack boundary. */
9697 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9698 && lookup_attribute (ix86_force_align_arg_pointer_string,
9699 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9700 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9702 /* The incoming stack frame has to be aligned at least at
9703 parm_stack_boundary. */
9704 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9705 incoming_stack_boundary = crtl->parm_stack_boundary;
9707 /* The stack at the entry point of main is aligned by the runtime. We
9708 use the smallest incoming stack boundary. */
9709 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9710 && DECL_NAME (current_function_decl)
9711 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9712 && DECL_FILE_SCOPE_P (current_function_decl))
9713 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9715 return incoming_stack_boundary;
9718 /* Update incoming stack boundary and estimated stack alignment. */
9721 ix86_update_stack_boundary (void)
9723 ix86_incoming_stack_boundary
9724 = ix86_minimum_incoming_stack_boundary (false);
9726 /* x86_64 varargs need 16-byte stack alignment for the register save
9727 area. */
9730 && crtl->stack_alignment_estimated < 128)
9731 crtl->stack_alignment_estimated = 128;
9734 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9735 needed or an rtx for DRAP otherwise. */
9738 ix86_get_drap_rtx (void)
9740 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9741 crtl->need_drap = true;
9743 if (stack_realign_drap)
9745 /* Assign DRAP to vDRAP and return vDRAP. */
9746 unsigned int regno = find_drap_reg ();
9751 arg_ptr = gen_rtx_REG (Pmode, regno);
9752 crtl->drap_reg = arg_ptr;
9755 drap_vreg = copy_to_reg (arg_ptr);
9759 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9762 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9763 RTX_FRAME_RELATED_P (insn) = 1;
9771 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9774 ix86_internal_arg_pointer (void)
9776 return virtual_incoming_args_rtx;
9779 struct scratch_reg {
9784 /* Return a short-lived scratch register for use on function entry.
9785 In 32-bit mode, it is valid only after the registers are saved
9786 in the prologue. This register must be released by means of
9787 release_scratch_register_on_entry once it is dead. */
9790 get_scratch_register_on_entry (struct scratch_reg *sr)
9798 /* We always use R11 in 64-bit mode. */
9803 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9805 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9806 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9807 int regparm = ix86_function_regparm (fntype, decl);
9809 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9811 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9812 for the static chain register. */
9813 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9814 && drap_regno != AX_REG)
9816 else if (regparm < 2 && drap_regno != DX_REG)
9818 /* ecx is the static chain register. */
9819 else if (regparm < 3 && !fastcall_p && !static_chain_p
9820 && drap_regno != CX_REG)
9822 else if (ix86_save_reg (BX_REG, true))
9824 /* esi is the static chain register. */
9825 else if (!(regparm == 3 && static_chain_p)
9826 && ix86_save_reg (SI_REG, true))
9828 else if (ix86_save_reg (DI_REG, true))
9832 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9837 sr->reg = gen_rtx_REG (Pmode, regno);
9840 rtx insn = emit_insn (gen_push (sr->reg));
9841 RTX_FRAME_RELATED_P (insn) = 1;
9845 /* Release a scratch register obtained from the preceding function. */
9848 release_scratch_register_on_entry (struct scratch_reg *sr)
9852 rtx x, insn = emit_insn (gen_pop (sr->reg));
9854 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9855 RTX_FRAME_RELATED_P (insn) = 1;
9856 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9857 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9862 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9864 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9867 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9869 /* We skip the probe for the first interval + a small dope of 4 words and
9870 probe that many bytes past the specified size to maintain a protection
9871 area at the bottom of the stack. */
9872 const int dope = 4 * UNITS_PER_WORD;
9873 rtx size_rtx = GEN_INT (size);
9875 /* See if we have a constant small number of probes to generate. If so,
9876 that's the easy case. The run-time loop is made up of 11 insns in the
9877 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9878 for n # of intervals. */
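/* Worked example (illustrative): with PROBE_INTERVAL == 4096 (the
   usual default) and size == 3 * 4096, the unrolled form below costs
   3 + 2 * (3 - 1) == 7 insns, beating the 11-insn run-time loop; the
   loop only wins once more than five intervals must be probed, which
   is exactly the guard below.  */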
9879 if (size <= 5 * PROBE_INTERVAL)
9881 HOST_WIDE_INT i, adjust;
9882 bool first_probe = true;
9884 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9885 values of N from 1 until it exceeds SIZE. If only one probe is
9886 needed, this will not generate any code. Then adjust and probe
9887 to PROBE_INTERVAL + SIZE. */
9888 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9892 adjust = 2 * PROBE_INTERVAL + dope;
9893 first_probe = false;
9896 adjust = PROBE_INTERVAL;
9898 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9899 plus_constant (stack_pointer_rtx, -adjust)));
9900 emit_stack_probe (stack_pointer_rtx);
9904 adjust = size + PROBE_INTERVAL + dope;
9906 adjust = size + PROBE_INTERVAL - i;
9908 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9909 plus_constant (stack_pointer_rtx, -adjust)));
9910 emit_stack_probe (stack_pointer_rtx);
9912 /* Adjust back to account for the additional first interval. */
9913 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9914 plus_constant (stack_pointer_rtx,
9915 PROBE_INTERVAL + dope)));
9918 /* Otherwise, do the same as above, but in a loop. Note that we must be
9919 extra careful with variables wrapping around because we might be at
9920 the very top (or the very bottom) of the address space and we have
9921 to be able to handle this case properly; in particular, we use an
9922 equality test for the loop condition. */
9925 HOST_WIDE_INT rounded_size;
9926 struct scratch_reg sr;
9928 get_scratch_register_on_entry (&sr);
9931 /* Step 1: round SIZE to the previous multiple of the interval. */
9933 rounded_size = size & -PROBE_INTERVAL;
9936 /* Step 2: compute initial and final value of the loop counter. */
9938 /* SP = SP_0 + PROBE_INTERVAL. */
9939 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9940 plus_constant (stack_pointer_rtx,
9941 - (PROBE_INTERVAL + dope))));
9943 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9944 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9945 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9946 gen_rtx_PLUS (Pmode, sr.reg,
9947 stack_pointer_rtx)));
9952 while (SP != LAST_ADDR)
9954 SP = SP + PROBE_INTERVAL
9958 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9959 values of N from 1 until it is equal to ROUNDED_SIZE. */
9961 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9964 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9965 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9967 if (size != rounded_size)
9969 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9970 plus_constant (stack_pointer_rtx,
9971 rounded_size - size)));
9972 emit_stack_probe (stack_pointer_rtx);
9975 /* Adjust back to account for the additional first interval. */
9976 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9977 plus_constant (stack_pointer_rtx,
9978 PROBE_INTERVAL + dope)));
9980 release_scratch_register_on_entry (&sr);
9983 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9984 cfun->machine->fs.sp_offset += size;
9986 /* Make sure nothing is scheduled before we are done. */
9987 emit_insn (gen_blockage ());
9990 /* Adjust the stack pointer up to REG while probing it. */
9993 output_adjust_stack_and_probe (rtx reg)
9995 static int labelno = 0;
9996 char loop_lab[32], end_lab[32];
9999 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10000 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10002 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10004 /* Jump to END_LAB if SP == LAST_ADDR. */
10005 xops[0] = stack_pointer_rtx;
10007 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10008 fputs ("\tje\t", asm_out_file);
10009 assemble_name_raw (asm_out_file, end_lab);
10010 fputc ('\n', asm_out_file);
10012 /* SP = SP + PROBE_INTERVAL. */
10013 xops[1] = GEN_INT (PROBE_INTERVAL);
10014 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10017 xops[1] = const0_rtx;
10018 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10020 fprintf (asm_out_file, "\tjmp\t");
10021 assemble_name_raw (asm_out_file, loop_lab);
10022 fputc ('\n', asm_out_file);
10024 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
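/* Put together, the loop emitted above looks roughly like this on a
   64-bit target (illustrative, assuming PROBE_INTERVAL == 4096 and
   the last address in %rax):

   .LPSRL0:
	cmpq	%rax, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)
	jmp	.LPSRL0
   .LPSRE0:  */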
10029 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10030 inclusive. These are offsets from the current stack pointer. */
10033 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10035 /* See if we have a constant small number of probes to generate. If so,
10036 that's the easy case. The run-time loop is made up of 7 insns in the
10037 generic case while the compile-time loop is made up of n insns for n #
10038 of probes. */
10039 if (size <= 7 * PROBE_INTERVAL)
10043 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10044 it exceeds SIZE. If only one probe is needed, this will not
10045 generate any code. Then probe at FIRST + SIZE. */
10046 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10047 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
10049 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
10052 /* Otherwise, do the same as above, but in a loop. Note that we must be
10053 extra careful with variables wrapping around because we might be at
10054 the very top (or the very bottom) of the address space and we have
10055 to be able to handle this case properly; in particular, we use an
10056 equality test for the loop condition. */
10059 HOST_WIDE_INT rounded_size, last;
10060 struct scratch_reg sr;
10062 get_scratch_register_on_entry (&sr);
10065 /* Step 1: round SIZE to the previous multiple of the interval. */
10067 rounded_size = size & -PROBE_INTERVAL;
10070 /* Step 2: compute initial and final value of the loop counter. */
10072 /* TEST_OFFSET = FIRST. */
10073 emit_move_insn (sr.reg, GEN_INT (-first));
10075 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10076 last = first + rounded_size;
10079 /* Step 3: the loop
10081 while (TEST_ADDR != LAST_ADDR)
10083 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10087 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10088 until it is equal to ROUNDED_SIZE. */
10090 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10093 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10094 that SIZE is equal to ROUNDED_SIZE. */
10096 if (size != rounded_size)
10097 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
10100 rounded_size - size));
10102 release_scratch_register_on_entry (&sr);
10105 /* Make sure nothing is scheduled before we are done. */
10106 emit_insn (gen_blockage ());
10109 /* Probe a range of stack addresses from REG to END, inclusive. These are
10110 offsets from the current stack pointer. */
10113 output_probe_stack_range (rtx reg, rtx end)
10115 static int labelno = 0;
10116 char loop_lab[32], end_lab[32];
10119 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10120 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10122 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10124 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10127 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10128 fputs ("\tje\t", asm_out_file);
10129 assemble_name_raw (asm_out_file, end_lab);
10130 fputc ('\n', asm_out_file);
10132 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10133 xops[1] = GEN_INT (PROBE_INTERVAL);
10134 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10136 /* Probe at TEST_ADDR. */
10137 xops[0] = stack_pointer_rtx;
10139 xops[2] = const0_rtx;
10140 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10142 fprintf (asm_out_file, "\tjmp\t");
10143 assemble_name_raw (asm_out_file, loop_lab);
10144 fputc ('\n', asm_out_file);
10146 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10151 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10152 to be generated in the correct form. */
10154 ix86_finalize_stack_realign_flags (void)
10156 /* Check whether stack realignment is really needed after reload, and
10157 store the result in cfun. */
10158 unsigned int incoming_stack_boundary
10159 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10160 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10161 unsigned int stack_realign = (incoming_stack_boundary
10162 < (current_function_is_leaf
10163 ? crtl->max_used_stack_slot_alignment
10164 : crtl->stack_alignment_needed));
10166 if (crtl->stack_realign_finalized)
10168 /* After stack_realign_needed is finalized, we can no longer
10169 update it. */
10170 gcc_assert (crtl->stack_realign_needed == stack_realign);
10174 crtl->stack_realign_needed = stack_realign;
10175 crtl->stack_realign_finalized = true;
10179 /* Expand the prologue into a bunch of separate insns. */
10182 ix86_expand_prologue (void)
10184 struct machine_function *m = cfun->machine;
10187 struct ix86_frame frame;
10188 HOST_WIDE_INT allocate;
10189 bool int_registers_saved;
10191 ix86_finalize_stack_realign_flags ();
10193 /* DRAP should not coexist with stack_realign_fp. */
10194 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10196 memset (&m->fs, 0, sizeof (m->fs));
10198 /* Initialize CFA state for before the prologue. */
10199 m->fs.cfa_reg = stack_pointer_rtx;
10200 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10202 /* Track SP offset to the CFA. We continue tracking this after we've
10203 swapped the CFA register away from SP. In the case of re-alignment
10204 this is fudged; we're interested in offsets within the local frame. */
10205 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10206 m->fs.sp_valid = true;
10208 ix86_compute_frame_layout (&frame);
10210 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10212 /* We should have already generated an error for any use of
10213 ms_hook on a nested function. */
10214 gcc_checking_assert (!ix86_static_chain_on_stack);
10216 /* Check if profiling is active and we shall use the profiling-before-prologue
10217 variant. If so, sorry. */
10218 if (crtl->profile && flag_fentry != 0)
10219 sorry ("ms_hook_prologue attribute isn%'t compatible "
10220 "with -mfentry for 32-bit");
10222 /* In ix86_asm_output_function_label we emitted:
10223 8b ff movl.s %edi,%edi
10225 8b ec movl.s %esp,%ebp
10227 This matches the hookable function prologue in Win32 API
10228 functions in Microsoft Windows XP Service Pack 2 and newer.
10229 Wine uses this to enable Windows apps to hook the Win32 API
10230 functions provided by Wine.
10232 What that means is that we've already set up the frame pointer. */
10234 if (frame_pointer_needed
10235 && !(crtl->drap_reg && crtl->stack_realign_needed))
10239 /* We've decided to use the frame pointer already set up.
10240 Describe this to the unwinder by pretending that both
10241 push and mov insns happen right here.
10243 Putting the unwind info here at the end of the ms_hook
10244 is done so that we can make absolutely certain we get
10245 the required byte sequence at the start of the function,
10246 rather than relying on an assembler that can produce
10247 the exact encoding required.
10249 However it does mean (in the unpatched case) that we have
10250 a 1 insn window where the asynchronous unwind info is
10251 incorrect. However, if we placed the unwind info at
10252 its correct location we would have incorrect unwind info
10253 in the patched case. Which is probably all moot since
10254 I don't expect Wine generates dwarf2 unwind info for the
10255 system libraries that use this feature. */
10257 insn = emit_insn (gen_blockage ());
10259 push = gen_push (hard_frame_pointer_rtx);
10260 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10261 stack_pointer_rtx);
10262 RTX_FRAME_RELATED_P (push) = 1;
10263 RTX_FRAME_RELATED_P (mov) = 1;
10265 RTX_FRAME_RELATED_P (insn) = 1;
10266 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10267 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10269 /* Note that gen_push incremented m->fs.cfa_offset, even
10270 though we didn't emit the push insn here. */
10271 m->fs.cfa_reg = hard_frame_pointer_rtx;
10272 m->fs.fp_offset = m->fs.cfa_offset;
10273 m->fs.fp_valid = true;
10277 /* The frame pointer is not needed so pop %ebp again.
10278 This leaves us with a pristine state. */
10279 emit_insn (gen_pop (hard_frame_pointer_rtx));
10283 /* The first insn of a function that accepts its static chain on the
10284 stack is to push the register that would be filled in by a direct
10285 call. This insn will be skipped by the trampoline. */
10286 else if (ix86_static_chain_on_stack)
10288 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10289 emit_insn (gen_blockage ());
10291 /* We don't want to interpret this push insn as a register save,
10292 only as a stack adjustment. The real copy of the register as
10293 a save will be done later, if needed. */
10294 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10295 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10296 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10297 RTX_FRAME_RELATED_P (insn) = 1;
10300 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10301 DRAP is needed and stack realignment is really needed after reload. */
10302 if (stack_realign_drap)
10304 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10306 /* Only need to push parameter pointer reg if it is caller saved. */
10307 if (!call_used_regs[REGNO (crtl->drap_reg)])
10309 /* Push arg pointer reg */
10310 insn = emit_insn (gen_push (crtl->drap_reg));
10311 RTX_FRAME_RELATED_P (insn) = 1;
10314 /* Grab the argument pointer. */
10315 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10316 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10317 RTX_FRAME_RELATED_P (insn) = 1;
10318 m->fs.cfa_reg = crtl->drap_reg;
10319 m->fs.cfa_offset = 0;
10321 /* Align the stack. */
10322 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10324 GEN_INT (-align_bytes)));
10325 RTX_FRAME_RELATED_P (insn) = 1;
10327 /* Replicate the return address on the stack so that the return
10328 address can be reached via the (argp - 1) slot.  This is needed
10329 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10330 expand_builtin_return_addr, etc. */
10331 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10332 t = gen_frame_mem (Pmode, t);
10333 insn = emit_insn (gen_push (t));
10334 RTX_FRAME_RELATED_P (insn) = 1;
10336 /* For the purposes of frame and register save area addressing,
10337 we've started over with a new frame. */
10338 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10339 m->fs.realigned = true;
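/* As a rough illustration only (the DRAP register and alignment constant
   depend on the target, options and register availability), the DRAP
   sequence built above typically assembles to something like:

       leal   4(%esp), %ecx     # grab the incoming argument pointer
       andl   $-16, %esp        # align the stack
       pushl  -4(%ecx)          # replicate the return address

   after which locals are addressed from the realigned %esp and the
   argument area from the DRAP register.  */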
10342 if (frame_pointer_needed && !m->fs.fp_valid)
10344 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10345 slower on all targets. Also sdb doesn't like it. */
10346 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10347 RTX_FRAME_RELATED_P (insn) = 1;
10349 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10351 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10352 RTX_FRAME_RELATED_P (insn) = 1;
10354 if (m->fs.cfa_reg == stack_pointer_rtx)
10355 m->fs.cfa_reg = hard_frame_pointer_rtx;
10356 m->fs.fp_offset = m->fs.sp_offset;
10357 m->fs.fp_valid = true;
10361 int_registers_saved = (frame.nregs == 0);
10363 if (!int_registers_saved)
10365 /* If saving registers via PUSH, do so now. */
10366 if (!frame.save_regs_using_mov)
10368 ix86_emit_save_regs ();
10369 int_registers_saved = true;
10370 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10373 /* When using the red zone we may start register saving before allocating
10374 the stack frame, saving one cycle of the prologue.  However, avoid
10375 doing this if we have to probe the stack; at least on x86_64 the
10376 stack probe can turn into a call that clobbers a red zone location. */
10377 else if (ix86_using_red_zone ()
10378 && (! TARGET_STACK_PROBE
10379 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10381 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10382 int_registers_saved = true;
10386 if (stack_realign_fp)
10388 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10389 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10391 /* The computation of the size of the re-aligned stack frame means
10392 that we must allocate the size of the register save area before
10393 performing the actual alignment. Otherwise we cannot guarantee
10394 that there's enough storage above the realignment point. */
10395 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10396 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10397 GEN_INT (m->fs.sp_offset
10398 - frame.sse_reg_save_offset),
10401 /* Align the stack. */
10402 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10404 GEN_INT (-align_bytes)));
10406 /* For the purposes of register save area addressing, the stack
10407 pointer is no longer valid. As for the value of sp_offset,
10408 see ix86_compute_frame_layout, which we need to match in order
10409 to pass verification of stack_pointer_offset at the end. */
10410 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10411 m->fs.sp_valid = false;
10414 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10416 if (flag_stack_usage)
10418 /* We start counting from ARG_POINTER. */
10419 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10421 /* If it was realigned, take into account the fake frame. */
10422 if (stack_realign_drap)
10424 if (ix86_static_chain_on_stack)
10425 stack_size += UNITS_PER_WORD;
10427 if (!call_used_regs[REGNO (crtl->drap_reg)])
10428 stack_size += UNITS_PER_WORD;
10430 /* This over-estimates by 1 minimal-stack-alignment-unit but
10431 mitigates that by counting in the new return address slot. */
10432 current_function_dynamic_stack_size
10433 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10436 current_function_static_stack_size = stack_size;
10439 /* The stack has already been decremented by the instruction calling us
10440 so we need to probe unconditionally to preserve the protection area. */
10441 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10443 /* We expect the registers to be saved when probes are used. */
10444 gcc_assert (int_registers_saved);
10446 if (STACK_CHECK_MOVING_SP)
10448 ix86_adjust_stack_and_probe (allocate);
10453 HOST_WIDE_INT size = allocate;
10455 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10456 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10458 if (TARGET_STACK_PROBE)
10459 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10460 else
10461 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10467 else if (!ix86_target_stack_probe ()
10468 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10470 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10471 GEN_INT (-allocate), -1,
10472 m->fs.cfa_reg == stack_pointer_rtx);
10476 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10478 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10480 bool eax_live = false;
10481 bool r10_live = false;
10484 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10485 if (!TARGET_64BIT_MS_ABI)
10486 eax_live = ix86_eax_live_at_start_p ();
10490 emit_insn (gen_push (eax));
10491 allocate -= UNITS_PER_WORD;
10495 r10 = gen_rtx_REG (Pmode, R10_REG);
10496 emit_insn (gen_push (r10));
10497 allocate -= UNITS_PER_WORD;
10500 emit_move_insn (eax, GEN_INT (allocate));
10501 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10503 /* Use the fact that AX still contains ALLOCATE. */
10504 adjust_stack_insn = (TARGET_64BIT
10505 ? gen_pro_epilogue_adjust_stack_di_sub
10506 : gen_pro_epilogue_adjust_stack_si_sub);
10508 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10509 stack_pointer_rtx, eax));
10511 /* Note that SEH directives need to continue tracking the stack
10512 pointer even after the frame pointer has been set up. */
10513 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10515 if (m->fs.cfa_reg == stack_pointer_rtx)
10516 m->fs.cfa_offset += allocate;
10518 RTX_FRAME_RELATED_P (insn) = 1;
10519 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10520 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10521 plus_constant (stack_pointer_rtx,
10524 m->fs.sp_offset += allocate;
10526 if (r10_live && eax_live)
10528 t = choose_baseaddr (m->fs.sp_offset - allocate);
10529 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10530 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10531 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10533 else if (eax_live || r10_live)
10535 t = choose_baseaddr (m->fs.sp_offset - allocate);
10536 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
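/* This branch handles targets where the allocation must be probed, with
   the amount passed in %eax to a worker routine (e.g. ___chkstk on
   Windows targets); because %eax, and %r10 for a 64-bit static chain,
   may hold live incoming values, they were pushed before the call and
   are reloaded above from the newly allocated frame.  */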
10539 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10541 /* If we haven't already set up the frame pointer, do so now. */
10542 if (frame_pointer_needed && !m->fs.fp_valid)
10544 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10545 GEN_INT (frame.stack_pointer_offset
10546 - frame.hard_frame_pointer_offset));
10547 insn = emit_insn (insn);
10548 RTX_FRAME_RELATED_P (insn) = 1;
10549 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10551 if (m->fs.cfa_reg == stack_pointer_rtx)
10552 m->fs.cfa_reg = hard_frame_pointer_rtx;
10553 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10554 m->fs.fp_valid = true;
10557 if (!int_registers_saved)
10558 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10559 if (frame.nsseregs)
10560 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10562 pic_reg_used = false;
10563 if (pic_offset_table_rtx
10564 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10567 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10569 if (alt_pic_reg_used != INVALID_REGNUM)
10570 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10572 pic_reg_used = true;
10579 if (ix86_cmodel == CM_LARGE_PIC)
10581 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10582 rtx label = gen_label_rtx ();
10583 emit_label (label);
10584 LABEL_PRESERVE_P (label) = 1;
10585 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10586 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10587 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10588 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10589 pic_offset_table_rtx, tmp_reg));
10592 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10595 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10598 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
10599 when mcount needs it.  A blockage to avoid call movement across the
10600 mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10601 note. */
10602 if (crtl->profile && !flag_fentry && pic_reg_used)
10603 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10605 if (crtl->drap_reg && !crtl->stack_realign_needed)
10607 /* vDRAP is set up, but after reload it turns out that stack
10608 realignment isn't necessary; here we emit the prologue to set up
10609 DRAP without the stack-realignment adjustment. */
10610 t = choose_baseaddr (0);
10611 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10614 /* Prevent instructions from being scheduled into the register save push
10615 sequence when access to the redzone area is done through the frame pointer.
10616 The offset between the frame pointer and the stack pointer is calculated
10617 relative to the value of the stack pointer at the end of the function
10618 prologue, and moving instructions that access the redzone area via the frame
10619 pointer inside the push sequence violates this assumption. */
10620 if (frame_pointer_needed && frame.red_zone_size)
10621 emit_insn (gen_memory_blockage ());
10623 /* Emit cld instruction if stringops are used in the function. */
10624 if (TARGET_CLD && ix86_current_function_needs_cld)
10625 emit_insn (gen_cld ());
10627 /* SEH requires that the prologue end within 256 bytes of the start of
10628 the function. Prevent instruction schedules that would extend that. */
10630 emit_insn (gen_blockage ());
10633 /* Emit code to restore REG using a POP insn. */
10636 ix86_emit_restore_reg_using_pop (rtx reg)
10638 struct machine_function *m = cfun->machine;
10639 rtx insn = emit_insn (gen_pop (reg));
10641 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10642 m->fs.sp_offset -= UNITS_PER_WORD;
10644 if (m->fs.cfa_reg == crtl->drap_reg
10645 && REGNO (reg) == REGNO (crtl->drap_reg))
10647 /* Previously we'd represented the CFA as an expression
10648 like *(%ebp - 8). We've just popped that value from
10649 the stack, which means we need to reset the CFA to
10650 the drap register. This will remain until we restore
10651 the stack pointer. */
10652 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10653 RTX_FRAME_RELATED_P (insn) = 1;
10655 /* This means that the DRAP register is valid for addressing too. */
10656 m->fs.drap_valid = true;
10660 if (m->fs.cfa_reg == stack_pointer_rtx)
10662 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10663 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10664 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10665 RTX_FRAME_RELATED_P (insn) = 1;
10667 m->fs.cfa_offset -= UNITS_PER_WORD;
10670 /* When the frame pointer is the CFA, and we pop it, we are
10671 swapping back to the stack pointer as the CFA. This happens
10672 for stack frames that don't allocate other data, so we assume
10673 the stack pointer is now pointing at the return address, i.e.
10674 the function entry state, which makes the offset be 1 word. */
10675 if (reg == hard_frame_pointer_rtx)
10677 m->fs.fp_valid = false;
10678 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10680 m->fs.cfa_reg = stack_pointer_rtx;
10681 m->fs.cfa_offset -= UNITS_PER_WORD;
10683 add_reg_note (insn, REG_CFA_DEF_CFA,
10684 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10685 GEN_INT (m->fs.cfa_offset)));
10686 RTX_FRAME_RELATED_P (insn) = 1;
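/* A worked example of the bookkeeping above, assuming 32-bit code with
   the CFA tracked as %esp + 8: a "popl %ebx" raises %esp by 4, so the
   attached note rewrites the CFA rule to %esp + 4.  Every pop thus both
   restores a register and shrinks the sp-to-CFA distance by
   UNITS_PER_WORD.  */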
10691 /* Emit code to restore saved registers using POP insns. */
10694 ix86_emit_restore_regs_using_pop (void)
10696 unsigned int regno;
10698 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10699 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10700 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10703 /* Emit code and notes for the LEAVE instruction. */
10706 ix86_emit_leave (void)
10708 struct machine_function *m = cfun->machine;
10709 rtx insn = emit_insn (ix86_gen_leave ());
10711 ix86_add_queued_cfa_restore_notes (insn);
10713 gcc_assert (m->fs.fp_valid);
10714 m->fs.sp_valid = true;
10715 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10716 m->fs.fp_valid = false;
10718 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10720 m->fs.cfa_reg = stack_pointer_rtx;
10721 m->fs.cfa_offset = m->fs.sp_offset;
10723 add_reg_note (insn, REG_CFA_DEF_CFA,
10724 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10725 RTX_FRAME_RELATED_P (insn) = 1;
10726 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10727 m->fs.fp_offset);
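/* The leave insn behaves as the two-insn sequence

       movl %ebp, %esp
       popl %ebp

   which is why the stack pointer becomes valid again above: after the
   pop it sits one word closer to the CFA than the saved frame pointer
   did, hence sp_offset = fp_offset - UNITS_PER_WORD.  */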
10731 /* Emit code to restore saved registers using MOV insns.
10732 First register is restored from CFA - CFA_OFFSET. */
10734 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10735 int maybe_eh_return)
10737 struct machine_function *m = cfun->machine;
10738 unsigned int regno;
10740 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10741 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10743 rtx reg = gen_rtx_REG (Pmode, regno);
10746 mem = choose_baseaddr (cfa_offset);
10747 mem = gen_frame_mem (Pmode, mem);
10748 insn = emit_move_insn (reg, mem);
10750 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10752 /* Previously we'd represented the CFA as an expression
10753 like *(%ebp - 8). We've just popped that value from
10754 the stack, which means we need to reset the CFA to
10755 the drap register. This will remain until we restore
10756 the stack pointer. */
10757 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10758 RTX_FRAME_RELATED_P (insn) = 1;
10760 /* This means that the DRAP register is valid for addressing. */
10761 m->fs.drap_valid = true;
10764 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10766 cfa_offset -= UNITS_PER_WORD;
10770 /* Emit code to restore saved registers using MOV insns.
10771 First register is restored from CFA - CFA_OFFSET. */
10773 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10774 int maybe_eh_return)
10776 unsigned int regno;
10778 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10779 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10781 rtx reg = gen_rtx_REG (V4SFmode, regno);
10784 mem = choose_baseaddr (cfa_offset);
10785 mem = gen_rtx_MEM (V4SFmode, mem);
10786 set_mem_align (mem, 128);
10787 emit_move_insn (reg, mem);
10789 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
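/* The V4SFmode moves above are typically emitted as aligned SSE loads
   (movaps), which fault on unaligned operands; set_mem_align records the
   128-bit alignment the prologue arranged when it chose
   sse_reg_save_offset.  Illustratively (the offset is not literal):

       movaps -48(%rbp), %xmm6  */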
10795 /* Restore function stack, frame, and registers. */
10798 ix86_expand_epilogue (int style)
10800 struct machine_function *m = cfun->machine;
10801 struct machine_frame_state frame_state_save = m->fs;
10802 struct ix86_frame frame;
10803 bool restore_regs_via_mov;
10806 ix86_finalize_stack_realign_flags ();
10807 ix86_compute_frame_layout (&frame);
10809 m->fs.sp_valid = (!frame_pointer_needed
10810 || (current_function_sp_is_unchanging
10811 && !stack_realign_fp));
10812 gcc_assert (!m->fs.sp_valid
10813 || m->fs.sp_offset == frame.stack_pointer_offset);
10815 /* The FP must be valid if the frame pointer is present. */
10816 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10817 gcc_assert (!m->fs.fp_valid
10818 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10820 /* We must have *some* valid pointer to the stack frame. */
10821 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10823 /* The DRAP is never valid at this point. */
10824 gcc_assert (!m->fs.drap_valid);
10826 /* See the comment about red zone and frame
10827 pointer usage in ix86_expand_prologue. */
10828 if (frame_pointer_needed && frame.red_zone_size)
10829 emit_insn (gen_memory_blockage ());
10831 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10832 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10834 /* Determine the CFA offset of the end of the red-zone. */
10835 m->fs.red_zone_offset = 0;
10836 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10838 /* The red-zone begins below the return address. */
10839 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10841 /* When the register save area is in the aligned portion of
10842 the stack, determine the maximum runtime displacement that
10843 matches up with the aligned frame. */
10844 if (stack_realign_drap)
10845 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10849 /* Special care must be taken for the normal return case of a function
10850 using eh_return: the eax and edx registers are marked as saved, but
10851 not restored along this path. Adjust the save location to match. */
10852 if (crtl->calls_eh_return && style != 2)
10853 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10855 /* EH_RETURN requires the use of moves to function properly. */
10856 if (crtl->calls_eh_return)
10857 restore_regs_via_mov = true;
10858 /* SEH requires the use of pops to identify the epilogue. */
10859 else if (TARGET_SEH)
10860 restore_regs_via_mov = false;
10861 /* If we're only restoring one register and sp is not valid, then
10862 use a move instruction to restore the register, since it's
10863 less work than reloading sp and popping the register. */
10864 else if (!m->fs.sp_valid && frame.nregs <= 1)
10865 restore_regs_via_mov = true;
10866 else if (TARGET_EPILOGUE_USING_MOVE
10867 && cfun->machine->use_fast_prologue_epilogue
10868 && (frame.nregs > 1
10869 || m->fs.sp_offset != frame.reg_save_offset))
10870 restore_regs_via_mov = true;
10871 else if (frame_pointer_needed
10873 && m->fs.sp_offset != frame.reg_save_offset)
10874 restore_regs_via_mov = true;
10875 else if (frame_pointer_needed
10876 && TARGET_USE_LEAVE
10877 && cfun->machine->use_fast_prologue_epilogue
10878 && frame.nregs == 1)
10879 restore_regs_via_mov = true;
10880 else
10881 restore_regs_via_mov = false;
10883 if (restore_regs_via_mov || frame.nsseregs)
10885 /* Ensure that the entire register save area is addressable via
10886 the stack pointer, if we will restore via sp. */
10887 if (TARGET_64BIT
10888 && m->fs.sp_offset > 0x7fffffff
10889 && !(m->fs.fp_valid || m->fs.drap_valid)
10890 && (frame.nsseregs + frame.nregs) != 0)
10892 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10893 GEN_INT (m->fs.sp_offset
10894 - frame.sse_reg_save_offset),
10896 m->fs.cfa_reg == stack_pointer_rtx);
10900 /* If there are any SSE registers to restore, then we have to do it
10901 via moves, since there's obviously no pop for SSE regs. */
10902 if (frame.nsseregs)
10903 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10906 if (restore_regs_via_mov)
10911 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10913 /* eh_return epilogues need %ecx added to the stack pointer. */
10916 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10918 /* Stack realignment doesn't work with eh_return. */
10919 gcc_assert (!stack_realign_drap);
10920 /* Neither do regparm nested functions. */
10921 gcc_assert (!ix86_static_chain_on_stack);
10923 if (frame_pointer_needed)
10925 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10926 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10927 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10929 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10930 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10932 /* Note that we use SA as a temporary CFA, as the return
10933 address is at the proper place relative to it. We
10934 pretend this happens at the FP restore insn because
10935 prior to this insn the FP would be stored at the wrong
10936 offset relative to SA, and after this insn we have no
10937 other reasonable register to use for the CFA. We don't
10938 bother resetting the CFA to the SP for the duration of
10939 the return insn. */
10940 add_reg_note (insn, REG_CFA_DEF_CFA,
10941 plus_constant (sa, UNITS_PER_WORD));
10942 ix86_add_queued_cfa_restore_notes (insn);
10943 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10944 RTX_FRAME_RELATED_P (insn) = 1;
10946 m->fs.cfa_reg = sa;
10947 m->fs.cfa_offset = UNITS_PER_WORD;
10948 m->fs.fp_valid = false;
10950 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10951 const0_rtx, style, false);
10955 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10956 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10957 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10958 ix86_add_queued_cfa_restore_notes (insn);
10960 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10961 if (m->fs.cfa_offset != UNITS_PER_WORD)
10963 m->fs.cfa_offset = UNITS_PER_WORD;
10964 add_reg_note (insn, REG_CFA_DEF_CFA,
10965 plus_constant (stack_pointer_rtx,
10967 RTX_FRAME_RELATED_P (insn) = 1;
10970 m->fs.sp_offset = UNITS_PER_WORD;
10971 m->fs.sp_valid = true;
10976 /* SEH requires that the function end with (1) a stack adjustment
10977 if necessary, (2) a sequence of pops, and (3) a return or
10978 jump instruction. Prevent insns from the function body from
10979 being scheduled into this sequence. */
10982 /* Prevent a catch region from being adjacent to the standard
10983 epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda nor
10984 several other flags that would be interesting to test are
10985 set up at this point. */
10986 if (flag_non_call_exceptions)
10987 emit_insn (gen_nops (const1_rtx));
10989 emit_insn (gen_blockage ());
10992 /* First step is to deallocate the stack frame so that we can
10993 pop the registers. */
10994 if (!m->fs.sp_valid)
10996 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10997 GEN_INT (m->fs.fp_offset
10998 - frame.reg_save_offset),
11001 else if (m->fs.sp_offset != frame.reg_save_offset)
11003 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11004 GEN_INT (m->fs.sp_offset
11005 - frame.reg_save_offset),
11007 m->fs.cfa_reg == stack_pointer_rtx);
11010 ix86_emit_restore_regs_using_pop ();
11013 /* If we used a frame pointer and haven't already got rid of it,
11014 then do so now. */
11015 if (m->fs.fp_valid)
11017 /* If the stack pointer is valid and pointing at the frame
11018 pointer store address, then we only need a pop. */
11019 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11020 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11021 /* Leave results in shorter dependency chains on CPUs that are
11022 able to grok it fast. */
11023 else if (TARGET_USE_LEAVE
11024 || optimize_function_for_size_p (cfun)
11025 || !cfun->machine->use_fast_prologue_epilogue)
11026 ix86_emit_leave ();
11029 pro_epilogue_adjust_stack (stack_pointer_rtx,
11030 hard_frame_pointer_rtx,
11031 const0_rtx, style, !using_drap);
11032 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11038 int param_ptr_offset = UNITS_PER_WORD;
11041 gcc_assert (stack_realign_drap);
11043 if (ix86_static_chain_on_stack)
11044 param_ptr_offset += UNITS_PER_WORD;
11045 if (!call_used_regs[REGNO (crtl->drap_reg)])
11046 param_ptr_offset += UNITS_PER_WORD;
11048 insn = emit_insn (gen_rtx_SET
11049 (VOIDmode, stack_pointer_rtx,
11050 gen_rtx_PLUS (Pmode,
11052 GEN_INT (-param_ptr_offset))));
11053 m->fs.cfa_reg = stack_pointer_rtx;
11054 m->fs.cfa_offset = param_ptr_offset;
11055 m->fs.sp_offset = param_ptr_offset;
11056 m->fs.realigned = false;
11058 add_reg_note (insn, REG_CFA_DEF_CFA,
11059 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11060 GEN_INT (param_ptr_offset)));
11061 RTX_FRAME_RELATED_P (insn) = 1;
11063 if (!call_used_regs[REGNO (crtl->drap_reg)])
11064 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11067 /* At this point the stack pointer must be valid, and we must have
11068 restored all of the registers. We may not have deallocated the
11069 entire stack frame. We've delayed this until now because it may
11070 be possible to merge the local stack deallocation with the
11071 deallocation forced by ix86_static_chain_on_stack. */
11072 gcc_assert (m->fs.sp_valid);
11073 gcc_assert (!m->fs.fp_valid);
11074 gcc_assert (!m->fs.realigned);
11075 if (m->fs.sp_offset != UNITS_PER_WORD)
11077 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11078 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11082 /* Sibcall epilogues don't want a return instruction. */
11085 m->fs = frame_state_save;
11089 /* Emit vzeroupper if needed. */
11090 if (TARGET_VZEROUPPER
11091 && !TREE_THIS_VOLATILE (cfun->decl)
11092 && !cfun->machine->caller_return_avx256_p)
11093 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
11095 if (crtl->args.pops_args && crtl->args.size)
11097 rtx popc = GEN_INT (crtl->args.pops_args);
11099 /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
11100 address, do an explicit add, and jump indirectly to the caller. */
11102 if (crtl->args.pops_args >= 65536)
11104 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11107 /* There is no "pascal" calling convention in any 64bit ABI. */
11108 gcc_assert (!TARGET_64BIT);
11110 insn = emit_insn (gen_pop (ecx));
11111 m->fs.cfa_offset -= UNITS_PER_WORD;
11112 m->fs.sp_offset -= UNITS_PER_WORD;
11114 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11115 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11116 add_reg_note (insn, REG_CFA_REGISTER,
11117 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11118 RTX_FRAME_RELATED_P (insn) = 1;
11120 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11122 emit_jump_insn (gen_return_indirect_internal (ecx));
11125 emit_jump_insn (gen_return_pop_internal (popc));
11128 emit_jump_insn (gen_return_internal ());
11130 /* Restore the state back to the state from the prologue,
11131 so that it's correct for the next epilogue. */
11132 m->fs = frame_state_save;
11135 /* Reset from the function's potential modifications. */
11138 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11139 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11141 if (pic_offset_table_rtx)
11142 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11144 /* Mach-O doesn't support labels at the end of objects, so if
11145 it looks like we might want one, insert a NOP. */
11147 rtx insn = get_last_insn ();
11150 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11151 insn = PREV_INSN (insn);
11155 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11156 fputs ("\tnop\n", file);
11162 /* Return a scratch register to use in the split stack prologue. The
11163 split stack prologue is used for -fsplit-stack. It is the first
11164 instructions in the function, even before the regular prologue.
11165 The scratch register can be any caller-saved register which is not
11166 used for parameters or for the static chain. */
11168 static unsigned int
11169 split_stack_prologue_scratch_regno (void)
11178 is_fastcall = (lookup_attribute ("fastcall",
11179 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11180 != NULL);
11181 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11185 if (DECL_STATIC_CHAIN (cfun->decl))
11187 sorry ("-fsplit-stack does not support fastcall with "
11188 "nested function");
11189 return INVALID_REGNUM;
11193 else if (regparm < 3)
11195 if (!DECL_STATIC_CHAIN (cfun->decl))
11201 sorry ("-fsplit-stack does not support 2 register "
11202 "parameters for a nested function");
11203 return INVALID_REGNUM;
11210 /* FIXME: We could make this work by pushing a register
11211 around the addition and comparison. */
11212 sorry ("-fsplit-stack does not support 3 register parameters");
11213 return INVALID_REGNUM;
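/* Roughly: 64-bit code can simply hand back %r11, while the 32-bit cases
   above must dodge %eax/%edx/%ecx when they carry regparm or fastcall
   parameters and %ecx when it carries the static chain; when every
   candidate register is taken, all we can do is sorry.  */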
11218 /* A SYMBOL_REF for the function which allocates new stack space for
11219 -fsplit-stack. */
11221 static GTY(()) rtx split_stack_fn;
11223 /* A SYMBOL_REF for the more-stack function when using the large
11224 code model. */
11228 /* Handle -fsplit-stack. These are the first instructions in the
11229 function, even before the regular prologue. */
11232 ix86_expand_split_stack_prologue (void)
11234 struct ix86_frame frame;
11235 HOST_WIDE_INT allocate;
11236 unsigned HOST_WIDE_INT args_size;
11237 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11238 rtx scratch_reg = NULL_RTX;
11239 rtx varargs_label = NULL_RTX;
11242 gcc_assert (flag_split_stack && reload_completed);
11244 ix86_finalize_stack_realign_flags ();
11245 ix86_compute_frame_layout (&frame);
11246 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11248 /* This is the label we will branch to if we have enough stack
11249 space. We expect the basic block reordering pass to reverse this
11250 branch if optimizing, so that we branch in the unlikely case. */
11251 label = gen_label_rtx ();
11253 /* We need to compare the stack pointer minus the frame size with
11254 the stack boundary in the TCB. The stack boundary always gives
11255 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11256 can compare directly. Otherwise we need to do an addition. */
11258 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11259 UNSPEC_STACK_CHECK);
11260 limit = gen_rtx_CONST (Pmode, limit);
11261 limit = gen_rtx_MEM (Pmode, limit);
11262 if (allocate < SPLIT_STACK_AVAILABLE)
11263 current = stack_pointer_rtx;
11266 unsigned int scratch_regno;
11269 /* We need a scratch register to hold the stack pointer minus
11270 the required frame size. Since this is the very start of the
11271 function, the scratch register can be any caller-saved
11272 register which is not used for parameters. */
11273 offset = GEN_INT (- allocate);
11274 scratch_regno = split_stack_prologue_scratch_regno ();
11275 if (scratch_regno == INVALID_REGNUM)
11277 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11278 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11280 /* We don't use ix86_gen_add3 in this case because it will
11281 want to split to lea, but when not optimizing the insn
11282 will not be split after this point. */
11283 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11284 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11289 emit_move_insn (scratch_reg, offset);
11290 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11291 stack_pointer_rtx));
11293 current = scratch_reg;
11296 ix86_expand_branch (GEU, current, limit, label);
11297 jump_insn = get_last_insn ();
11298 JUMP_LABEL (jump_insn) = label;
11300 /* Mark the jump as very likely to be taken. */
11301 add_reg_note (jump_insn, REG_BR_PROB,
11302 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
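/* REG_BR_PROB_BASE is 10000, so the probability attached above works out
   to 10000 - 10000/100 = 9900: the branch past the __morestack call is
   predicted taken 99% of the time, keeping the call on the cold path.  */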
11304 if (split_stack_fn == NULL_RTX)
11305 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11306 fn = split_stack_fn;
11308 /* Get more stack space. We pass in the desired stack space and the
11309 size of the arguments to copy to the new stack. In 32-bit mode
11310 we push the parameters; __morestack will return on a new stack
11311 anyhow.  In 64-bit mode we pass the parameters in r10 and
11312 r11. */
11313 allocate_rtx = GEN_INT (allocate);
11314 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11315 call_fusage = NULL_RTX;
11320 reg10 = gen_rtx_REG (Pmode, R10_REG);
11321 reg11 = gen_rtx_REG (Pmode, R11_REG);
11323 /* If this function uses a static chain, it will be in %r10.
11324 Preserve it across the call to __morestack. */
11325 if (DECL_STATIC_CHAIN (cfun->decl))
11329 rax = gen_rtx_REG (Pmode, AX_REG);
11330 emit_move_insn (rax, reg10);
11331 use_reg (&call_fusage, rax);
11334 if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11336 HOST_WIDE_INT argval;
11338 /* When using the large model we need to load the address
11339 into a register, and we've run out of registers. So we
11340 switch to a different calling convention, and we call a
11341 different function: __morestack_large. We pass the
11342 argument size in the upper 32 bits of r10 and pass the
11343 frame size in the lower 32 bits. */
11344 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11345 gcc_assert ((args_size & 0xffffffff) == args_size);
11347 if (split_stack_fn_large == NULL_RTX)
11348 split_stack_fn_large =
11349 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11351 if (ix86_cmodel == CM_LARGE_PIC)
11355 label = gen_label_rtx ();
11356 emit_label (label);
11357 LABEL_PRESERVE_P (label) = 1;
11358 emit_insn (gen_set_rip_rex64 (reg10, label));
11359 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11360 emit_insn (gen_adddi3 (reg10, reg10, reg11));
11361 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11363 x = gen_rtx_CONST (Pmode, x);
11364 emit_move_insn (reg11, x);
11365 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11366 x = gen_const_mem (Pmode, x);
11367 emit_move_insn (reg11, x);
11370 emit_move_insn (reg11, split_stack_fn_large);
11374 argval = ((args_size << 16) << 16) + allocate;
11375 emit_move_insn (reg10, GEN_INT (argval));
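/* The double shift is deliberate: ((args_size << 16) << 16) moves
   args_size into the upper 32 bits without ever shifting by 32 in a
   single step, which would be undefined behavior if HOST_WIDE_INT were
   only 32 bits wide.  For example, args_size == 8 with
   allocate == 0x120 produces argval == 0x0000000800000120.  */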
11379 emit_move_insn (reg10, allocate_rtx);
11380 emit_move_insn (reg11, GEN_INT (args_size));
11381 use_reg (&call_fusage, reg11);
11384 use_reg (&call_fusage, reg10);
11388 emit_insn (gen_push (GEN_INT (args_size)));
11389 emit_insn (gen_push (allocate_rtx));
11391 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11392 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11394 add_function_usage_to (call_insn, call_fusage);
11396 /* In order to make call/return prediction work right, we now need
11397 to execute a return instruction. See
11398 libgcc/config/i386/morestack.S for the details on how this works.
11400 For flow purposes gcc must not see this as a return
11401 instruction--we need control flow to continue at the subsequent
11402 label. Therefore, we use an unspec. */
11403 gcc_assert (crtl->args.pops_args < 65536);
11404 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11406 /* If we are in 64-bit mode and this function uses a static chain,
11407 we saved %r10 in %rax before calling __morestack. */
11408 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11409 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11410 gen_rtx_REG (Pmode, AX_REG));
11412 /* If this function calls va_start, we need to store a pointer to
11413 the arguments on the old stack, because they may not have been
11414 all copied to the new stack. At this point the old stack can be
11415 found at the frame pointer value used by __morestack, because
11416 __morestack has set that up before calling back to us. Here we
11417 store that pointer in a scratch register, and in
11418 ix86_expand_prologue we store the scratch register in a stack
11419 slot. */
11420 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11422 unsigned int scratch_regno;
11426 scratch_regno = split_stack_prologue_scratch_regno ();
11427 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11428 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11430 /* 64-bit:
11431 fp -> old fp value
11432 return address within this function
11433 return address of caller of this function
11434 stack arguments
11435 So we add three words to get to the stack arguments.
11437 32-bit:
11438 fp -> old fp value
11439 return address within this function
11440 first argument to __morestack
11441 second argument to __morestack
11442 return address of caller of this function
11443 stack arguments
11444 So we add five words to get to the stack arguments. */
11446 words = TARGET_64BIT ? 3 : 5;
11447 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11448 gen_rtx_PLUS (Pmode, frame_reg,
11449 GEN_INT (words * UNITS_PER_WORD))));
11451 varargs_label = gen_label_rtx ();
11452 emit_jump_insn (gen_jump (varargs_label));
11453 JUMP_LABEL (get_last_insn ()) = varargs_label;
11458 emit_label (label);
11459 LABEL_NUSES (label) = 1;
11461 /* If this function calls va_start, we now have to set the scratch
11462 register for the case where we do not call __morestack. In this
11463 case we need to set it based on the stack pointer. */
11464 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11466 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11467 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11468 GEN_INT (UNITS_PER_WORD))));
11470 emit_label (varargs_label);
11471 LABEL_NUSES (varargs_label) = 1;
11475 /* We may have to tell the dataflow pass that the split stack prologue
11476 is initializing a scratch register. */
11479 ix86_live_on_entry (bitmap regs)
11481 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11483 gcc_assert (flag_split_stack);
11484 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11488 /* Extract the parts of an RTL expression that is a valid memory address
11489 for an instruction. Return 0 if the structure of the address is
11490 grossly off.  Return -1 if the address contains ASHIFT, so it is not
11491 strictly valid, but is still used for computing the length of the lea instruction. */
11494 ix86_decompose_address (rtx addr, struct ix86_address *out)
11496 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11497 rtx base_reg, index_reg;
11498 HOST_WIDE_INT scale = 1;
11499 rtx scale_rtx = NULL_RTX;
11502 enum ix86_address_seg seg = SEG_DEFAULT;
11504 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11506 else if (GET_CODE (addr) == PLUS)
11508 rtx addends[4], op;
11516 addends[n++] = XEXP (op, 1);
11519 while (GET_CODE (op) == PLUS);
11524 for (i = n; i >= 0; --i)
11527 switch (GET_CODE (op))
11532 index = XEXP (op, 0);
11533 scale_rtx = XEXP (op, 1);
11539 index = XEXP (op, 0);
11540 tmp = XEXP (op, 1);
11541 if (!CONST_INT_P (tmp))
11543 scale = INTVAL (tmp);
11544 if ((unsigned HOST_WIDE_INT) scale > 3)
11546 scale = 1 << scale;
11550 if (XINT (op, 1) == UNSPEC_TP
11551 && TARGET_TLS_DIRECT_SEG_REFS
11552 && seg == SEG_DEFAULT)
11553 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11582 else if (GET_CODE (addr) == MULT)
11584 index = XEXP (addr, 0); /* index*scale */
11585 scale_rtx = XEXP (addr, 1);
11587 else if (GET_CODE (addr) == ASHIFT)
11589 /* We're called for lea too, which implements ashift on occasion. */
11590 index = XEXP (addr, 0);
11591 tmp = XEXP (addr, 1);
11592 if (!CONST_INT_P (tmp))
11594 scale = INTVAL (tmp);
11595 if ((unsigned HOST_WIDE_INT) scale > 3)
11597 scale = 1 << scale;
11601 disp = addr; /* displacement */
11603 /* Extract the integral value of scale. */
11604 if (scale_rtx)
11606 if (!CONST_INT_P (scale_rtx))
11607 return 0;
11608 scale = INTVAL (scale_rtx);
11611 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11612 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11614 /* Avoid useless 0 displacement. */
11615 if (disp == const0_rtx && (base || index))
11616 disp = NULL_RTX;
11618 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
11619 if (base_reg && index_reg && scale == 1
11620 && (index_reg == arg_pointer_rtx
11621 || index_reg == frame_pointer_rtx
11622 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11625 tmp = base, base = index, index = tmp;
11626 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11629 /* Special case: %ebp cannot be encoded as a base without a displacement.
11630 Similarly %r13. */
11631 if (!disp
11632 && base_reg
11633 && (base_reg == hard_frame_pointer_rtx
11634 || base_reg == frame_pointer_rtx
11635 || base_reg == arg_pointer_rtx
11636 || (REG_P (base_reg)
11637 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11638 || REGNO (base_reg) == R13_REG))))
11639 disp = const0_rtx;
11641 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11642 Avoid this by transforming to [%esi+0].
11643 Reload calls address legitimization without cfun defined, so we need
11644 to test cfun for being non-NULL. */
11645 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11646 && base_reg && !index_reg && !disp
11647 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11650 /* Special case: encode reg+reg instead of reg*2. */
11651 if (!base && index && scale == 2)
11652 base = index, base_reg = index_reg, scale = 1;
11654 /* Special case: scaling cannot be encoded without base or displacement. */
11655 if (!base && !disp && index && scale != 1)
11659 out->index = index;
11661 out->scale = scale;
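/* A small worked example of the decomposition, assuming 32-bit code:

       (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
                (plus:SI (reg:SI %ebx) (const_int 12)))

   yields base = %ebx, index = %eax, scale = 4, disp = 12, i.e. the
   operand of an address of the form 12(%ebx,%eax,4).  */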
11667 /* Return the cost of the memory address x.
11668 For i386, it is better to use a complex address than let gcc copy
11669 the address into a register and make a new pseudo.  But not if the address
11670 requires two regs - that would mean more pseudos with longer
11671 lifetimes. */
11672 static int
11673 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11675 struct ix86_address parts;
11677 int ok = ix86_decompose_address (x, &parts);
11681 if (parts.base && GET_CODE (parts.base) == SUBREG)
11682 parts.base = SUBREG_REG (parts.base);
11683 if (parts.index && GET_CODE (parts.index) == SUBREG)
11684 parts.index = SUBREG_REG (parts.index);
11686 /* Attempt to minimize number of registers in the address. */
11688 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11690 && (!REG_P (parts.index)
11691 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11695 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11697 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11698 && parts.base != parts.index)
11701 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11702 since its predecode logic can't detect the length of instructions
11703 and it degenerates to vector decoded.  Increase the cost of such
11704 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
11705 to split such addresses or even refuse such addresses at all.
11707 The following addressing modes are affected:
11708 [base+scale*index]
11709 [scale*index+disp]
11710 [base+index]
11712 The first and last case may be avoidable by explicitly coding the zero in
11713 the memory address, but I don't have an AMD-K6 machine handy to check this
11714 theory. */
11716 if (TARGET_K6
11717 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11718 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11719 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11725 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11726 this is used to form addresses to local data when -fPIC is in
11727 effect. */
11729 static bool
11730 darwin_local_data_pic (rtx disp)
11732 return (GET_CODE (disp) == UNSPEC
11733 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11736 /* Determine if a given RTX is a valid constant. We already know this
11737 satisfies CONSTANT_P. */
11740 legitimate_constant_p (rtx x)
11742 switch (GET_CODE (x))
11747 if (GET_CODE (x) == PLUS)
11749 if (!CONST_INT_P (XEXP (x, 1)))
11754 if (TARGET_MACHO && darwin_local_data_pic (x))
11757 /* Only some unspecs are valid as "constants". */
11758 if (GET_CODE (x) == UNSPEC)
11759 switch (XINT (x, 1))
11762 case UNSPEC_GOTOFF:
11763 case UNSPEC_PLTOFF:
11764 return TARGET_64BIT;
11766 case UNSPEC_NTPOFF:
11767 x = XVECEXP (x, 0, 0);
11768 return (GET_CODE (x) == SYMBOL_REF
11769 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11770 case UNSPEC_DTPOFF:
11771 x = XVECEXP (x, 0, 0);
11772 return (GET_CODE (x) == SYMBOL_REF
11773 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11778 /* We must have drilled down to a symbol. */
11779 if (GET_CODE (x) == LABEL_REF)
11781 if (GET_CODE (x) != SYMBOL_REF)
11786 /* TLS symbols are never valid. */
11787 if (SYMBOL_REF_TLS_MODEL (x))
11790 /* DLLIMPORT symbols are never valid. */
11791 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11792 && SYMBOL_REF_DLLIMPORT_P (x))
11796 /* mdynamic-no-pic */
11797 if (MACHO_DYNAMIC_NO_PIC_P)
11798 return machopic_symbol_defined_p (x);
11803 if (GET_MODE (x) == TImode
11804 && x != CONST0_RTX (TImode)
11810 if (!standard_sse_constant_p (x))
11817 /* Otherwise we handle everything else in the move patterns. */
11821 /* Determine if it's legal to put X into the constant pool. This
11822 is not possible for the address of thread-local symbols, which
11823 is checked above. */
11826 ix86_cannot_force_const_mem (rtx x)
11828 /* We can always put integral constants and vectors in memory. */
11829 switch (GET_CODE (x))
11839 return !legitimate_constant_p (x);
11843 /* Nonzero if the constant value X is a legitimate general operand
11844 when generating PIC code. It is given that flag_pic is on and
11845 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11848 legitimate_pic_operand_p (rtx x)
11852 switch (GET_CODE (x))
11855 inner = XEXP (x, 0);
11856 if (GET_CODE (inner) == PLUS
11857 && CONST_INT_P (XEXP (inner, 1)))
11858 inner = XEXP (inner, 0);
11860 /* Only some unspecs are valid as "constants". */
11861 if (GET_CODE (inner) == UNSPEC)
11862 switch (XINT (inner, 1))
11865 case UNSPEC_GOTOFF:
11866 case UNSPEC_PLTOFF:
11867 return TARGET_64BIT;
11869 x = XVECEXP (inner, 0, 0);
11870 return (GET_CODE (x) == SYMBOL_REF
11871 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11872 case UNSPEC_MACHOPIC_OFFSET:
11873 return legitimate_pic_address_disp_p (x);
11881 return legitimate_pic_address_disp_p (x);
11888 /* Determine if a given CONST RTX is a valid memory displacement
11889 in PIC mode. */
11892 legitimate_pic_address_disp_p (rtx disp)
11896 /* In 64bit mode we can allow direct addresses of symbols and labels
11897 when they are not dynamic symbols. */
11900 rtx op0 = disp, op1;
11902 switch (GET_CODE (disp))
11908 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11910 op0 = XEXP (XEXP (disp, 0), 0);
11911 op1 = XEXP (XEXP (disp, 0), 1);
11912 if (!CONST_INT_P (op1)
11913 || INTVAL (op1) >= 16*1024*1024
11914 || INTVAL (op1) < -16*1024*1024)
11916 if (GET_CODE (op0) == LABEL_REF)
11918 if (GET_CODE (op0) != SYMBOL_REF)
11923 /* TLS references should always be enclosed in UNSPEC. */
11924 if (SYMBOL_REF_TLS_MODEL (op0))
11926 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11927 && ix86_cmodel != CM_LARGE_PIC)
11935 if (GET_CODE (disp) != CONST)
11937 disp = XEXP (disp, 0);
11941 /* It is unsafe to allow PLUS expressions here; this limits the allowed
11942 distance of GOT tables.  We should not need these anyway. */
11943 if (GET_CODE (disp) != UNSPEC
11944 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11945 && XINT (disp, 1) != UNSPEC_GOTOFF
11946 && XINT (disp, 1) != UNSPEC_PCREL
11947 && XINT (disp, 1) != UNSPEC_PLTOFF))
11950 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11951 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11957 if (GET_CODE (disp) == PLUS)
11959 if (!CONST_INT_P (XEXP (disp, 1)))
11961 disp = XEXP (disp, 0);
11965 if (TARGET_MACHO && darwin_local_data_pic (disp))
11968 if (GET_CODE (disp) != UNSPEC)
11971 switch (XINT (disp, 1))
11976 /* We need to check for both symbols and labels because VxWorks loads
11977 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
11978 details. */
11979 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11980 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11981 case UNSPEC_GOTOFF:
11982 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11983 While the ABI also specifies a 32bit relocation, we don't produce
11984 it in the small PIC model at all. */
11985 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11986 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11988 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11990 case UNSPEC_GOTTPOFF:
11991 case UNSPEC_GOTNTPOFF:
11992 case UNSPEC_INDNTPOFF:
11995 disp = XVECEXP (disp, 0, 0);
11996 return (GET_CODE (disp) == SYMBOL_REF
11997 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11998 case UNSPEC_NTPOFF:
11999 disp = XVECEXP (disp, 0, 0);
12000 return (GET_CODE (disp) == SYMBOL_REF
12001 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12002 case UNSPEC_DTPOFF:
12003 disp = XVECEXP (disp, 0, 0);
12004 return (GET_CODE (disp) == SYMBOL_REF
12005 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12011 /* Recognizes RTL expressions that are valid memory addresses for an
12012 instruction. The MODE argument is the machine mode for the MEM
12013 expression that wants to use this address.
12015 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
12016 convert common non-canonical forms to canonical form so that they will
12017 be recognized. */
12019 static bool
12020 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12021 rtx addr, bool strict)
12023 struct ix86_address parts;
12024 rtx base, index, disp;
12025 HOST_WIDE_INT scale;
12027 if (ix86_decompose_address (addr, &parts) <= 0)
12028 /* Decomposition failed. */
12032 index = parts.index;
12034 scale = parts.scale;
12036 /* Validate base register.
12038 Don't allow SUBREG's that span more than a word here. It can lead to spill
12039 failures when the base is one word out of a two word structure, which is
12040 represented internally as a DImode int. */
12048 else if (GET_CODE (base) == SUBREG
12049 && REG_P (SUBREG_REG (base))
12050 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
12052 reg = SUBREG_REG (base);
12054 /* Base is not a register. */
12057 if (GET_MODE (base) != Pmode)
12058 /* Base is not in Pmode. */
12061 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12062 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12063 /* Base is not valid. */
12067 /* Validate index register.
12069 Don't allow SUBREG's that span more than a word here -- same as above. */
12077 else if (GET_CODE (index) == SUBREG
12078 && REG_P (SUBREG_REG (index))
12079 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
12081 reg = SUBREG_REG (index);
12083 /* Index is not a register. */
12086 if (GET_MODE (index) != Pmode)
12087 /* Index is not in Pmode. */
12090 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12091 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12092 /* Index is not valid. */
12096 /* Validate scale factor. */
12100 /* Scale without index. */
12103 if (scale != 2 && scale != 4 && scale != 8)
12104 /* Scale is not a valid multiplier. */
12108 /* Validate displacement. */
12111 if (GET_CODE (disp) == CONST
12112 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12113 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12114 switch (XINT (XEXP (disp, 0), 1))
12116 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12117 used.  While the ABI also specifies 32bit relocations, we don't
12118 produce them at all and use IP-relative addressing instead. */
12120 case UNSPEC_GOTOFF:
12121 gcc_assert (flag_pic);
12123 goto is_legitimate_pic;
12125 /* 64bit address unspec. */
12128 case UNSPEC_GOTPCREL:
12130 gcc_assert (flag_pic);
12131 goto is_legitimate_pic;
12133 case UNSPEC_GOTTPOFF:
12134 case UNSPEC_GOTNTPOFF:
12135 case UNSPEC_INDNTPOFF:
12136 case UNSPEC_NTPOFF:
12137 case UNSPEC_DTPOFF:
12140 case UNSPEC_STACK_CHECK:
12141 gcc_assert (flag_split_stack);
12145 /* Invalid address unspec. */
12149 else if (SYMBOLIC_CONST (disp)
12153 && MACHOPIC_INDIRECT
12154 && !machopic_operand_p (disp)
12160 if (TARGET_64BIT && (index || base))
12162 /* foo@dtpoff(%rX) is ok. */
12163 if (GET_CODE (disp) != CONST
12164 || GET_CODE (XEXP (disp, 0)) != PLUS
12165 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12166 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12167 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12168 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12169 /* Non-constant pic memory reference. */
12172 else if ((!TARGET_MACHO || flag_pic)
12173 && ! legitimate_pic_address_disp_p (disp))
12174 /* Displacement is an invalid pic construct. */
12177 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
12178 /* The displacement must be referenced via a non_lazy_pointer. */
12182 /* This code used to verify that a symbolic pic displacement
12183 includes the pic_offset_table_rtx register.
12185 While this is a good idea, unfortunately these constructs may
12186 be created by the "adds using lea" optimization for incorrect
12187 code like:
12189 int a;
12190 int foo(int i)
12191 {
12192 return *(&a+i);
12193 }
12195 This code is nonsensical, but results in addressing the
12196 GOT table with a pic_offset_table_rtx base.  We can't
12197 just refuse it easily, since it gets matched by the
12198 "addsi3" pattern, that later gets split to lea in case
12199 the output register differs from the input.  While this
12200 can be handled by a separate addsi pattern for this case
12201 that never results in lea, it seems to be easier and
12202 correct to fix the crash by disabling this test. */
12204 else if (GET_CODE (disp) != LABEL_REF
12205 && !CONST_INT_P (disp)
12206 && (GET_CODE (disp) != CONST
12207 || !legitimate_constant_p (disp))
12208 && (GET_CODE (disp) != SYMBOL_REF
12209 || !legitimate_constant_p (disp)))
12210 /* Displacement is not constant. */
12212 else if (TARGET_64BIT
12213 && !x86_64_immediate_operand (disp, VOIDmode))
12214 /* Displacement is out of range. */
12218 /* Everything looks valid. */
12222 /* Determine if a given RTX is a valid constant address. */
12225 constant_address_p (rtx x)
12227 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12230 /* Return a unique alias set for the GOT. */
12232 static alias_set_type
12233 ix86_GOT_alias_set (void)
12235 static alias_set_type set = -1;
12236 if (set == -1)
12237 set = new_alias_set ();
12238 return set;
12241 /* Return a legitimate reference for ORIG (an address) using the
12242 register REG. If REG is 0, a new pseudo is generated.
12244 There are two types of references that must be handled:
12246 1. Global data references must load the address from the GOT, via
12247 the PIC reg. An insn is emitted to do this load, and the reg is
12250 2. Static data references, constant pool addresses, and code labels
12251 compute the address as an offset from the GOT, whose base is in
12252 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12253 differentiate them from global data objects. The returned
12254 address is the PIC reg + an unspec constant.
12256 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12257 reg also appears in the address. */
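/* As a rough illustration for 32-bit ELF, with %ebx holding the PIC
   register, the two reference types correspond to code like:

       movl  sym@GOT(%ebx), %eax      # type 1: load address from GOT
       leal  sym@GOTOFF(%ebx), %eax   # type 2: offset from GOT base

   The UNSPEC_GOT/UNSPEC_GOTOFF RTL constructed below prints as the
   @GOT/@GOTOFF relocations shown here.  */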
12260 legitimize_pic_address (rtx orig, rtx reg)
12263 rtx new_rtx = orig;
12267 if (TARGET_MACHO && !TARGET_64BIT)
12270 reg = gen_reg_rtx (Pmode);
12271 /* Use the generic Mach-O PIC machinery. */
12272 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12276 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12277 new_rtx = addr;
12278 else if (TARGET_64BIT
12279 && ix86_cmodel != CM_SMALL_PIC
12280 && gotoff_operand (addr, Pmode))
12283 /* This symbol may be referenced via a displacement from the PIC
12284 base address (@GOTOFF). */
12286 if (reload_in_progress)
12287 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12288 if (GET_CODE (addr) == CONST)
12289 addr = XEXP (addr, 0);
12290 if (GET_CODE (addr) == PLUS)
12292 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12294 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12297 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12298 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12300 tmpreg = gen_reg_rtx (Pmode);
12303 emit_move_insn (tmpreg, new_rtx);
12307 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12308 tmpreg, 1, OPTAB_DIRECT);
12311 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12313 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12315 /* This symbol may be referenced via a displacement from the PIC
12316 base address (@GOTOFF). */
12318 if (reload_in_progress)
12319 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12320 if (GET_CODE (addr) == CONST)
12321 addr = XEXP (addr, 0);
12322 if (GET_CODE (addr) == PLUS)
12324 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12326 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12329 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12330 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12331 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12335 emit_move_insn (reg, new_rtx);
12339 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12340 /* We can't use @GOTOFF for text labels on VxWorks;
12341 see gotoff_operand. */
12342 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12344 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12346 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12347 return legitimize_dllimport_symbol (addr, true);
12348 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12349 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12350 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12352 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12353 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12357 /* For x64 PE-COFF there is no GOT table.  So we use the address directly.  */
12359 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12361 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12362 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12365 reg = gen_reg_rtx (Pmode);
12366 emit_move_insn (reg, new_rtx);
12369 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12371 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12372 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12373 new_rtx = gen_const_mem (Pmode, new_rtx);
12374 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12377 reg = gen_reg_rtx (Pmode);
12378 /* Use gen_movsi directly; otherwise the address is loaded
12379    into a register for CSE.  We don't want to CSE these addresses;
12380    instead we CSE the addresses loaded from the GOT table, so skip this.  */
12381 emit_insn (gen_movsi (reg, new_rtx));
12386 /* This symbol must be referenced via a load from the
12387 Global Offset Table (@GOT). */
12389 if (reload_in_progress)
12390 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12391 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12392 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12394 new_rtx = force_reg (Pmode, new_rtx);
12395 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12396 new_rtx = gen_const_mem (Pmode, new_rtx);
12397 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12400 reg = gen_reg_rtx (Pmode);
12401 emit_move_insn (reg, new_rtx);
12407 if (CONST_INT_P (addr)
12408 && !x86_64_immediate_operand (addr, VOIDmode))
12412 emit_move_insn (reg, addr);
12416 new_rtx = force_reg (Pmode, addr);
12418 else if (GET_CODE (addr) == CONST)
12420 addr = XEXP (addr, 0);
12422 /* We must match stuff we generate before. Assume the only
12423 unspecs that can get here are ours. Not that we could do
12424 anything with them anyway.... */
12425 if (GET_CODE (addr) == UNSPEC
12426 || (GET_CODE (addr) == PLUS
12427 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12429 gcc_assert (GET_CODE (addr) == PLUS);
12431 if (GET_CODE (addr) == PLUS)
12433 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12435 /* Check first to see if this is a constant offset from a @GOTOFF
12436 symbol reference. */
12437 if (gotoff_operand (op0, Pmode)
12438 && CONST_INT_P (op1))
12442 if (reload_in_progress)
12443 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12444 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12446 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12447 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12448 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12452 emit_move_insn (reg, new_rtx);
12458 if (INTVAL (op1) < -16*1024*1024
12459 || INTVAL (op1) >= 16*1024*1024)
12461 if (!x86_64_immediate_operand (op1, Pmode))
12462 op1 = force_reg (Pmode, op1);
12463 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12469 base = legitimize_pic_address (XEXP (addr, 0), reg);
12470 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12471 base == reg ? NULL_RTX : reg);
12473 if (CONST_INT_P (new_rtx))
12474 new_rtx = plus_constant (base, INTVAL (new_rtx));
12477 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12479 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12480 new_rtx = XEXP (new_rtx, 1);
12482 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12490 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12493 get_thread_pointer (int to_reg)
12497 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12501 reg = gen_reg_rtx (Pmode);
12502 insn = gen_rtx_SET (VOIDmode, reg, tp);
12503 insn = emit_insn (insn);
12508 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12509 false if we expect this to be used for a memory address and true if
12510 we expect to load the address into a register. */
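/* As a rough illustration (assumed GNU/Linux output, not a byte-for-byte
   guarantee), the models handled below correspond to sequences such as:
     global-dynamic (64-bit): leaq x@tlsgd(%rip),%rdi; call __tls_get_addr
     initial-exec   (64-bit): movq x@gottpoff(%rip),%r; addq %fs:0,%r
     local-exec     (64-bit): movq %fs:0,%r; leaq x@tpoff(%r),%r
   The 32-bit and TARGET_GNU2_TLS variants differ in detail.  */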
12513 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12515 rtx dest, base, off, pic, tp;
12520 case TLS_MODEL_GLOBAL_DYNAMIC:
12521 dest = gen_reg_rtx (Pmode);
12522 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12524 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12526 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12529 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12530 insns = get_insns ();
12533 RTL_CONST_CALL_P (insns) = 1;
12534 emit_libcall_block (insns, dest, rax, x);
12536 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12537 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12539 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12541 if (TARGET_GNU2_TLS)
12543 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12545 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12549 case TLS_MODEL_LOCAL_DYNAMIC:
12550 base = gen_reg_rtx (Pmode);
12551 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12553 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12555 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12558 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12559 insns = get_insns ();
12562 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12563 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12564 RTL_CONST_CALL_P (insns) = 1;
12565 emit_libcall_block (insns, base, rax, note);
12567 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12568 emit_insn (gen_tls_local_dynamic_base_64 (base));
12570 emit_insn (gen_tls_local_dynamic_base_32 (base));
12572 if (TARGET_GNU2_TLS)
12574 rtx x = ix86_tls_module_base ();
12576 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12577 gen_rtx_MINUS (Pmode, x, tp));
12580 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12581 off = gen_rtx_CONST (Pmode, off);
12583 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12585 if (TARGET_GNU2_TLS)
12587 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12589 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12594 case TLS_MODEL_INITIAL_EXEC:
12597 if (TARGET_SUN_TLS)
12599 /* The Sun linker took the AMD64 TLS spec literally
12600    and can only handle %rax as the destination of the
12601    initial-exec TLS code sequence.  */
12603 dest = gen_reg_rtx (Pmode);
12604 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12609 type = UNSPEC_GOTNTPOFF;
12613 if (reload_in_progress)
12614 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12615 pic = pic_offset_table_rtx;
12616 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12618 else if (!TARGET_ANY_GNU_TLS)
12620 pic = gen_reg_rtx (Pmode);
12621 emit_insn (gen_set_got (pic));
12622 type = UNSPEC_GOTTPOFF;
12627 type = UNSPEC_INDNTPOFF;
12630 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12631 off = gen_rtx_CONST (Pmode, off);
12633 off = gen_rtx_PLUS (Pmode, pic, off);
12634 off = gen_const_mem (Pmode, off);
12635 set_mem_alias_set (off, ix86_GOT_alias_set ());
12637 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12639 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12640 off = force_reg (Pmode, off);
12641 return gen_rtx_PLUS (Pmode, base, off);
12645 base = get_thread_pointer (true);
12646 dest = gen_reg_rtx (Pmode);
12647 emit_insn (gen_subsi3 (dest, base, off));
12651 case TLS_MODEL_LOCAL_EXEC:
12652 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12653 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12654 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12655 off = gen_rtx_CONST (Pmode, off);
12657 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12659 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12660 return gen_rtx_PLUS (Pmode, base, off);
12664 base = get_thread_pointer (true);
12665 dest = gen_reg_rtx (Pmode);
12666 emit_insn (gen_subsi3 (dest, base, off));
12671 gcc_unreachable ();
12677 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12680 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12681 htab_t dllimport_map;
12684 get_dllimport_decl (tree decl)
12686 struct tree_map *h, in;
12689 const char *prefix;
12690 size_t namelen, prefixlen;
12695 if (!dllimport_map)
12696 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12698 in.hash = htab_hash_pointer (decl);
12699 in.base.from = decl;
12700 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12701 h = (struct tree_map *) *loc;
12705 *loc = h = ggc_alloc_tree_map ();
12707 h->base.from = decl;
12708 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12709 VAR_DECL, NULL, ptr_type_node);
12710 DECL_ARTIFICIAL (to) = 1;
12711 DECL_IGNORED_P (to) = 1;
12712 DECL_EXTERNAL (to) = 1;
12713 TREE_READONLY (to) = 1;
12715 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12716 name = targetm.strip_name_encoding (name);
12717 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12718 ? "*__imp_" : "*__imp__";
12719 namelen = strlen (name);
12720 prefixlen = strlen (prefix);
12721 imp_name = (char *) alloca (namelen + prefixlen + 1);
12722 memcpy (imp_name, prefix, prefixlen);
12723 memcpy (imp_name + prefixlen, name, namelen + 1);
12725 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12726 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12727 SET_SYMBOL_REF_DECL (rtl, to);
12728 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12730 rtl = gen_const_mem (Pmode, rtl);
12731 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12733 SET_DECL_RTL (to, rtl);
12734 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12739 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12740 true if we require the result be a register. */
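/* Illustrative only: a reference to a dllimported `foo' is redirected
   through its import-table slot, e.g. a call becomes roughly
   "call *__imp_foo", and data accesses first load the pointer stored
   at __imp_foo.  */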
12743 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12748 gcc_assert (SYMBOL_REF_DECL (symbol));
12749 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12751 x = DECL_RTL (imp_decl);
12753 x = force_reg (Pmode, x);
12757 /* Try machine-dependent ways of modifying an illegitimate address
12758 to be legitimate. If we find one, return the new, valid address.
12759 This macro is used in only one place: `memory_address' in explow.c.
12761 OLDX is the address as it was before break_out_memory_refs was called.
12762 In some cases it is useful to look at this to decide what needs to be done.
12764 It is always safe for this macro to do nothing. It exists to recognize
12765 opportunities to optimize the output.
12767 For the 80386, we handle X+REG by loading X into a register R and
12768 using R+REG. R will go in a general reg and indexing will be used.
12769 However, if REG is a broken-out memory address or multiplication,
12770 nothing needs to be done because REG can certainly go in a general reg.
12772 When -fpic is used, special handling is needed for symbolic references.
12773 See comments by legitimize_pic_address in i386.c for details. */
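/* A sketch of the canonicalizations below:
   (plus (ashift (reg) (const_int 2)) (reg)) is rewritten to
   (plus (mult (reg) (const_int 4)) (reg)), which matches the hardware
   scaled-index form "(%base,%index,4)".  */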
12776 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12777 enum machine_mode mode)
12782 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12784 return legitimize_tls_address (x, (enum tls_model) log, false);
12785 if (GET_CODE (x) == CONST
12786 && GET_CODE (XEXP (x, 0)) == PLUS
12787 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12788 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12790 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12791 (enum tls_model) log, false);
12792 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12795 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12797 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12798 return legitimize_dllimport_symbol (x, true);
12799 if (GET_CODE (x) == CONST
12800 && GET_CODE (XEXP (x, 0)) == PLUS
12801 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12802 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12804 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12805 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12809 if (flag_pic && SYMBOLIC_CONST (x))
12810 return legitimize_pic_address (x, 0);
12813 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12814 return machopic_indirect_data_reference (x, 0);
12817 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12818 if (GET_CODE (x) == ASHIFT
12819 && CONST_INT_P (XEXP (x, 1))
12820 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12823 log = INTVAL (XEXP (x, 1));
12824 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12825 GEN_INT (1 << log));
12828 if (GET_CODE (x) == PLUS)
12830 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12832 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12833 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12834 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12837 log = INTVAL (XEXP (XEXP (x, 0), 1));
12838 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12839 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12840 GEN_INT (1 << log));
12843 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12844 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12845 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12848 log = INTVAL (XEXP (XEXP (x, 1), 1));
12849 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12850 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12851 GEN_INT (1 << log));
12854 /* Put multiply first if it isn't already. */
12855 if (GET_CODE (XEXP (x, 1)) == MULT)
12857 rtx tmp = XEXP (x, 0);
12858 XEXP (x, 0) = XEXP (x, 1);
12863 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12864 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12865 created by virtual register instantiation, register elimination, and
12866 similar optimizations. */
12867 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12870 x = gen_rtx_PLUS (Pmode,
12871 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12872 XEXP (XEXP (x, 1), 0)),
12873 XEXP (XEXP (x, 1), 1));
12877 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12878 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12879 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12880 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12881 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12882 && CONSTANT_P (XEXP (x, 1)))
12885 rtx other = NULL_RTX;
12887 if (CONST_INT_P (XEXP (x, 1)))
12889 constant = XEXP (x, 1);
12890 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12892 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12894 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12895 other = XEXP (x, 1);
12903 x = gen_rtx_PLUS (Pmode,
12904 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12905 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12906 plus_constant (other, INTVAL (constant)));
12910 if (changed && ix86_legitimate_address_p (mode, x, false))
12913 if (GET_CODE (XEXP (x, 0)) == MULT)
12916 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12919 if (GET_CODE (XEXP (x, 1)) == MULT)
12922 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12926 && REG_P (XEXP (x, 1))
12927 && REG_P (XEXP (x, 0)))
12930 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12933 x = legitimize_pic_address (x, 0);
12936 if (changed && ix86_legitimate_address_p (mode, x, false))
12939 if (REG_P (XEXP (x, 0)))
12941 rtx temp = gen_reg_rtx (Pmode);
12942 rtx val = force_operand (XEXP (x, 1), temp);
12944 emit_move_insn (temp, val);
12946 XEXP (x, 1) = temp;
12950 else if (REG_P (XEXP (x, 1)))
12952 rtx temp = gen_reg_rtx (Pmode);
12953 rtx val = force_operand (XEXP (x, 0), temp);
12955 emit_move_insn (temp, val);
12957 XEXP (x, 0) = temp;
12965 /* Print an integer constant expression in assembler syntax. Addition
12966 and subtraction are the only arithmetic that may appear in these
12967 expressions. FILE is the stdio stream to write to, X is the rtx, and
12968 CODE is the operand print code from the output string. */
12971 output_pic_addr_const (FILE *file, rtx x, int code)
12975 switch (GET_CODE (x))
12978 gcc_assert (flag_pic);
12983 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12984 output_addr_const (file, x);
12987 const char *name = XSTR (x, 0);
12989 /* Mark the decl as referenced so that cgraph will
12990 output the function. */
12991 if (SYMBOL_REF_DECL (x))
12992 mark_decl_referenced (SYMBOL_REF_DECL (x));
12995 if (MACHOPIC_INDIRECT
12996 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12997 name = machopic_indirection_name (x, /*stub_p=*/true);
12999 assemble_name (file, name);
13001 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
13002 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13003 fputs ("@PLT", file);
13010 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13011 assemble_name (asm_out_file, buf);
13015 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13019 /* This used to output parentheses around the expression,
13020 but that does not work on the 386 (either ATT or BSD assembler). */
13021 output_pic_addr_const (file, XEXP (x, 0), code);
13025 if (GET_MODE (x) == VOIDmode)
13027 /* We can use %d if the number is <32 bits and positive. */
13028 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13029 fprintf (file, "0x%lx%08lx",
13030 (unsigned long) CONST_DOUBLE_HIGH (x),
13031 (unsigned long) CONST_DOUBLE_LOW (x));
13033 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13036 /* We can't handle floating point constants;
13037 TARGET_PRINT_OPERAND must handle them. */
13038 output_operand_lossage ("floating constant misused");
13042 /* Some assemblers need integer constants to appear first. */
13043 if (CONST_INT_P (XEXP (x, 0)))
13045 output_pic_addr_const (file, XEXP (x, 0), code);
13047 output_pic_addr_const (file, XEXP (x, 1), code);
13051 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13052 output_pic_addr_const (file, XEXP (x, 1), code);
13054 output_pic_addr_const (file, XEXP (x, 0), code);
13060 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13061 output_pic_addr_const (file, XEXP (x, 0), code);
13063 output_pic_addr_const (file, XEXP (x, 1), code);
13065 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13069 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13071 bool f = i386_asm_output_addr_const_extra (file, x);
13076 gcc_assert (XVECLEN (x, 0) == 1);
13077 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13078 switch (XINT (x, 1))
13081 fputs ("@GOT", file);
13083 case UNSPEC_GOTOFF:
13084 fputs ("@GOTOFF", file);
13086 case UNSPEC_PLTOFF:
13087 fputs ("@PLTOFF", file);
13090 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13091 "(%rip)" : "[rip]", file);
13093 case UNSPEC_GOTPCREL:
13094 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13095 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13097 case UNSPEC_GOTTPOFF:
13098 /* FIXME: This might be @TPOFF in Sun ld too. */
13099 fputs ("@gottpoff", file);
13102 fputs ("@tpoff", file);
13104 case UNSPEC_NTPOFF:
13106 fputs ("@tpoff", file);
13108 fputs ("@ntpoff", file);
13110 case UNSPEC_DTPOFF:
13111 fputs ("@dtpoff", file);
13113 case UNSPEC_GOTNTPOFF:
13115 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13116 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13118 fputs ("@gotntpoff", file);
13120 case UNSPEC_INDNTPOFF:
13121 fputs ("@indntpoff", file);
13124 case UNSPEC_MACHOPIC_OFFSET:
13126 machopic_output_function_base_name (file);
13130 output_operand_lossage ("invalid UNSPEC as operand");
13136 output_operand_lossage ("invalid expression as operand");
13140 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13141 We need to emit DTP-relative relocations. */
13143 static void ATTRIBUTE_UNUSED
13144 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13146 fputs (ASM_LONG, file);
13147 output_addr_const (file, x);
13148 fputs ("@dtpoff", file);
13154 fputs (", 0", file);
13157 gcc_unreachable ();
13161 /* Return true if X is a representation of the PIC register. This copes
13162 with calls from ix86_find_base_term, where the register might have
13163 been replaced by a cselib value. */
13166 ix86_pic_register_p (rtx x)
13168 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13169 return (pic_offset_table_rtx
13170 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13172 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13175 /* Helper function for ix86_delegitimize_address.
13176 Attempt to delegitimize TLS local-exec accesses. */
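/* Sketch of the shape handled here: an access such as %fs:x@tpoff is
   decomposed as a SEG_FS (64-bit) or SEG_GS (32-bit) address whose
   displacement wraps an UNSPEC_NTPOFF, and is rewritten back to plain
   `x'.  */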
13179 ix86_delegitimize_tls_address (rtx orig_x)
13181 rtx x = orig_x, unspec;
13182 struct ix86_address addr;
13184 if (!TARGET_TLS_DIRECT_SEG_REFS)
13188 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13190 if (ix86_decompose_address (x, &addr) == 0
13191 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13192 || addr.disp == NULL_RTX
13193 || GET_CODE (addr.disp) != CONST)
13195 unspec = XEXP (addr.disp, 0);
13196 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13197 unspec = XEXP (unspec, 0);
13198 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13200 x = XVECEXP (unspec, 0, 0);
13201 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13202 if (unspec != XEXP (addr.disp, 0))
13203 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13206 rtx idx = addr.index;
13207 if (addr.scale != 1)
13208 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13209 x = gen_rtx_PLUS (Pmode, idx, x);
13212 x = gen_rtx_PLUS (Pmode, addr.base, x);
13213 if (MEM_P (orig_x))
13214 x = replace_equiv_address_nv (orig_x, x);
13218 /* In the name of slightly smaller debug output, and to cater to
13219 general assembler lossage, recognize PIC+GOTOFF and turn it back
13220 into a direct symbol reference.
13222 On Darwin, this is necessary to avoid a crash, because Darwin
13223 has a different PIC label for each routine but the DWARF debugging
13224 information is not associated with any particular routine, so it's
13225 necessary to remove references to the PIC label from RTL stored by
13226 the DWARF output code. */
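/* A typical (illustrative) input/output pair:
   (plus:SI (reg:SI ebx) (const:SI (unspec:SI [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) delegitimizes back to (symbol_ref "foo"); constant
   and register addends are peeled off and re-added below.  */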
13229 ix86_delegitimize_address (rtx x)
13231 rtx orig_x = delegitimize_mem_from_attrs (x);
13232 /* addend is NULL or some rtx if x is something+GOTOFF where
13233 something doesn't include the PIC register. */
13234 rtx addend = NULL_RTX;
13235 /* reg_addend is NULL or a multiple of some register. */
13236 rtx reg_addend = NULL_RTX;
13237 /* const_addend is NULL or a const_int. */
13238 rtx const_addend = NULL_RTX;
13239 /* This is the result, or NULL. */
13240 rtx result = NULL_RTX;
13249 if (GET_CODE (x) != CONST
13250 || GET_CODE (XEXP (x, 0)) != UNSPEC
13251 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13252 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13253 || !MEM_P (orig_x))
13254 return ix86_delegitimize_tls_address (orig_x);
13255 x = XVECEXP (XEXP (x, 0), 0, 0);
13256 if (GET_MODE (orig_x) != Pmode)
13257 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13261 if (GET_CODE (x) != PLUS
13262 || GET_CODE (XEXP (x, 1)) != CONST)
13263 return ix86_delegitimize_tls_address (orig_x);
13265 if (ix86_pic_register_p (XEXP (x, 0)))
13266 /* %ebx + GOT/GOTOFF */
13268 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13270 /* %ebx + %reg * scale + GOT/GOTOFF */
13271 reg_addend = XEXP (x, 0);
13272 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13273 reg_addend = XEXP (reg_addend, 1);
13274 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13275 reg_addend = XEXP (reg_addend, 0);
13278 reg_addend = NULL_RTX;
13279 addend = XEXP (x, 0);
13283 addend = XEXP (x, 0);
13285 x = XEXP (XEXP (x, 1), 0);
13286 if (GET_CODE (x) == PLUS
13287 && CONST_INT_P (XEXP (x, 1)))
13289 const_addend = XEXP (x, 1);
13293 if (GET_CODE (x) == UNSPEC
13294 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13295 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13296 result = XVECEXP (x, 0, 0);
13298 if (TARGET_MACHO && darwin_local_data_pic (x)
13299 && !MEM_P (orig_x))
13300 result = XVECEXP (x, 0, 0);
13303 return ix86_delegitimize_tls_address (orig_x);
13306 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13308 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13311 /* If the rest of original X doesn't involve the PIC register, add
13312 addend and subtract pic_offset_table_rtx. This can happen e.g.
13314 leal (%ebx, %ecx, 4), %ecx
13316 movl foo@GOTOFF(%ecx), %edx
13317 in which case we return (%ecx - %ebx) + foo. */
13318 if (pic_offset_table_rtx)
13319 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13320 pic_offset_table_rtx),
13325 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13326 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13330 /* If X is a machine specific address (i.e. a symbol or label being
13331 referenced as a displacement from the GOT implemented using an
13332 UNSPEC), then return the base term. Otherwise return X. */
13335 ix86_find_base_term (rtx x)
13341 if (GET_CODE (x) != CONST)
13343 term = XEXP (x, 0);
13344 if (GET_CODE (term) == PLUS
13345 && (CONST_INT_P (XEXP (term, 1))
13346 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13347 term = XEXP (term, 0);
13348 if (GET_CODE (term) != UNSPEC
13349 || (XINT (term, 1) != UNSPEC_GOTPCREL
13350 && XINT (term, 1) != UNSPEC_PCREL))
13353 return XVECEXP (term, 0, 0);
13356 return ix86_delegitimize_address (x);
13360 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13361 int fp, FILE *file)
13363 const char *suffix;
13365 if (mode == CCFPmode || mode == CCFPUmode)
13367 code = ix86_fp_compare_code_to_integer (code);
13371 code = reverse_condition (code);
13422 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13426 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13427 Those same assemblers have the same but opposite lossage on cmov. */
13428 if (mode == CCmode)
13429 suffix = fp ? "nbe" : "a";
13430 else if (mode == CCCmode)
13433 gcc_unreachable ();
13449 gcc_unreachable ();
13453 gcc_assert (mode == CCmode || mode == CCCmode);
13470 gcc_unreachable ();
13474 /* ??? As above. */
13475 gcc_assert (mode == CCmode || mode == CCCmode);
13476 suffix = fp ? "nb" : "ae";
13479 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13483 /* ??? As above. */
13484 if (mode == CCmode)
13486 else if (mode == CCCmode)
13487 suffix = fp ? "nb" : "ae";
13489 gcc_unreachable ();
13492 suffix = fp ? "u" : "p";
13495 suffix = fp ? "nu" : "np";
13498 gcc_unreachable ();
13500 fputs (suffix, file);
13503 /* Print the name of register X to FILE based on its machine mode and number.
13504 If CODE is 'w', pretend the mode is HImode.
13505 If CODE is 'b', pretend the mode is QImode.
13506 If CODE is 'k', pretend the mode is SImode.
13507 If CODE is 'q', pretend the mode is DImode.
13508 If CODE is 'x', pretend the mode is V4SFmode.
13509 If CODE is 't', pretend the mode is V8SFmode.
13510 If CODE is 'h', pretend the reg is the 'high' byte register.
13511 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13512 If CODE is 'd', duplicate the operand for AVX instruction.  */
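/* E.g. for (reg:SI ax): code 'b' prints "al", 'w' prints "ax", 'k' prints
   "eax" and 'q' prints "rax", each preceded by '%' in AT&T syntax
   (illustrative; see the mode games below).  */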
13516 print_reg (rtx x, int code, FILE *file)
13519 bool duplicated = code == 'd' && TARGET_AVX;
13521 gcc_assert (x == pc_rtx
13522 || (REGNO (x) != ARG_POINTER_REGNUM
13523 && REGNO (x) != FRAME_POINTER_REGNUM
13524 && REGNO (x) != FLAGS_REG
13525 && REGNO (x) != FPSR_REG
13526 && REGNO (x) != FPCR_REG));
13528 if (ASSEMBLER_DIALECT == ASM_ATT)
13533 gcc_assert (TARGET_64BIT);
13534 fputs ("rip", file);
13538 if (code == 'w' || MMX_REG_P (x))
13540 else if (code == 'b')
13542 else if (code == 'k')
13544 else if (code == 'q')
13546 else if (code == 'y')
13548 else if (code == 'h')
13550 else if (code == 'x')
13552 else if (code == 't')
13555 code = GET_MODE_SIZE (GET_MODE (x));
13557 /* Irritatingly, AMD extended registers use a different naming convention
13558    from the normal registers.  */
13559 if (REX_INT_REG_P (x))
13561 gcc_assert (TARGET_64BIT);
13565 error ("extended registers have no high halves");
13568 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13571 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13574 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13577 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13580 error ("unsupported operand size for extended register");
13590 if (STACK_TOP_P (x))
13599 if (! ANY_FP_REG_P (x))
13600 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13605 reg = hi_reg_name[REGNO (x)];
13608 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13610 reg = qi_reg_name[REGNO (x)];
13613 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13615 reg = qi_high_reg_name[REGNO (x)];
13620 gcc_assert (!duplicated);
13622 fputs (hi_reg_name[REGNO (x)] + 1, file);
13627 gcc_unreachable ();
13633 if (ASSEMBLER_DIALECT == ASM_ATT)
13634 fprintf (file, ", %%%s", reg);
13636 fprintf (file, ", %s", reg);
13640 /* Locate some local-dynamic symbol still in use by this function
13641 so that we can print its name in some tls_local_dynamic_base pattern.  */
13645 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13649 if (GET_CODE (x) == SYMBOL_REF
13650 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13652 cfun->machine->some_ld_name = XSTR (x, 0);
13659 static const char *
13660 get_some_local_dynamic_name (void)
13664 if (cfun->machine->some_ld_name)
13665 return cfun->machine->some_ld_name;
13667 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13668 if (NONDEBUG_INSN_P (insn)
13669 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13670 return cfun->machine->some_ld_name;
13675 /* Meaning of CODE:
13676 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13677 C -- print opcode suffix for set/cmov insn.
13678 c -- like C, but print reversed condition
13679 F,f -- likewise, but for floating-point.
13680 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing.
13682 R -- print the prefix for register names.
13683 z -- print the opcode suffix for the size of the current operand.
13684 Z -- likewise, with special suffixes for x87 instructions.
13685 * -- print a star (in certain assembler syntax)
13686 A -- print an absolute memory reference.
13687 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13688 s -- print a shift double count, followed by the assembler's argument delimiter.
13690 b -- print the QImode name of the register for the indicated operand.
13691 %b0 would print %al if operands[0] is reg 0.
13692 w -- likewise, print the HImode name of the register.
13693 k -- likewise, print the SImode name of the register.
13694 q -- likewise, print the DImode name of the register.
13695 x -- likewise, print the V4SFmode name of the register.
13696 t -- likewise, print the V8SFmode name of the register.
13697 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13698 y -- print "st(0)" instead of "st" as a register.
13699 d -- print duplicated register operand for AVX instruction.
13700 D -- print condition for SSE cmp instruction.
13701 P -- if PIC, print an @PLT suffix.
13702 X -- don't print any sort of PIC '@' suffix for a symbol.
13703 & -- print some in-use local-dynamic symbol name.
13704 H -- print a memory address offset by 8; used for sse high-parts
13705 Y -- print condition for XOP pcom* instruction.
13706 + -- print a branch hint as 'cs' or 'ds' prefix
13707 ; -- print a semicolon (after prefixes due to bug in older gas).
13708 @ -- print a segment register of thread base pointer load
13712 ix86_print_operand (FILE *file, rtx x, int code)
13719 if (ASSEMBLER_DIALECT == ASM_ATT)
13725 const char *name = get_some_local_dynamic_name ();
13727 output_operand_lossage ("'%%&' used without any "
13728 "local dynamic TLS references");
13730 assemble_name (file, name);
13735 switch (ASSEMBLER_DIALECT)
13742 /* Intel syntax. For absolute addresses, registers should not
13743 be surrounded by braces. */
13747 ix86_print_operand (file, x, 0);
13754 gcc_unreachable ();
13757 ix86_print_operand (file, x, 0);
13762 if (ASSEMBLER_DIALECT == ASM_ATT)
13767 if (ASSEMBLER_DIALECT == ASM_ATT)
13772 if (ASSEMBLER_DIALECT == ASM_ATT)
13777 if (ASSEMBLER_DIALECT == ASM_ATT)
13782 if (ASSEMBLER_DIALECT == ASM_ATT)
13787 if (ASSEMBLER_DIALECT == ASM_ATT)
13792 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13794 /* Opcodes don't get size suffixes if using Intel opcodes. */
13795 if (ASSEMBLER_DIALECT == ASM_INTEL)
13798 switch (GET_MODE_SIZE (GET_MODE (x)))
13817 output_operand_lossage
13818 ("invalid operand size for operand code '%c'", code);
13823 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13825 (0, "non-integer operand used with operand code '%c'", code);
13829 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13830 if (ASSEMBLER_DIALECT == ASM_INTEL)
13833 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13835 switch (GET_MODE_SIZE (GET_MODE (x)))
13838 #ifdef HAVE_AS_IX86_FILDS
13848 #ifdef HAVE_AS_IX86_FILDQ
13851 fputs ("ll", file);
13859 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13861 /* 387 opcodes don't get size suffixes
13862 if the operands are registers. */
13863 if (STACK_REG_P (x))
13866 switch (GET_MODE_SIZE (GET_MODE (x)))
13887 output_operand_lossage
13888 ("invalid operand type used with operand code '%c'", code);
13892 output_operand_lossage
13893 ("invalid operand size for operand code '%c'", code);
13910 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13912 ix86_print_operand (file, x, 0);
13913 fputs (", ", file);
13918 /* Little bit of braindamage here.  The SSE compare instructions
13919    use completely different names for the comparisons than the
13920    fp conditional moves do.  */
13923 switch (GET_CODE (x))
13926 fputs ("eq", file);
13929 fputs ("eq_us", file);
13932 fputs ("lt", file);
13935 fputs ("nge", file);
13938 fputs ("le", file);
13941 fputs ("ngt", file);
13944 fputs ("unord", file);
13947 fputs ("neq", file);
13950 fputs ("neq_oq", file);
13953 fputs ("ge", file);
13956 fputs ("nlt", file);
13959 fputs ("gt", file);
13962 fputs ("nle", file);
13965 fputs ("ord", file);
13968 output_operand_lossage ("operand is not a condition code, "
13969 "invalid operand code 'D'");
13975 switch (GET_CODE (x))
13979 fputs ("eq", file);
13983 fputs ("lt", file);
13987 fputs ("le", file);
13990 fputs ("unord", file);
13994 fputs ("neq", file);
13998 fputs ("nlt", file);
14002 fputs ("nle", file);
14005 fputs ("ord", file);
14008 output_operand_lossage ("operand is not a condition code, "
14009 "invalid operand code 'D'");
14015 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14016 if (ASSEMBLER_DIALECT == ASM_ATT)
14018 switch (GET_MODE (x))
14020 case HImode: putc ('w', file); break;
14022 case SFmode: putc ('l', file); break;
14024 case DFmode: putc ('q', file); break;
14025 default: gcc_unreachable ();
14032 if (!COMPARISON_P (x))
14034 output_operand_lossage ("operand is neither a constant nor a "
14035 "condition code, invalid operand code "
14039 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14042 if (!COMPARISON_P (x))
14044 output_operand_lossage ("operand is neither a constant nor a "
14045 "condition code, invalid operand code "
14049 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14050 if (ASSEMBLER_DIALECT == ASM_ATT)
14053 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14056 /* Like above, but reverse condition */
14058 /* Check to see if argument to %c is really a constant
14059 and not a condition code which needs to be reversed. */
14060 if (!COMPARISON_P (x))
14062 output_operand_lossage ("operand is neither a constant nor a "
14063 "condition code, invalid operand "
14067 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14070 if (!COMPARISON_P (x))
14072 output_operand_lossage ("operand is neither a constant nor a "
14073 "condition code, invalid operand "
14077 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14078 if (ASSEMBLER_DIALECT == ASM_ATT)
14081 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14085 /* It doesn't actually matter what mode we use here, as we're
14086 only going to use this for printing. */
14087 x = adjust_address_nv (x, DImode, 8);
14095 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14098 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14101 int pred_val = INTVAL (XEXP (x, 0));
14103 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14104 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14106 int taken = pred_val > REG_BR_PROB_BASE / 2;
14107 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14109 /* Emit hints only in the cases where the default branch-prediction
14110    heuristics would fail.  */
14111 if (taken != cputaken)
14113 /* We use 3e (DS) prefix for taken branches and
14114 2e (CS) prefix for not taken branches. */
14116 fputs ("ds ; ", file);
14118 fputs ("cs ; ", file);
14126 switch (GET_CODE (x))
14129 fputs ("neq", file);
14132 fputs ("eq", file);
14136 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14140 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14144 fputs ("le", file);
14148 fputs ("lt", file);
14151 fputs ("unord", file);
14154 fputs ("ord", file);
14157 fputs ("ueq", file);
14160 fputs ("nlt", file);
14163 fputs ("nle", file);
14166 fputs ("ule", file);
14169 fputs ("ult", file);
14172 fputs ("une", file);
14175 output_operand_lossage ("operand is not a condition code, "
14176 "invalid operand code 'Y'");
14182 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14188 if (ASSEMBLER_DIALECT == ASM_ATT)
14191 /* The kernel uses a different segment register for performance
14192 reasons; a system call would not have to trash the userspace
14193 segment register, which would be expensive. */
14194 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14195 fputs ("fs", file);
14197 fputs ("gs", file);
14201 output_operand_lossage ("invalid operand code '%c'", code);
14206 print_reg (x, code, file);
14208 else if (MEM_P (x))
14210 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14211 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14212 && GET_MODE (x) != BLKmode)
14215 switch (GET_MODE_SIZE (GET_MODE (x)))
14217 case 1: size = "BYTE"; break;
14218 case 2: size = "WORD"; break;
14219 case 4: size = "DWORD"; break;
14220 case 8: size = "QWORD"; break;
14221 case 12: size = "TBYTE"; break;
14223 if (GET_MODE (x) == XFmode)
14228 case 32: size = "YMMWORD"; break;
14230 gcc_unreachable ();
14233 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14236 else if (code == 'w')
14238 else if (code == 'k')
14241 fputs (size, file);
14242 fputs (" PTR ", file);
14246 /* Avoid (%rip) for call operands. */
14247 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14248 && !CONST_INT_P (x))
14249 output_addr_const (file, x);
14250 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14251 output_operand_lossage ("invalid constraints for operand");
14253 output_address (x);
14256 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14261 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14262 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14264 if (ASSEMBLER_DIALECT == ASM_ATT)
14266 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14268 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14270 fprintf (file, "0x%08x", (unsigned int) l);
14273 /* These float cases don't actually occur as immediate operands. */
14274 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14278 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14279 fputs (dstr, file);
14282 else if (GET_CODE (x) == CONST_DOUBLE
14283 && GET_MODE (x) == XFmode)
14287 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14288 fputs (dstr, file);
14293 /* We have patterns that allow zero sets of memory, for instance.
14294 In 64-bit mode, we should probably support all 8-byte vectors,
14295 since we can in fact encode that into an immediate. */
14296 if (GET_CODE (x) == CONST_VECTOR)
14298 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14304 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14306 if (ASSEMBLER_DIALECT == ASM_ATT)
14309 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14310 || GET_CODE (x) == LABEL_REF)
14312 if (ASSEMBLER_DIALECT == ASM_ATT)
14315 fputs ("OFFSET FLAT:", file);
14318 if (CONST_INT_P (x))
14319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14320 else if (flag_pic || MACHOPIC_INDIRECT)
14321 output_pic_addr_const (file, x, code);
14323 output_addr_const (file, x);
14328 ix86_print_operand_punct_valid_p (unsigned char code)
14330 return (code == '@' || code == '*' || code == '+'
14331 || code == '&' || code == ';');
14334 /* Print a memory operand whose address is ADDR. */
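/* E.g. base %ebx, index %ecx, scale 4 and displacement 8 print as
   "8(%ebx,%ecx,4)" in AT&T syntax and roughly "[ebx+ecx*4+8]" in Intel
   syntax.  */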
14337 ix86_print_operand_address (FILE *file, rtx addr)
14339 struct ix86_address parts;
14340 rtx base, index, disp;
14342 int ok = ix86_decompose_address (addr, &parts);
14347 index = parts.index;
14349 scale = parts.scale;
14357 if (ASSEMBLER_DIALECT == ASM_ATT)
14359 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14362 gcc_unreachable ();
14365 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
14366 if (TARGET_64BIT && !base && !index)
14370 if (GET_CODE (disp) == CONST
14371 && GET_CODE (XEXP (disp, 0)) == PLUS
14372 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14373 symbol = XEXP (XEXP (disp, 0), 0);
14375 if (GET_CODE (symbol) == LABEL_REF
14376 || (GET_CODE (symbol) == SYMBOL_REF
14377 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14380 if (!base && !index)
14382 /* A displacement-only address requires special attention.  */
14384 if (CONST_INT_P (disp))
14386 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14387 fputs ("ds:", file);
14388 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14391 output_pic_addr_const (file, disp, 0);
14393 output_addr_const (file, disp);
14397 if (ASSEMBLER_DIALECT == ASM_ATT)
14402 output_pic_addr_const (file, disp, 0);
14403 else if (GET_CODE (disp) == LABEL_REF)
14404 output_asm_label (disp);
14406 output_addr_const (file, disp);
14411 print_reg (base, 0, file);
14415 print_reg (index, 0, file);
14417 fprintf (file, ",%d", scale);
14423 rtx offset = NULL_RTX;
14427 /* Pull out the offset of a symbol; print any symbol itself. */
14428 if (GET_CODE (disp) == CONST
14429 && GET_CODE (XEXP (disp, 0)) == PLUS
14430 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14432 offset = XEXP (XEXP (disp, 0), 1);
14433 disp = gen_rtx_CONST (VOIDmode,
14434 XEXP (XEXP (disp, 0), 0));
14438 output_pic_addr_const (file, disp, 0);
14439 else if (GET_CODE (disp) == LABEL_REF)
14440 output_asm_label (disp);
14441 else if (CONST_INT_P (disp))
14444 output_addr_const (file, disp);
14450 print_reg (base, 0, file);
14453 if (INTVAL (offset) >= 0)
14455 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14459 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14466 print_reg (index, 0, file);
14468 fprintf (file, "*%d", scale);
14475 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14478 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14482 if (GET_CODE (x) != UNSPEC)
14485 op = XVECEXP (x, 0, 0);
14486 switch (XINT (x, 1))
14488 case UNSPEC_GOTTPOFF:
14489 output_addr_const (file, op);
14490 /* FIXME: This might be @TPOFF in Sun ld. */
14491 fputs ("@gottpoff", file);
14494 output_addr_const (file, op);
14495 fputs ("@tpoff", file);
14497 case UNSPEC_NTPOFF:
14498 output_addr_const (file, op);
14500 fputs ("@tpoff", file);
14502 fputs ("@ntpoff", file);
14504 case UNSPEC_DTPOFF:
14505 output_addr_const (file, op);
14506 fputs ("@dtpoff", file);
14508 case UNSPEC_GOTNTPOFF:
14509 output_addr_const (file, op);
14511 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14512 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14514 fputs ("@gotntpoff", file);
14516 case UNSPEC_INDNTPOFF:
14517 output_addr_const (file, op);
14518 fputs ("@indntpoff", file);
14521 case UNSPEC_MACHOPIC_OFFSET:
14522 output_addr_const (file, op);
14524 machopic_output_function_base_name (file);
14528 case UNSPEC_STACK_CHECK:
14532 gcc_assert (flag_split_stack);
14534 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14535 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14537 gcc_unreachable ();
14540 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14551 /* Split one or more double-mode RTL references into pairs of half-mode
14552 references. The RTL can be REG, offsettable MEM, integer constant, or
14553 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14554 split and "num" is its length. lo_half and hi_half are output arrays
14555 that parallel "operands". */
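/* E.g. a DImode pseudo on a 32-bit target splits into
   (subreg:SI (reg:DI n) 0) and (subreg:SI (reg:DI n) 4); MEMs are
   instead offset with adjust_address, as the code below shows.  */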
14558 split_double_mode (enum machine_mode mode, rtx operands[],
14559 int num, rtx lo_half[], rtx hi_half[])
14561 enum machine_mode half_mode;
14567 half_mode = DImode;
14570 half_mode = SImode;
14573 gcc_unreachable ();
14576 byte = GET_MODE_SIZE (half_mode);
14580 rtx op = operands[num];
14582 /* simplify_subreg refuses to split volatile memory addresses,
14583    but we still have to handle them.  */
14586 lo_half[num] = adjust_address (op, half_mode, 0);
14587 hi_half[num] = adjust_address (op, half_mode, byte);
14591 lo_half[num] = simplify_gen_subreg (half_mode, op,
14592 GET_MODE (op) == VOIDmode
14593 ? mode : GET_MODE (op), 0);
14594 hi_half[num] = simplify_gen_subreg (half_mode, op,
14595 GET_MODE (op) == VOIDmode
14596 ? mode : GET_MODE (op), byte);
14601 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14602 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14603 is the expression of the binary operation. The output may either be
14604 emitted here, or returned to the caller, like all output_* functions.
14606 There is no guarantee that the operands are the same mode, as they
14607 might be within FLOAT or FLOAT_EXTEND expressions. */
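/* A sketch of the returned templates: an SSE SFmode addition yields
   "addss\t{%2, %0|%0, %2}" (or, presumably with ssep == "vadd", the
   three-operand "vaddss" form under TARGET_AVX), while the x87 paths
   below pick fadd/fsub{r}/fmul/fdiv{r} variants case by case.  */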
14609 #ifndef SYSV386_COMPAT
14610 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14611 wants to fix the assemblers because that causes incompatibility
14612 with gcc. No-one wants to fix gcc because that causes
14613 incompatibility with assemblers... You can use the option of
14614 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14615 #define SYSV386_COMPAT 1
14619 output_387_binary_op (rtx insn, rtx *operands)
14621 static char buf[40];
14624 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14626 #ifdef ENABLE_CHECKING
14627 /* Even if we do not want to check the inputs, this documents input
14628 constraints. Which helps in understanding the following code. */
14629 if (STACK_REG_P (operands[0])
14630 && ((REG_P (operands[1])
14631 && REGNO (operands[0]) == REGNO (operands[1])
14632 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14633 || (REG_P (operands[2])
14634 && REGNO (operands[0]) == REGNO (operands[2])
14635 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14636 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14639 gcc_assert (is_sse);
14642 switch (GET_CODE (operands[3]))
14645 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14646 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14654 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14655 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14663 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14664 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14672 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14673 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14681 gcc_unreachable ();
14688 strcpy (buf, ssep);
14689 if (GET_MODE (operands[0]) == SFmode)
14690 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14692 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14696 strcpy (buf, ssep + 1);
14697 if (GET_MODE (operands[0]) == SFmode)
14698 strcat (buf, "ss\t{%2, %0|%0, %2}");
14700 strcat (buf, "sd\t{%2, %0|%0, %2}");
14706 switch (GET_CODE (operands[3]))
14710 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14712 rtx temp = operands[2];
14713 operands[2] = operands[1];
14714 operands[1] = temp;
14717 /* Now we know operands[0] == operands[1].  */
14719 if (MEM_P (operands[2]))
14725 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14727 if (STACK_TOP_P (operands[0]))
14728 /* How is it that we are storing to a dead operand[2]?
14729 Well, presumably operands[1] is dead too. We can't
14730 store the result to st(0) as st(0) gets popped on this
14731 instruction. Instead store to operands[2] (which I
14732 think has to be st(1)). st(1) will be popped later.
14733 gcc <= 2.8.1 didn't have this check and generated
14734 assembly code that the Unixware assembler rejected. */
14735 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14737 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14741 if (STACK_TOP_P (operands[0]))
14742 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14744 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14749 if (MEM_P (operands[1]))
14755 if (MEM_P (operands[2]))
14761 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14764 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14765 derived assemblers, confusingly reverse the direction of
14766 the operation for fsub{r} and fdiv{r} when the
14767 destination register is not st(0). The Intel assembler
14768 doesn't have this brain damage. Read !SYSV386_COMPAT to
14769 figure out what the hardware really does. */
14770 if (STACK_TOP_P (operands[0]))
14771 p = "{p\t%0, %2|rp\t%2, %0}";
14773 p = "{rp\t%2, %0|p\t%0, %2}";
14775 if (STACK_TOP_P (operands[0]))
14776 /* As above for fmul/fadd, we can't store to st(0). */
14777 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14779 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14784 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14787 if (STACK_TOP_P (operands[0]))
14788 p = "{rp\t%0, %1|p\t%1, %0}";
14790 p = "{p\t%1, %0|rp\t%0, %1}";
14792 if (STACK_TOP_P (operands[0]))
14793 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14795 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14800 if (STACK_TOP_P (operands[0]))
14802 if (STACK_TOP_P (operands[1]))
14803 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14805 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14808 else if (STACK_TOP_P (operands[1]))
14811 p = "{\t%1, %0|r\t%0, %1}";
14813 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14819 p = "{r\t%2, %0|\t%0, %2}";
14821 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14827 gcc_unreachable ();
14834 /* Return needed mode for entity in optimize_mode_switching pass. */
14837 ix86_mode_needed (int entity, rtx insn)
14839 enum attr_i387_cw mode;
14841 /* The mode UNINITIALIZED is used to store the control word after a
14842    function call or ASM pattern.  The mode ANY specifies that the
14843    function has no requirements on the control word and makes no
14844    changes in the bits we are interested in.  */
14847 || (NONJUMP_INSN_P (insn)
14848 && (asm_noperands (PATTERN (insn)) >= 0
14849 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14850 return I387_CW_UNINITIALIZED;
14852 if (recog_memoized (insn) < 0)
14853 return I387_CW_ANY;
14855 mode = get_attr_i387_cw (insn);
14860 if (mode == I387_CW_TRUNC)
14865 if (mode == I387_CW_FLOOR)
14870 if (mode == I387_CW_CEIL)
14875 if (mode == I387_CW_MASK_PM)
14880 gcc_unreachable ();
14883 return I387_CW_ANY;
14886 /* Output code to initialize the control word copies used by the
14887    trunc?f?i and rounding patterns.  The current control word is saved to
14888    a stack slot, and a modified copy selecting MODE is stored in another.  */
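/* Background for the magic numbers below: the x87 rounding-control field
   is bits 10-11 of the control word (00 nearest, 01 down, 10 up,
   11 toward zero), hence the 0x0c00 mask and the 0x0400/0x0800 values
   or'ed in.  */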
14891 emit_i387_cw_initialization (int mode)
14893 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14896 enum ix86_stack_slot slot;
14898 rtx reg = gen_reg_rtx (HImode);
14900 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14901 emit_move_insn (reg, copy_rtx (stored_mode));
14903 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14904 || optimize_function_for_size_p (cfun))
14908 case I387_CW_TRUNC:
14909 /* round toward zero (truncate) */
14910 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14911 slot = SLOT_CW_TRUNC;
14914 case I387_CW_FLOOR:
14915 /* round down toward -oo */
14916 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14917 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14918 slot = SLOT_CW_FLOOR;
14922 /* round up toward +oo */
14923 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14924 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14925 slot = SLOT_CW_CEIL;
14928 case I387_CW_MASK_PM:
14929 /* mask precision exception for nearbyint() */
14930 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14931 slot = SLOT_CW_MASK_PM;
14935 gcc_unreachable ();
14942 case I387_CW_TRUNC:
14943 /* round toward zero (truncate) */
14944 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14945 slot = SLOT_CW_TRUNC;
14948 case I387_CW_FLOOR:
14949 /* round down toward -oo */
14950 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14951 slot = SLOT_CW_FLOOR;
14955 /* round up toward +oo */
14956 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14957 slot = SLOT_CW_CEIL;
14960 case I387_CW_MASK_PM:
14961 /* mask precision exception for nearbyint() */
14962 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14963 slot = SLOT_CW_MASK_PM;
14967 gcc_unreachable ();
14971 gcc_assert (slot < MAX_386_STACK_LOCALS);
14973 new_mode = assign_386_stack_local (HImode, slot);
14974 emit_move_insn (new_mode, reg);
14977 /* Output code for INSN to convert a float to a signed int. OPERANDS
14978 are the insn operands. The output may be [HSD]Imode and the input
14979 operand may be [SDX]Fmode. */
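/* A typical non-fisttp emission is, roughly:
     fldcw %3   (switch to the truncating control word)
     fistp %0   (store the integer, popping st(0))
     fldcw %2   (restore the previous control word)
   as realized by the output_asm_insn calls below.  */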
14982 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14984 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14985 int dimode_p = GET_MODE (operands[0]) == DImode;
14986 int round_mode = get_attr_i387_cw (insn);
14988 /* Jump through a hoop or two for DImode, since the hardware has no
14989 non-popping instruction. We used to do this a different way, but
14990 that was somewhat fragile and broke with post-reload splitters. */
14991 if ((dimode_p || fisttp) && !stack_top_dies)
14992 output_asm_insn ("fld\t%y1", operands);
14994 gcc_assert (STACK_TOP_P (operands[1]));
14995 gcc_assert (MEM_P (operands[0]));
14996 gcc_assert (GET_MODE (operands[1]) != TFmode);
14999 output_asm_insn ("fisttp%Z0\t%0", operands);
15002 if (round_mode != I387_CW_ANY)
15003 output_asm_insn ("fldcw\t%3", operands);
15004 if (stack_top_dies || dimode_p)
15005 output_asm_insn ("fistp%Z0\t%0", operands);
15007 output_asm_insn ("fist%Z0\t%0", operands);
15008 if (round_mode != I387_CW_ANY)
15009 output_asm_insn ("fldcw\t%2", operands);
15015 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15016 have the values zero or one, indicates the ffreep insn's operand
15017 from the OPERANDS array. */
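/* When the assembler cannot emit ffreep by name, its encoding is laid
   down directly: ffreep %st(N) is the two bytes 0xDF 0xC0+N, so e.g.
   ASM_SHORT "0xc1df" assembles little-endian to df c1, i.e.
   ffreep %st(1).  */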
15019 static const char *
15020 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15022 if (TARGET_USE_FFREEP)
15023 #ifdef HAVE_AS_IX86_FFREEP
15024 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15027 static char retval[32];
15028 int regno = REGNO (operands[opno]);
15030 gcc_assert (FP_REGNO_P (regno));
15032 regno -= FIRST_STACK_REG;
15034 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15039 return opno ? "fstp\t%y1" : "fstp\t%y0";
15043 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15044 should be used. UNORDERED_P is true when fucom should be used. */
15047 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
15049 int stack_top_dies;
15050 rtx cmp_op0, cmp_op1;
15051 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15055 cmp_op0 = operands[0];
15056 cmp_op1 = operands[1];
15060 cmp_op0 = operands[1];
15061 cmp_op1 = operands[2];
15066 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
15067 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
15068 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
15069 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
15071 if (GET_MODE (operands[0]) == SFmode)
15073 return &ucomiss[TARGET_AVX ? 0 : 1];
15075 return &comiss[TARGET_AVX ? 0 : 1];
15078 return &ucomisd[TARGET_AVX ? 0 : 1];
15080 return &comisd[TARGET_AVX ? 0 : 1];
15083 gcc_assert (STACK_TOP_P (cmp_op0));
15085 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15087 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15089 if (stack_top_dies)
15091 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15092 return output_387_ffreep (operands, 1);
15095 return "ftst\n\tfnstsw\t%0";
15098 if (STACK_REG_P (cmp_op1)
15100 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15101 && REGNO (cmp_op1) != FIRST_STACK_REG)
15103 /* If the top of the 387 stack dies, and the other operand
15104 is also a stack register that dies, then this must be an
15105 `fcompp' float compare. */
15109 /* There is no double popping fcomi variant. Fortunately,
15110 eflags is immune from the fstp's cc clobbering. */
15112 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
15114 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
15115 return output_387_ffreep (operands, 0);
15120 return "fucompp\n\tfnstsw\t%0";
15122 return "fcompp\n\tfnstsw\t%0";
15127 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15129 static const char * const alt[16] =
15131 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15132 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15133 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15134 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15136 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15137 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15141 "fcomi\t{%y1, %0|%0, %y1}",
15142 "fcomip\t{%y1, %0|%0, %y1}",
15143 "fucomi\t{%y1, %0|%0, %y1}",
15144 "fucomip\t{%y1, %0|%0, %y1}",
15155 mask = eflags_p << 3;
15156 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
15157 mask |= unordered_p << 1;
15158 mask |= stack_top_dies;
15160 gcc_assert (mask < 16);
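  /* For illustration: a popping unordered fp compare with the stack
     top dying and no fcomi (eflags_p == 0, integer operand == 0,
     unordered_p == 1, stack_top_dies == 1) gives mask == 3 and
     selects "fucomp%Z2\t%y2\n\tfnstsw\t%0" above.  */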
15169 ix86_output_addr_vec_elt (FILE *file, int value)
15171 const char *directive = ASM_LONG;
15175 directive = ASM_QUAD;
15177 gcc_assert (!TARGET_64BIT);
15180 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
15184 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
15186 const char *directive = ASM_LONG;
15189 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15190 directive = ASM_QUAD;
15192 gcc_assert (!TARGET_64BIT);
15194 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15195 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15196 fprintf (file, "%s%s%d-%s%d\n",
15197 directive, LPREFIX, value, LPREFIX, rel);
15198 else if (HAVE_AS_GOTOFF_IN_DATA)
15199 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15201 else if (TARGET_MACHO)
15203 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15204 machopic_output_function_base_name (file);
15209 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15210 GOT_SYMBOL_NAME, LPREFIX, value);
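  /* For example, with value 3 and rel 1 this emits ".long .L3-.L1"
     (".quad" for a DImode case vector) on 64-bit or VxWorks RTP
     targets, ".long .L3@GOTOFF" when the assembler supports @GOTOFF
     in data, and a GOT-relative expression otherwise (assuming
     LPREFIX is ".L").  */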
15213 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15217 ix86_expand_clear (rtx dest)
15221 /* We play register width games, which are only valid after reload. */
15222 gcc_assert (reload_completed);
15224 /* Avoid HImode and its attendant prefix byte. */
15225 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15226 dest = gen_rtx_REG (SImode, REGNO (dest));
15227 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15229 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15230 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15232 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15233 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
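      /* When this branch is taken the insn becomes "xor reg, reg",
	 which is shorter than "mov $0, reg" (e.g. 2 bytes vs. 5 for
	 %eax) but sets the flags, hence the CLOBBER wrapped into the
	 PARALLEL above.  */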
15239 /* X is an unchanging MEM. If it is a constant pool reference, return
15240 the constant pool rtx, else NULL. */
15243 maybe_get_pool_constant (rtx x)
15245 x = ix86_delegitimize_address (XEXP (x, 0));
15247 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15248 return get_pool_constant (x);
15254 ix86_expand_move (enum machine_mode mode, rtx operands[])
15257 enum tls_model model;
15262 if (GET_CODE (op1) == SYMBOL_REF)
15264 model = SYMBOL_REF_TLS_MODEL (op1);
15267 op1 = legitimize_tls_address (op1, model, true);
15268 op1 = force_operand (op1, op0);
15272 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15273 && SYMBOL_REF_DLLIMPORT_P (op1))
15274 op1 = legitimize_dllimport_symbol (op1, false);
15276 else if (GET_CODE (op1) == CONST
15277 && GET_CODE (XEXP (op1, 0)) == PLUS
15278 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15280 rtx addend = XEXP (XEXP (op1, 0), 1);
15281 rtx symbol = XEXP (XEXP (op1, 0), 0);
15284 model = SYMBOL_REF_TLS_MODEL (symbol);
15286 tmp = legitimize_tls_address (symbol, model, true);
15287 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15288 && SYMBOL_REF_DLLIMPORT_P (symbol))
15289 tmp = legitimize_dllimport_symbol (symbol, true);
15293 tmp = force_operand (tmp, NULL);
15294 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15295 op0, 1, OPTAB_DIRECT);
15301 if ((flag_pic || MACHOPIC_INDIRECT)
15302 && mode == Pmode && symbolic_operand (op1, Pmode))
15304 if (TARGET_MACHO && !TARGET_64BIT)
15307 /* dynamic-no-pic */
15308 if (MACHOPIC_INDIRECT)
15310 rtx temp = ((reload_in_progress
15311 || ((op0 && REG_P (op0))
15313 ? op0 : gen_reg_rtx (Pmode));
15314 op1 = machopic_indirect_data_reference (op1, temp);
15316 op1 = machopic_legitimize_pic_address (op1, mode,
15317 temp == op1 ? 0 : temp);
15319 if (op0 != op1 && GET_CODE (op0) != MEM)
15321 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15325 if (GET_CODE (op0) == MEM)
15326 op1 = force_reg (Pmode, op1);
15330 if (GET_CODE (temp) != REG)
15331 temp = gen_reg_rtx (Pmode);
15332 temp = legitimize_pic_address (op1, temp);
15337 /* dynamic-no-pic */
15343 op1 = force_reg (Pmode, op1);
15344 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15346 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15347 op1 = legitimize_pic_address (op1, reg);
15356 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15357 || !push_operand (op0, mode))
15359 op1 = force_reg (mode, op1);
15361 if (push_operand (op0, mode)
15362 && ! general_no_elim_operand (op1, mode))
15363 op1 = copy_to_mode_reg (mode, op1);
15365 /* Force large constants in 64bit compilation into a register
15366 to get them CSEed. */
15367 if (can_create_pseudo_p ()
15368 && (mode == DImode) && TARGET_64BIT
15369 && immediate_operand (op1, mode)
15370 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15371 && !register_operand (op0, mode)
15373 op1 = copy_to_mode_reg (mode, op1);
15375 if (can_create_pseudo_p ()
15376 && FLOAT_MODE_P (mode)
15377 && GET_CODE (op1) == CONST_DOUBLE)
15379 /* If we are loading a floating point constant to a register,
15380 force the value to memory now, since we'll get better code
15381 out of the back end. */
15383 op1 = validize_mem (force_const_mem (mode, op1));
15384 if (!register_operand (op0, mode))
15386 rtx temp = gen_reg_rtx (mode);
15387 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15388 emit_move_insn (op0, temp);
15394 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15398 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15400 rtx op0 = operands[0], op1 = operands[1];
15401 unsigned int align = GET_MODE_ALIGNMENT (mode);
15403 /* Force constants other than zero into memory. We do not know how
15404 the instructions used to build constants modify the upper 64 bits
15405 of the register; once we have that information, we may be able
15406 to handle some of them more efficiently. */
15407 if (can_create_pseudo_p ()
15408 && register_operand (op0, mode)
15409 && (CONSTANT_P (op1)
15410 || (GET_CODE (op1) == SUBREG
15411 && CONSTANT_P (SUBREG_REG (op1))))
15412 && !standard_sse_constant_p (op1))
15413 op1 = validize_mem (force_const_mem (mode, op1));
15415 /* We need to check memory alignment for SSE mode since attributes
15416 can make operands unaligned. */
15417 if (can_create_pseudo_p ()
15418 && SSE_REG_MODE_P (mode)
15419 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15420 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15424 /* ix86_expand_vector_move_misalign() does not like constants ... */
15425 if (CONSTANT_P (op1)
15426 || (GET_CODE (op1) == SUBREG
15427 && CONSTANT_P (SUBREG_REG (op1))))
15428 op1 = validize_mem (force_const_mem (mode, op1));
15430 /* ... nor both arguments in memory. */
15431 if (!register_operand (op0, mode)
15432 && !register_operand (op1, mode))
15433 op1 = force_reg (mode, op1);
15435 tmp[0] = op0; tmp[1] = op1;
15436 ix86_expand_vector_move_misalign (mode, tmp);
15440 /* Make operand1 a register if it isn't already. */
15441 if (can_create_pseudo_p ()
15442 && !register_operand (op0, mode)
15443 && !register_operand (op1, mode))
15445 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15449 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15452 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15453 straight to ix86_expand_vector_move. */
15454 /* Code generation for scalar reg-reg moves of single and double precision data:
15455 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15459 if (x86_sse_partial_reg_dependency == true)
15464 Code generation for scalar loads of double precision data:
15465 if (x86_sse_split_regs == true)
15466 movlpd mem, reg (gas syntax)
15470 Code generation for unaligned packed loads of single precision data
15471 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15472 if (x86_sse_unaligned_move_optimal)
15475 if (x86_sse_partial_reg_dependency == true)
15487 Code generation for unaligned packed loads of double precision data
15488 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15489 if (x86_sse_unaligned_move_optimal)
15492 if (x86_sse_split_regs == true)
15505 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15514 switch (GET_MODE_CLASS (mode))
15516 case MODE_VECTOR_INT:
15518 switch (GET_MODE_SIZE (mode))
15521 /* If we're optimizing for size, movups is the smallest. */
15522 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15524 op0 = gen_lowpart (V4SFmode, op0);
15525 op1 = gen_lowpart (V4SFmode, op1);
15526 emit_insn (gen_avx_movups (op0, op1));
15529 op0 = gen_lowpart (V16QImode, op0);
15530 op1 = gen_lowpart (V16QImode, op1);
15531 emit_insn (gen_avx_movdqu (op0, op1));
15534 op0 = gen_lowpart (V32QImode, op0);
15535 op1 = gen_lowpart (V32QImode, op1);
15536 emit_insn (gen_avx_movdqu256 (op0, op1));
15539 gcc_unreachable ();
15542 case MODE_VECTOR_FLOAT:
15543 op0 = gen_lowpart (mode, op0);
15544 op1 = gen_lowpart (mode, op1);
15549 emit_insn (gen_avx_movups (op0, op1));
15552 emit_insn (gen_avx_movups256 (op0, op1));
15555 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15557 op0 = gen_lowpart (V4SFmode, op0);
15558 op1 = gen_lowpart (V4SFmode, op1);
15559 emit_insn (gen_avx_movups (op0, op1));
15562 emit_insn (gen_avx_movupd (op0, op1));
15565 emit_insn (gen_avx_movupd256 (op0, op1));
15568 gcc_unreachable ();
15573 gcc_unreachable ();
15581 /* If we're optimizing for size, movups is the smallest. */
15582 if (optimize_insn_for_size_p ()
15583 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15585 op0 = gen_lowpart (V4SFmode, op0);
15586 op1 = gen_lowpart (V4SFmode, op1);
15587 emit_insn (gen_sse_movups (op0, op1));
15591 /* ??? If we have typed data, then it would appear that using
15592 movdqu is the only way to get unaligned data loaded with
15594 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15596 op0 = gen_lowpart (V16QImode, op0);
15597 op1 = gen_lowpart (V16QImode, op1);
15598 emit_insn (gen_sse2_movdqu (op0, op1));
15602 if (TARGET_SSE2 && mode == V2DFmode)
15606 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15608 op0 = gen_lowpart (V2DFmode, op0);
15609 op1 = gen_lowpart (V2DFmode, op1);
15610 emit_insn (gen_sse2_movupd (op0, op1));
15614 /* When SSE registers are split into halves, we can avoid
15615 writing to the top half twice. */
15616 if (TARGET_SSE_SPLIT_REGS)
15618 emit_clobber (op0);
15623 /* ??? Not sure about the best option for the Intel chips.
15624 The following would seem to satisfy; the register is
15625 entirely cleared, breaking the dependency chain. We
15626 then store to the upper half, with a dependency depth
15627 of one. A rumor has it that Intel recommends two movsd
15628 followed by an unpacklpd, but this is unconfirmed. And
15629 given that the dependency depth of the unpacklpd would
15630 still be one, I'm not sure why this would be better. */
15631 zero = CONST0_RTX (V2DFmode);
15634 m = adjust_address (op1, DFmode, 0);
15635 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15636 m = adjust_address (op1, DFmode, 8);
15637 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15641 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15643 op0 = gen_lowpart (V4SFmode, op0);
15644 op1 = gen_lowpart (V4SFmode, op1);
15645 emit_insn (gen_sse_movups (op0, op1));
15649 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15650 emit_move_insn (op0, CONST0_RTX (mode));
15652 emit_clobber (op0);
15654 if (mode != V4SFmode)
15655 op0 = gen_lowpart (V4SFmode, op0);
15656 m = adjust_address (op1, V2SFmode, 0);
15657 emit_insn (gen_sse_loadlps (op0, op0, m));
15658 m = adjust_address (op1, V2SFmode, 8);
15659 emit_insn (gen_sse_loadhps (op0, op0, m));
15662 else if (MEM_P (op0))
15664 /* If we're optimizing for size, movups is the smallest. */
15665 if (optimize_insn_for_size_p ()
15666 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15668 op0 = gen_lowpart (V4SFmode, op0);
15669 op1 = gen_lowpart (V4SFmode, op1);
15670 emit_insn (gen_sse_movups (op0, op1));
15674 /* ??? Similar to above, only less clear because of quote
15675 typeless stores unquote. */
15676 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15677 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15679 op0 = gen_lowpart (V16QImode, op0);
15680 op1 = gen_lowpart (V16QImode, op1);
15681 emit_insn (gen_sse2_movdqu (op0, op1));
15685 if (TARGET_SSE2 && mode == V2DFmode)
15687 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15689 op0 = gen_lowpart (V2DFmode, op0);
15690 op1 = gen_lowpart (V2DFmode, op1);
15691 emit_insn (gen_sse2_movupd (op0, op1));
15695 m = adjust_address (op0, DFmode, 0);
15696 emit_insn (gen_sse2_storelpd (m, op1));
15697 m = adjust_address (op0, DFmode, 8);
15698 emit_insn (gen_sse2_storehpd (m, op1));
15703 if (mode != V4SFmode)
15704 op1 = gen_lowpart (V4SFmode, op1);
15706 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15708 op0 = gen_lowpart (V4SFmode, op0);
15709 emit_insn (gen_sse_movups (op0, op1));
15713 m = adjust_address (op0, V2SFmode, 0);
15714 emit_insn (gen_sse_storelps (m, op1));
15715 m = adjust_address (op0, V2SFmode, 8);
15716 emit_insn (gen_sse_storehps (m, op1));
15721 gcc_unreachable ();
15724 /* Expand a push in MODE. This is some mode for which we do not support
15725 proper push instructions, at least from the registers that we expect
15726 the value to live in. */
15729 ix86_expand_push (enum machine_mode mode, rtx x)
15733 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15734 GEN_INT (-GET_MODE_SIZE (mode)),
15735 stack_pointer_rtx, 1, OPTAB_DIRECT);
15736 if (tmp != stack_pointer_rtx)
15737 emit_move_insn (stack_pointer_rtx, tmp);
15739 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15741 /* When we push an operand onto the stack, it has to be aligned at least
15742 at the function argument boundary. However, since we don't have
15743 the argument type, we can't determine the actual argument
15745 emit_move_insn (tmp, x);
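  /* Schematically, for a 16-byte SSE mode with the value arriving
     in %xmm0, this emits roughly:
	 subl   $16, %esp
	 movups %xmm0, (%esp)  */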
15748 /* Helper function of ix86_fixup_binary_operands to canonicalize
15749 operand order. Returns true if the operands should be swapped. */
15752 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15755 rtx dst = operands[0];
15756 rtx src1 = operands[1];
15757 rtx src2 = operands[2];
15759 /* If the operation is not commutative, we can't do anything. */
15760 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15763 /* Highest priority is that src1 should match dst. */
15764 if (rtx_equal_p (dst, src1))
15766 if (rtx_equal_p (dst, src2))
15769 /* Next highest priority is that immediate constants come second. */
15770 if (immediate_operand (src2, mode))
15772 if (immediate_operand (src1, mode))
15775 /* Lowest priority is that memory references should come second. */
15785 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15786 destination to use for the operation. If different from the true
15787 destination in operands[0], a copy operation will be required. */
15790 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15793 rtx dst = operands[0];
15794 rtx src1 = operands[1];
15795 rtx src2 = operands[2];
15797 /* Canonicalize operand order. */
15798 if (ix86_swap_binary_operands_p (code, mode, operands))
15802 /* It is invalid to swap operands of different modes. */
15803 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15810 /* Both source operands cannot be in memory. */
15811 if (MEM_P (src1) && MEM_P (src2))
15813 /* Optimization: Only read from memory once. */
15814 if (rtx_equal_p (src1, src2))
15816 src2 = force_reg (mode, src2);
15820 src2 = force_reg (mode, src2);
15823 /* If the destination is memory, and we do not have matching source
15824 operands, do things in registers. */
15825 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15826 dst = gen_reg_rtx (mode);
15828 /* Source 1 cannot be a constant. */
15829 if (CONSTANT_P (src1))
15830 src1 = force_reg (mode, src1);
15832 /* Source 1 cannot be a non-matching memory. */
15833 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15834 src1 = force_reg (mode, src1);
15836 operands[1] = src1;
15837 operands[2] = src2;
15841 /* Similarly, but assume that the destination has already been
15842 set up properly. */
15845 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15846 enum machine_mode mode, rtx operands[])
15848 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15849 gcc_assert (dst == operands[0]);
15852 /* Attempt to expand a binary operator. Make the expansion closer to the
15853 actual machine than just general_operand, which would allow 3 separate
15854 memory references (one output, two input) in a single insn. */
15857 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15860 rtx src1, src2, dst, op, clob;
15862 dst = ix86_fixup_binary_operands (code, mode, operands);
15863 src1 = operands[1];
15864 src2 = operands[2];
15866 /* Emit the instruction. */
15868 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15869 if (reload_in_progress)
15871 /* Reload doesn't know about the flags register, and doesn't know that
15872 it doesn't want to clobber it. We can only do this with PLUS. */
15873 gcc_assert (code == PLUS);
15876 else if (reload_completed
15878 && !rtx_equal_p (dst, src1))
15880 /* This is going to be an LEA; avoid splitting it later. */
15885 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15886 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15889 /* Fix up the destination if needed. */
15890 if (dst != operands[0])
15891 emit_move_insn (operands[0], dst);
15894 /* Return TRUE or FALSE depending on whether the binary operator meets the
15895 appropriate constraints. */
15898 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15901 rtx dst = operands[0];
15902 rtx src1 = operands[1];
15903 rtx src2 = operands[2];
15905 /* Both source operands cannot be in memory. */
15906 if (MEM_P (src1) && MEM_P (src2))
15909 /* Canonicalize operand order for commutative operators. */
15910 if (ix86_swap_binary_operands_p (code, mode, operands))
15917 /* If the destination is memory, we must have a matching source operand. */
15918 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15921 /* Source 1 cannot be a constant. */
15922 if (CONSTANT_P (src1))
15925 /* Source 1 cannot be a non-matching memory. */
15926 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15928 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15929 return (code == AND
15932 || (TARGET_64BIT && mode == DImode))
15933 && CONST_INT_P (src2)
15934 && (INTVAL (src2) == 0xff
15935 || INTVAL (src2) == 0xffff));
15941 /* Attempt to expand a unary operator. Make the expansion closer to the
15942 actual machine than just general_operand, which would allow 2 separate
15943 memory references (one output, one input) in a single insn. */
15946 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15949 int matching_memory;
15950 rtx src, dst, op, clob;
15955 /* If the destination is memory, and we do not have matching source
15956 operands, do things in registers. */
15957 matching_memory = 0;
15960 if (rtx_equal_p (dst, src))
15961 matching_memory = 1;
15963 dst = gen_reg_rtx (mode);
15966 /* When source operand is memory, destination must match. */
15967 if (MEM_P (src) && !matching_memory)
15968 src = force_reg (mode, src);
15970 /* Emit the instruction. */
15972 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15973 if (reload_in_progress || code == NOT)
15975 /* Reload doesn't know about the flags register, and doesn't know that
15976 it doesn't want to clobber it. */
15977 gcc_assert (code == NOT);
15982 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15983 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15986 /* Fix up the destination if needed. */
15987 if (dst != operands[0])
15988 emit_move_insn (operands[0], dst);
15991 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15992 divisor are within the range [0-255]. */
15995 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15998 rtx end_label, qimode_label;
15999 rtx insn, div, mod;
16000 rtx scratch, tmp0, tmp1, tmp2;
16001 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16002 rtx (*gen_zero_extend) (rtx, rtx);
16003 rtx (*gen_test_ccno_1) (rtx, rtx);
16008 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16009 gen_test_ccno_1 = gen_testsi_ccno_1;
16010 gen_zero_extend = gen_zero_extendqisi2;
16013 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16014 gen_test_ccno_1 = gen_testdi_ccno_1;
16015 gen_zero_extend = gen_zero_extendqidi2;
16018 gcc_unreachable ();
16021 end_label = gen_label_rtx ();
16022 qimode_label = gen_label_rtx ();
16024 scratch = gen_reg_rtx (mode);
16026 /* Use 8bit unsigned divmod if dividend and divisor are within
16027 the range [0-255]. */
16028 emit_move_insn (scratch, operands[2]);
16029 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
16030 scratch, 1, OPTAB_DIRECT);
16031 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
16032 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
16033 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
16034 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
16035 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
16037 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
16038 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16039 JUMP_LABEL (insn) = qimode_label;
16041 /* Generate original signed/unsigned divmod. */
16042 div = gen_divmod4_1 (operands[0], operands[1],
16043 operands[2], operands[3]);
16046 /* Branch to the end. */
16047 emit_jump_insn (gen_jump (end_label));
16050 /* Generate 8bit unsigned divide. */
16051 emit_label (qimode_label);
16052 /* Don't use operands[0] for result of 8bit divide since not all
16053 registers support QImode ZERO_EXTRACT. */
16054 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
16055 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
16056 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
16057 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
16061 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
16062 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
16066 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
16067 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
16070 /* Extract remainder from AH. */
16071 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
16072 if (REG_P (operands[1]))
16073 insn = emit_move_insn (operands[1], tmp1);
16076 /* Need a new scratch register since the old one has result
16078 scratch = gen_reg_rtx (mode);
16079 emit_move_insn (scratch, tmp1);
16080 insn = emit_move_insn (operands[1], scratch);
16082 set_unique_reg_note (insn, REG_EQUAL, mod);
16084 /* Zero extend quotient from AL. */
16085 tmp1 = gen_lowpart (QImode, tmp0);
16086 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
16087 set_unique_reg_note (insn, REG_EQUAL, div);
16089 emit_label (end_label);
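  /* Schematically, the emitted sequence for the 32-bit case is:
	 mov    dividend, scratch
	 or     divisor, scratch
	 test   $-0x100, scratch
	 je     .Lqimode
	 <full 32-bit [u]divmod>
	 jmp    .Lend
     .Lqimode:
	 <8-bit unsigned divide; AL = quotient, AH = remainder>
     .Lend:  */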
16092 #define LEA_SEARCH_THRESHOLD 12
16094 /* Search backward for non-agu definition of register number REGNO1
16095 or register number REGNO2 in INSN's basic block until
16096 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16097 2. Reach BB boundary, or
16098 3. Reach agu definition.
16099 Returns the distance between the non-agu definition point and INSN.
16100 If no definition point, returns -1. */
16103 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
16106 basic_block bb = BLOCK_FOR_INSN (insn);
16109 enum attr_type insn_type;
16111 if (insn != BB_HEAD (bb))
16113 rtx prev = PREV_INSN (insn);
16114 while (prev && distance < LEA_SEARCH_THRESHOLD)
16116 if (NONDEBUG_INSN_P (prev))
16119 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16120 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16121 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16122 && (regno1 == DF_REF_REGNO (*def_rec)
16123 || regno2 == DF_REF_REGNO (*def_rec)))
16125 insn_type = get_attr_type (prev);
16126 if (insn_type != TYPE_LEA)
16130 if (prev == BB_HEAD (bb))
16132 prev = PREV_INSN (prev);
16136 if (distance < LEA_SEARCH_THRESHOLD)
16140 bool simple_loop = false;
16142 FOR_EACH_EDGE (e, ei, bb->preds)
16145 simple_loop = true;
16151 rtx prev = BB_END (bb);
16154 && distance < LEA_SEARCH_THRESHOLD)
16156 if (NONDEBUG_INSN_P (prev))
16159 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
16160 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16161 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16162 && (regno1 == DF_REF_REGNO (*def_rec)
16163 || regno2 == DF_REF_REGNO (*def_rec)))
16165 insn_type = get_attr_type (prev);
16166 if (insn_type != TYPE_LEA)
16170 prev = PREV_INSN (prev);
16178 /* get_attr_type may modify recog data. We want to make sure
16179 that recog data is valid for instruction INSN, on which
16180 distance_non_agu_define is called. INSN is unchanged here. */
16181 extract_insn_cached (insn);
16185 /* Return the distance between INSN and the next insn that uses
16186 register number REGNO0 in a memory address. Return -1 if no such
16187 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
16190 distance_agu_use (unsigned int regno0, rtx insn)
16192 basic_block bb = BLOCK_FOR_INSN (insn);
16197 if (insn != BB_END (bb))
16199 rtx next = NEXT_INSN (insn);
16200 while (next && distance < LEA_SEARCH_THRESHOLD)
16202 if (NONDEBUG_INSN_P (next))
16206 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16207 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16208 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16209 && regno0 == DF_REF_REGNO (*use_rec))
16211 /* Return DISTANCE if OP0 is used in a memory
16212 address in NEXT. */
16216 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16217 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16218 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16219 && regno0 == DF_REF_REGNO (*def_rec))
16221 /* Return -1 if OP0 is set in NEXT. */
16225 if (next == BB_END (bb))
16227 next = NEXT_INSN (next);
16231 if (distance < LEA_SEARCH_THRESHOLD)
16235 bool simple_loop = false;
16237 FOR_EACH_EDGE (e, ei, bb->succs)
16240 simple_loop = true;
16246 rtx next = BB_HEAD (bb);
16249 && distance < LEA_SEARCH_THRESHOLD)
16251 if (NONDEBUG_INSN_P (next))
16255 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16256 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16257 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16258 && regno0 == DF_REF_REGNO (*use_rec))
16260 /* Return DISTANCE if OP0 is used in a memory
16261 address in NEXT. */
16265 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16266 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16267 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16268 && regno0 == DF_REF_REGNO (*def_rec))
16270 /* Return -1 if OP0 is set in NEXT. */
16275 next = NEXT_INSN (next);
16283 /* Define this macro to tune LEA priority vs ADD; it takes effect when
16284 there is a dilemma of choosing LEA or ADD.
16285 Negative value: ADD is preferred over LEA.
16287 Positive value: LEA is preferred over ADD. */
16288 #define IX86_LEA_PRIORITY 2
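/* E.g. with a non-agu definition 3 insns before the ADD and an
   address use 6 insns after it, 3 + IX86_LEA_PRIORITY < 6, so the
   ADD is turned into an LEA.  */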
16290 /* Return true if it is ok to optimize an ADD operation to an LEA
16291 operation to avoid flag register consumption. For most processors,
16292 ADD is faster than LEA. For processors like ATOM, if the
16293 destination register of the LEA holds an actual address which will be
16294 used soon, LEA is better; otherwise ADD is better. */
16297 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16299 unsigned int regno0 = true_regnum (operands[0]);
16300 unsigned int regno1 = true_regnum (operands[1]);
16301 unsigned int regno2 = true_regnum (operands[2]);
16303 /* If a = b + c, (a!=b && a!=c), must use lea form. */
16304 if (regno0 != regno1 && regno0 != regno2)
16307 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16311 int dist_define, dist_use;
16313 /* Return false if REGNO0 isn't used in a memory address. */
16314 dist_use = distance_agu_use (regno0, insn);
16318 dist_define = distance_non_agu_define (regno1, regno2, insn);
16319 if (dist_define <= 0)
16322 /* If this insn has both a backward non-agu dependence and a forward
16323 agu dependence, the one with the shorter distance takes effect. */
16324 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16331 /* Return true if destination reg of SET_BODY is shift count of
16335 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16341 /* Retrieve destination of SET_BODY. */
16342 switch (GET_CODE (set_body))
16345 set_dest = SET_DEST (set_body);
16346 if (!set_dest || !REG_P (set_dest))
16350 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16351 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16359 /* Retrieve shift count of USE_BODY. */
16360 switch (GET_CODE (use_body))
16363 shift_rtx = XEXP (use_body, 1);
16366 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16367 if (ix86_dep_by_shift_count_body (set_body,
16368 XVECEXP (use_body, 0, i)))
16376 && (GET_CODE (shift_rtx) == ASHIFT
16377 || GET_CODE (shift_rtx) == LSHIFTRT
16378 || GET_CODE (shift_rtx) == ASHIFTRT
16379 || GET_CODE (shift_rtx) == ROTATE
16380 || GET_CODE (shift_rtx) == ROTATERT))
16382 rtx shift_count = XEXP (shift_rtx, 1);
16384 /* Return true if shift count is dest of SET_BODY. */
16385 if (REG_P (shift_count)
16386 && true_regnum (set_dest) == true_regnum (shift_count))
16393 /* Return true if destination reg of SET_INSN is shift count of
16397 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16399 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16400 PATTERN (use_insn));
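/* E.g. if SET_INSN sets %ecx and USE_INSN is "sall %cl, %eax", the
   shift count of USE_INSN is the destination of SET_INSN and this
   returns true.  */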
16403 /* Return TRUE or FALSE depending on whether the unary operator meets the
16404 appropriate constraints. */
16407 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16408 enum machine_mode mode ATTRIBUTE_UNUSED,
16409 rtx operands[2] ATTRIBUTE_UNUSED)
16411 /* If one of the operands is memory, source and destination must match. */
16412 if ((MEM_P (operands[0])
16413 || MEM_P (operands[1]))
16414 && ! rtx_equal_p (operands[0], operands[1]))
16419 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16420 are ok, keeping in mind the possible movddup alternative. */
16423 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16425 if (MEM_P (operands[0]))
16426 return rtx_equal_p (operands[0], operands[1 + high]);
16427 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16428 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16432 /* Post-reload splitter for converting an SF or DFmode value in an
16433 SSE register into an unsigned SImode. */
16436 ix86_split_convert_uns_si_sse (rtx operands[])
16438 enum machine_mode vecmode;
16439 rtx value, large, zero_or_two31, input, two31, x;
16441 large = operands[1];
16442 zero_or_two31 = operands[2];
16443 input = operands[3];
16444 two31 = operands[4];
16445 vecmode = GET_MODE (large);
16446 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16448 /* Load up the value into the low element. We must ensure that the other
16449 elements are valid floats -- zero is the easiest such value. */
16452 if (vecmode == V4SFmode)
16453 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16455 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16459 input = gen_rtx_REG (vecmode, REGNO (input));
16460 emit_move_insn (value, CONST0_RTX (vecmode));
16461 if (vecmode == V4SFmode)
16462 emit_insn (gen_sse_movss (value, value, input));
16464 emit_insn (gen_sse2_movsd (value, value, input));
16467 emit_move_insn (large, two31);
16468 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16470 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16471 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16473 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16474 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16476 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16477 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16479 large = gen_rtx_REG (V4SImode, REGNO (large));
16480 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16482 x = gen_rtx_REG (V4SImode, REGNO (value));
16483 if (vecmode == V4SFmode)
16484 emit_insn (gen_sse2_cvttps2dq (x, value));
16486 emit_insn (gen_sse2_cvttpd2dq (x, value));
16489 emit_insn (gen_xorv4si3 (value, value, large));
16492 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16493 Expects the 64-bit DImode to be supplied in a pair of integral
16494 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16495 -mfpmath=sse, !optimize_size only. */
16498 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16500 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16501 rtx int_xmm, fp_xmm;
16502 rtx biases, exponents;
16505 int_xmm = gen_reg_rtx (V4SImode);
16506 if (TARGET_INTER_UNIT_MOVES)
16507 emit_insn (gen_movdi_to_sse (int_xmm, input));
16508 else if (TARGET_SSE_SPLIT_REGS)
16510 emit_clobber (int_xmm);
16511 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16515 x = gen_reg_rtx (V2DImode);
16516 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16517 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16520 x = gen_rtx_CONST_VECTOR (V4SImode,
16521 gen_rtvec (4, GEN_INT (0x43300000UL),
16522 GEN_INT (0x45300000UL),
16523 const0_rtx, const0_rtx));
16524 exponents = validize_mem (force_const_mem (V4SImode, x));
16526 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16527 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16529 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16530 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16531 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16532 (0x1.0p84 + double(fp_value_hi_xmm)).
16533 Note these exponents differ by 32. */
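  /* Worked example: for input 2**32 + 7, lo == 7 and hi == 1, so the
     two doubles built here are 0x1.0p52 + 7 and 0x1.0p84 + 0x1.0p32.
     Subtracting the biases below leaves 7.0 and 0x1.0p32, whose sum
     is the expected 4294967303.0.  */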
16535 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16537 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16538 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16539 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16540 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16541 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16542 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16543 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16544 biases = validize_mem (force_const_mem (V2DFmode, biases));
16545 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16547 /* Add the upper and lower DFmode values together. */
16549 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16552 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16553 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16554 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16557 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16560 /* Not used, but eases macroization of patterns. */
16562 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16563 rtx input ATTRIBUTE_UNUSED)
16565 gcc_unreachable ();
16568 /* Convert an unsigned SImode value into a DFmode. Only currently used
16569 for SSE, but applicable anywhere. */
16572 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16574 REAL_VALUE_TYPE TWO31r;
16577 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16578 NULL, 1, OPTAB_DIRECT);
16580 fp = gen_reg_rtx (DFmode);
16581 emit_insn (gen_floatsidf2 (fp, x));
16583 real_ldexp (&TWO31r, &dconst1, 31);
16584 x = const_double_from_real_value (TWO31r, DFmode);
16586 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16588 emit_move_insn (target, x);
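  /* Worked example: input 0x80000005 wraps to the signed value 5
     after adding INT_MIN; that converts to 5.0, and adding 0x1.0p31
     yields 2147483653.0, the original unsigned value.  */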
16591 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16592 32-bit mode; otherwise we have a direct convert instruction. */
16595 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16597 REAL_VALUE_TYPE TWO32r;
16598 rtx fp_lo, fp_hi, x;
16600 fp_lo = gen_reg_rtx (DFmode);
16601 fp_hi = gen_reg_rtx (DFmode);
16603 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16605 real_ldexp (&TWO32r, &dconst1, 32);
16606 x = const_double_from_real_value (TWO32r, DFmode);
16607 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16609 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16611 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16614 emit_move_insn (target, x);
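  /* Worked example: input -6 has hi == -1 and lo == 0xfffffffa, so
     fp_hi == -0x1.0p32 and fp_lo == 4294967290.0; their sum is the
     expected -6.0.  */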
16617 /* Convert an unsigned SImode value into an SFmode, using only SSE.
16618 For x86_32, -mfpmath=sse, !optimize_size only. */
16620 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16622 REAL_VALUE_TYPE ONE16r;
16623 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16625 real_ldexp (&ONE16r, &dconst1, 16);
16626 x = const_double_from_real_value (ONE16r, SFmode);
16627 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16628 NULL, 0, OPTAB_DIRECT);
16629 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16630 NULL, 0, OPTAB_DIRECT);
16631 fp_hi = gen_reg_rtx (SFmode);
16632 fp_lo = gen_reg_rtx (SFmode);
16633 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16634 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16635 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16637 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16639 if (!rtx_equal_p (target, fp_hi))
16640 emit_move_insn (target, fp_hi);
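  /* Worked example: input 0x00030005 splits into int_hi == 3 and
     int_lo == 5, giving 3.0 * 0x1.0p16 + 5.0 == 196613.0.  */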
16643 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16644 then replicate the value for all elements of the vector
16648 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16655 v = gen_rtvec (4, value, value, value, value);
16656 return gen_rtx_CONST_VECTOR (V4SImode, v);
16660 v = gen_rtvec (2, value, value);
16661 return gen_rtx_CONST_VECTOR (V2DImode, v);
16665 v = gen_rtvec (8, value, value, value, value,
16666 value, value, value, value);
16668 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16669 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16670 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16671 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16672 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16676 v = gen_rtvec (4, value, value, value, value);
16678 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16679 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16680 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16684 v = gen_rtvec (4, value, value, value, value);
16686 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16687 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16688 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16692 v = gen_rtvec (2, value, value);
16694 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16695 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16698 gcc_unreachable ();
16702 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16703 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16704 for an SSE register. If VECT is true, then replicate the mask for
16705 all elements of the vector register. If INVERT is true, then create
16706 a mask excluding the sign bit. */
16709 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16711 enum machine_mode vec_mode, imode;
16712 HOST_WIDE_INT hi, lo;
16717 /* Find the sign bit, sign extended to 2*HWI. */
16724 mode = GET_MODE_INNER (mode);
16726 lo = 0x80000000, hi = lo < 0;
16733 mode = GET_MODE_INNER (mode);
16735 if (HOST_BITS_PER_WIDE_INT >= 64)
16736 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16738 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16743 vec_mode = VOIDmode;
16744 if (HOST_BITS_PER_WIDE_INT >= 64)
16747 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16754 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16758 lo = ~lo, hi = ~hi;
16764 mask = immed_double_const (lo, hi, imode);
16766 vec = gen_rtvec (2, v, mask);
16767 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16768 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16775 gcc_unreachable ();
16779 lo = ~lo, hi = ~hi;
16781 /* Force this value into the low part of a fp vector constant. */
16782 mask = immed_double_const (lo, hi, imode);
16783 mask = gen_lowpart (mode, mask);
16785 if (vec_mode == VOIDmode)
16786 return force_reg (mode, mask);
16788 v = ix86_build_const_vector (vec_mode, vect, mask);
16789 return force_reg (vec_mode, v);
16792 /* Generate code for floating point ABS or NEG. */
16795 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16798 rtx mask, set, dst, src;
16799 bool use_sse = false;
16800 bool vector_mode = VECTOR_MODE_P (mode);
16801 enum machine_mode vmode = mode;
16805 else if (mode == TFmode)
16807 else if (TARGET_SSE_MATH)
16809 use_sse = SSE_FLOAT_MODE_P (mode);
16810 if (mode == SFmode)
16812 else if (mode == DFmode)
16816 /* NEG and ABS performed with SSE use bitwise mask operations.
16817 Create the appropriate mask now. */
16819 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16826 set = gen_rtx_fmt_e (code, mode, src);
16827 set = gen_rtx_SET (VOIDmode, dst, set);
16834 use = gen_rtx_USE (VOIDmode, mask);
16836 par = gen_rtvec (2, set, use);
16839 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16840 par = gen_rtvec (3, set, use, clob);
16842 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
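      /* With SSE this reduces to one bitwise op on the sign-bit mask:
	 e.g. for DFmode, NEG becomes an xorpd with 0x8000000000000000
	 and ABS an andpd with the inverted mask.  */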
16848 /* Expand a copysign operation. Special case operand 0 being a constant. */
16851 ix86_expand_copysign (rtx operands[])
16853 enum machine_mode mode, vmode;
16854 rtx dest, op0, op1, mask, nmask;
16856 dest = operands[0];
16860 mode = GET_MODE (dest);
16862 if (mode == SFmode)
16864 else if (mode == DFmode)
16869 if (GET_CODE (op0) == CONST_DOUBLE)
16871 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16873 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16874 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16876 if (mode == SFmode || mode == DFmode)
16878 if (op0 == CONST0_RTX (mode))
16879 op0 = CONST0_RTX (vmode);
16882 rtx v = ix86_build_const_vector (vmode, false, op0);
16884 op0 = force_reg (vmode, v);
16887 else if (op0 != CONST0_RTX (mode))
16888 op0 = force_reg (mode, op0);
16890 mask = ix86_build_signbit_mask (vmode, 0, 0);
16892 if (mode == SFmode)
16893 copysign_insn = gen_copysignsf3_const;
16894 else if (mode == DFmode)
16895 copysign_insn = gen_copysigndf3_const;
16897 copysign_insn = gen_copysigntf3_const;
16899 emit_insn (copysign_insn (dest, op0, op1, mask));
16903 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16905 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16906 mask = ix86_build_signbit_mask (vmode, 0, 0);
16908 if (mode == SFmode)
16909 copysign_insn = gen_copysignsf3_var;
16910 else if (mode == DFmode)
16911 copysign_insn = gen_copysigndf3_var;
16913 copysign_insn = gen_copysigntf3_var;
16915 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16919 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16920 be a constant, and so has already been expanded into a vector constant. */
16923 ix86_split_copysign_const (rtx operands[])
16925 enum machine_mode mode, vmode;
16926 rtx dest, op0, mask, x;
16928 dest = operands[0];
16930 mask = operands[3];
16932 mode = GET_MODE (dest);
16933 vmode = GET_MODE (mask);
16935 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16936 x = gen_rtx_AND (vmode, dest, mask);
16937 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16939 if (op0 != CONST0_RTX (vmode))
16941 x = gen_rtx_IOR (vmode, dest, op0);
16942 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16946 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16947 so we have to do two masks. */
16950 ix86_split_copysign_var (rtx operands[])
16952 enum machine_mode mode, vmode;
16953 rtx dest, scratch, op0, op1, mask, nmask, x;
16955 dest = operands[0];
16956 scratch = operands[1];
16959 nmask = operands[4];
16960 mask = operands[5];
16962 mode = GET_MODE (dest);
16963 vmode = GET_MODE (mask);
16965 if (rtx_equal_p (op0, op1))
16967 /* Shouldn't happen often (it's useless, obviously), but when it does
16968 we'd generate incorrect code if we continue below. */
16969 emit_move_insn (dest, op0);
16973 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16975 gcc_assert (REGNO (op1) == REGNO (scratch));
16977 x = gen_rtx_AND (vmode, scratch, mask);
16978 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16981 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16982 x = gen_rtx_NOT (vmode, dest);
16983 x = gen_rtx_AND (vmode, x, op0);
16984 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16988 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16990 x = gen_rtx_AND (vmode, scratch, mask);
16992 else /* alternative 2,4 */
16994 gcc_assert (REGNO (mask) == REGNO (scratch));
16995 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16996 x = gen_rtx_AND (vmode, scratch, op1);
16998 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
17000 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
17002 dest = simplify_gen_subreg (vmode, op0, mode, 0);
17003 x = gen_rtx_AND (vmode, dest, nmask);
17005 else /* alternative 3,4 */
17007 gcc_assert (REGNO (nmask) == REGNO (dest));
17009 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
17010 x = gen_rtx_AND (vmode, dest, op0);
17012 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17015 x = gen_rtx_IOR (vmode, dest, scratch);
17016 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17019 /* Return TRUE or FALSE depending on whether the first SET in INSN
17020 has source and destination with matching CC modes, and that the
17021 CC mode is at least as constrained as REQ_MODE. */
17024 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
17027 enum machine_mode set_mode;
17029 set = PATTERN (insn);
17030 if (GET_CODE (set) == PARALLEL)
17031 set = XVECEXP (set, 0, 0);
17032 gcc_assert (GET_CODE (set) == SET);
17033 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
17035 set_mode = GET_MODE (SET_DEST (set));
17039 if (req_mode != CCNOmode
17040 && (req_mode != CCmode
17041 || XEXP (SET_SRC (set), 1) != const0_rtx))
17045 if (req_mode == CCGCmode)
17049 if (req_mode == CCGOCmode || req_mode == CCNOmode)
17053 if (req_mode == CCZmode)
17064 gcc_unreachable ();
17067 return GET_MODE (SET_SRC (set)) == set_mode;
17070 /* Generate insn patterns to do an integer compare of OPERANDS. */
17073 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
17075 enum machine_mode cmpmode;
17078 cmpmode = SELECT_CC_MODE (code, op0, op1);
17079 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
17081 /* This is very simple, but making the interface the same as in the
17082 FP case makes the rest of the code easier. */
17083 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
17084 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
17086 /* Return the test that should be put into the flags user, i.e.
17087 the bcc, scc, or cmov instruction. */
17088 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
17091 /* Figure out whether to use ordered or unordered fp comparisons.
17092 Return the appropriate mode to use. */
17095 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
17097 /* ??? In order to make all comparisons reversible, we do all comparisons
17098 non-trapping when compiling for IEEE. Once gcc is able to distinguish
17099 all forms of trapping and nontrapping comparisons, we can make inequality
17100 comparisons trapping again, since it results in better code when using
17101 FCOM based compares. */
17102 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
17106 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
17108 enum machine_mode mode = GET_MODE (op0);
17110 if (SCALAR_FLOAT_MODE_P (mode))
17112 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17113 return ix86_fp_compare_mode (code);
17118 /* Only zero flag is needed. */
17119 case EQ: /* ZF=0 */
17120 case NE: /* ZF!=0 */
17122 /* Codes needing carry flag. */
17123 case GEU: /* CF=0 */
17124 case LTU: /* CF=1 */
17125 /* Detect overflow checks. They need just the carry flag. */
17126 if (GET_CODE (op0) == PLUS
17127 && rtx_equal_p (op1, XEXP (op0, 0)))
17131 case GTU: /* CF=0 & ZF=0 */
17132 case LEU: /* CF=1 | ZF=1 */
17133 /* Detect overflow checks. They need just the carry flag. */
17134 if (GET_CODE (op0) == MINUS
17135 && rtx_equal_p (op1, XEXP (op0, 0)))
17139 /* Codes possibly doable only with sign flag when
17140 comparing against zero. */
17141 case GE: /* SF=OF or SF=0 */
17142 case LT: /* SF<>OF or SF=1 */
17143 if (op1 == const0_rtx)
17146 /* For other cases the carry flag is not required. */
17148 /* Codes doable only with the sign flag when comparing
17149 against zero, but for which we miss the jump instruction,
17150 so we need to use relational tests against overflow,
17151 which thus needs to be zero. */
17152 case GT: /* ZF=0 & SF=OF */
17153 case LE: /* ZF=1 | SF<>OF */
17154 if (op1 == const0_rtx)
17158 /* The strcmp pattern does (use flags), and combine may ask us for a proper
17163 gcc_unreachable ();
17167 /* Return the fixed registers used for condition codes. */
17170 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
17177 /* If two condition code modes are compatible, return a condition code
17178 mode which is compatible with both. Otherwise, return
17181 static enum machine_mode
17182 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
17187 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
17190 if ((m1 == CCGCmode && m2 == CCGOCmode)
17191 || (m1 == CCGOCmode && m2 == CCGCmode))
17197 gcc_unreachable ();
17227 /* These are only compatible with themselves, which we already
17234 /* Return a comparison we can do that is equivalent to
17235 swap_condition (code), apart possibly from orderedness.
17236 But never change orderedness if TARGET_IEEE_FP, returning
17237 UNKNOWN in that case if necessary. */
17239 static enum rtx_code
17240 ix86_fp_swap_condition (enum rtx_code code)
17244 case GT: /* GTU - CF=0 & ZF=0 */
17245 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17246 case GE: /* GEU - CF=0 */
17247 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17248 case UNLT: /* LTU - CF=1 */
17249 return TARGET_IEEE_FP ? UNKNOWN : GT;
17250 case UNLE: /* LEU - CF=1 | ZF=1 */
17251 return TARGET_IEEE_FP ? UNKNOWN : GE;
17253 return swap_condition (code);
17257 /* Return the cost of comparison CODE using the best strategy for performance.
17258 All following functions use the number of instructions as a cost metric.
17259 In the future this should be tweaked to compute bytes for optimize_size and
17260 take into account the performance of various instructions on various CPUs. */
17263 ix86_fp_comparison_cost (enum rtx_code code)
17267 /* The cost of code using bit-twiddling on %ah. */
17284 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17288 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17291 gcc_unreachable ();
17294 switch (ix86_fp_comparison_strategy (code))
17296 case IX86_FPCMP_COMI:
17297 return arith_cost > 4 ? 3 : 2;
17298 case IX86_FPCMP_SAHF:
17299 return arith_cost > 4 ? 4 : 3;
17305 /* Return the strategy to use for floating-point. We assume that fcomi is always
17306 preferable where available, since that is also true when looking at size
17307 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17309 enum ix86_fpcmp_strategy
17310 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17312 /* Do fcomi/sahf based test when profitable. */
17315 return IX86_FPCMP_COMI;
17317 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17318 return IX86_FPCMP_SAHF;
17320 return IX86_FPCMP_ARITH;
17323 /* Swap, force into registers, or otherwise massage the two operands
17324 to a fp comparison. The operands are updated in place; the new
17325 comparison code is returned. */
17327 static enum rtx_code
17328 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17330 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17331 rtx op0 = *pop0, op1 = *pop1;
17332 enum machine_mode op_mode = GET_MODE (op0);
17333 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17335 /* All of the unordered compare instructions only work on registers.
17336 The same is true of the fcomi compare instructions. The XFmode
17337 compare instructions require registers except when comparing
17338 against zero or when converting operand 1 from fixed point to
17342 && (fpcmp_mode == CCFPUmode
17343 || (op_mode == XFmode
17344 && ! (standard_80387_constant_p (op0) == 1
17345 || standard_80387_constant_p (op1) == 1)
17346 && GET_CODE (op1) != FLOAT)
17347 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17349 op0 = force_reg (op_mode, op0);
17350 op1 = force_reg (op_mode, op1);
17354 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17355 things around if they appear profitable, otherwise force op0
17356 into a register. */
17358 if (standard_80387_constant_p (op0) == 0
17360 && ! (standard_80387_constant_p (op1) == 0
17363 enum rtx_code new_code = ix86_fp_swap_condition (code);
17364 if (new_code != UNKNOWN)
17367 tmp = op0, op0 = op1, op1 = tmp;
17373 op0 = force_reg (op_mode, op0);
17375 if (CONSTANT_P (op1))
17377 int tmp = standard_80387_constant_p (op1);
17379 op1 = validize_mem (force_const_mem (op_mode, op1));
17383 op1 = force_reg (op_mode, op1);
17386 op1 = force_reg (op_mode, op1);
17390 /* Try to rearrange the comparison to make it cheaper. */
17391 if (ix86_fp_comparison_cost (code)
17392 > ix86_fp_comparison_cost (swap_condition (code))
17393 && (REG_P (op1) || can_create_pseudo_p ()))
17396 tmp = op0, op0 = op1, op1 = tmp;
17397 code = swap_condition (code);
17399 op0 = force_reg (op_mode, op0);
17407 /* Convert the comparison codes we use to represent an FP comparison to the
17408 integer code that will result in a proper branch. Return UNKNOWN if no such code
17412 ix86_fp_compare_code_to_integer (enum rtx_code code)
17441 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17444 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17446 enum machine_mode fpcmp_mode, intcmp_mode;
17449 fpcmp_mode = ix86_fp_compare_mode (code);
17450 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17452 /* Do fcomi/sahf based test when profitable. */
17453 switch (ix86_fp_comparison_strategy (code))
17455 case IX86_FPCMP_COMI:
17456 intcmp_mode = fpcmp_mode;
17457 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17458 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17463 case IX86_FPCMP_SAHF:
17464 intcmp_mode = fpcmp_mode;
17465 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17466 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17470 scratch = gen_reg_rtx (HImode);
17471 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17472 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17475 case IX86_FPCMP_ARITH:
17476 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17477 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17478 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17480 scratch = gen_reg_rtx (HImode);
17481 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17483 /* In the unordered case, we have to check C2 for NaN's, which
17484 doesn't happen to work out to anything nice combination-wise.
17485 So do some bit twiddling on the value we've got in AH to come
17486 up with an appropriate set of condition codes. */
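      /* After fnstsw, AH holds the FPU status flags: C0 at 0x01, C1
	 at 0x02, C2 at 0x04 and C3 at 0x40. Masks such as 0x45 below
	 thus test C3|C2|C0 in one go.  */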
17488 intcmp_mode = CCNOmode;
17493 if (code == GT || !TARGET_IEEE_FP)
17495 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17500 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17501 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17502 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17503 intcmp_mode = CCmode;
17509 if (code == LT && TARGET_IEEE_FP)
17511 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17512 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17513 intcmp_mode = CCmode;
17518 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17524 if (code == GE || !TARGET_IEEE_FP)
17526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17532 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17538 if (code == LE && TARGET_IEEE_FP)
17540 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17541 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17542 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17543 intcmp_mode = CCmode;
17548 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17554 if (code == EQ && TARGET_IEEE_FP)
17556 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17557 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17558 intcmp_mode = CCmode;
17563 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17569 if (code == NE && TARGET_IEEE_FP)
17571 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17572 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17578 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17584 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17588 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17593 gcc_unreachable ();
17601 /* Return the test that should be put into the flags user, i.e.
17602 the bcc, scc, or cmov instruction. */
17603 return gen_rtx_fmt_ee (code, VOIDmode,
17604 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17609 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17613 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17614 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17616 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17618 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17619 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17622 ret = ix86_expand_int_compare (code, op0, op1);
17628 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17630 enum machine_mode mode = GET_MODE (op0);
17642 tmp = ix86_expand_compare (code, op0, op1);
17643 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17644 gen_rtx_LABEL_REF (VOIDmode, label),
17646 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17653 /* Expand DImode branch into multiple compare+branch. */
17655 rtx lo[2], hi[2], label2;
17656 enum rtx_code code1, code2, code3;
17657 enum machine_mode submode;
17659 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17661 tmp = op0, op0 = op1, op1 = tmp;
17662 code = swap_condition (code);
17665 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17666 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17668 submode = mode == DImode ? SImode : DImode;
17670 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17671 avoid two branches. This costs one extra insn, so disable when
17672 optimizing for size. */
17674 if ((code == EQ || code == NE)
17675 && (!optimize_insn_for_size_p ()
17676 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17681 if (hi[1] != const0_rtx)
17682 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17683 NULL_RTX, 0, OPTAB_WIDEN);
17686 if (lo[1] != const0_rtx)
17687 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17688 NULL_RTX, 0, OPTAB_WIDEN);
17690 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17691 NULL_RTX, 0, OPTAB_WIDEN);
17693 ix86_expand_branch (code, tmp, const0_rtx, label);
17697 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17698 op1 is a constant and the low word is zero, then we can just
17699 examine the high word. Similarly for low word -1 and
17700 less-or-equal-than or greater-than. */
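  /* Worked example (illustrative): for a DImode test a < 0x300000000
     the low word of the constant is zero, so testing hi(a) < 3 is
     sufficient; for a <= 0x2ffffffff the low word is all ones and
     hi(a) <= 2 suffices.  */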
17702 if (CONST_INT_P (hi[1]))
17705 case LT: case LTU: case GE: case GEU:
17706 if (lo[1] == const0_rtx)
17708 ix86_expand_branch (code, hi[0], hi[1], label);
17712 case LE: case LEU: case GT: case GTU:
17713 if (lo[1] == constm1_rtx)
17715 ix86_expand_branch (code, hi[0], hi[1], label);
17723 /* Otherwise, we need two or three jumps. */
17725 label2 = gen_label_rtx ();
17728 code2 = swap_condition (code);
17729 code3 = unsigned_condition (code);
17733 case LT: case GT: case LTU: case GTU:
17736 case LE: code1 = LT; code2 = GT; break;
17737 case GE: code1 = GT; code2 = LT; break;
17738 case LEU: code1 = LTU; code2 = GTU; break;
17739 case GEU: code1 = GTU; code2 = LTU; break;
17741 case EQ: code1 = UNKNOWN; code2 = NE; break;
17742 case NE: code2 = UNKNOWN; break;
17745 gcc_unreachable ();
17750 * if (hi(a) < hi(b)) goto true;
17751 * if (hi(a) > hi(b)) goto false;
17752 * if (lo(a) < lo(b)) goto true;
17756 if (code1 != UNKNOWN)
17757 ix86_expand_branch (code1, hi[0], hi[1], label);
17758 if (code2 != UNKNOWN)
17759 ix86_expand_branch (code2, hi[0], hi[1], label2);
17761 ix86_expand_branch (code3, lo[0], lo[1], label);
17763 if (code2 != UNKNOWN)
17764 emit_label (label2);
17769 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17774 /* Split branch based on floating point condition. */
17776 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17777 rtx target1, rtx target2, rtx tmp, rtx pushed)
17782 if (target2 != pc_rtx)
17785 code = reverse_condition_maybe_unordered (code);
17790 condition = ix86_expand_fp_compare (code, op1, op2,
17793 /* Remove pushed operand from stack. */
17795 ix86_free_from_memory (GET_MODE (pushed));
17797 i = emit_jump_insn (gen_rtx_SET
17799 gen_rtx_IF_THEN_ELSE (VOIDmode,
17800 condition, target1, target2)));
17801 if (split_branch_probability >= 0)
17802 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17806 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17810 gcc_assert (GET_MODE (dest) == QImode);
17812 ret = ix86_expand_compare (code, op0, op1);
17813 PUT_MODE (ret, QImode);
17814 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
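/* Illustrative use (hypothetical source): for
     int f (int a, int b) { return a < b; }
   the expansion above yields roughly
     cmpl %esi, %edi
     setl %al
   with the setcc writing the QImode destination.  */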
17817 /* Expand a comparison setting or clearing the carry flag.  Return true
17818 when successful, and set *POP to the resulting comparison.  */
17820 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17822 enum machine_mode mode =
17823 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17825 /* Do not handle double-mode compares; they go through a special path.  */
17826 if (mode == (TARGET_64BIT ? TImode : DImode))
17829 if (SCALAR_FLOAT_MODE_P (mode))
17831 rtx compare_op, compare_seq;
17833 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17835 /* Shortcut: the following common codes never translate
17836 into carry flag compares. */
17837 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17838 || code == ORDERED || code == UNORDERED)
17841 /* These comparisons require zero flag; swap operands so they won't. */
17842 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17843 && !TARGET_IEEE_FP)
17848 code = swap_condition (code);
17851 /* Try to expand the comparison and verify that we end up with
17852 carry flag based comparison. This fails to be true only when
17853 we decide to expand the comparison using arithmetic, which is not
17854 a common scenario.  */
17856 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17857 compare_seq = get_insns ();
17860 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17861 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17862 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17864 code = GET_CODE (compare_op);
17866 if (code != LTU && code != GEU)
17869 emit_insn (compare_seq);
17874 if (!INTEGRAL_MODE_P (mode))
17883 /* Convert a==0 into (unsigned)a<1. */
17886 if (op1 != const0_rtx)
17889 code = (code == EQ ? LTU : GEU);
17892 /* Convert a>b into b<a or a>=b+1.  */
17895 if (CONST_INT_P (op1))
17897 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17898 /* Bail out on overflow. We still can swap operands but that
17899 would force loading of the constant into register. */
17900 if (op1 == const0_rtx
17901 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17903 code = (code == GTU ? GEU : LTU);
17910 code = (code == GTU ? LTU : GEU);
17914 /* Convert a>=0 into (unsigned)a<0x80000000. */
17917 if (mode == DImode || op1 != const0_rtx)
17919 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17920 code = (code == LT ? GEU : LTU);
17924 if (mode == DImode || op1 != constm1_rtx)
17926 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17927 code = (code == LE ? GEU : LTU);
17933 /* Swapping operands may cause a constant to appear as the first operand.  */
17934 if (!nonimmediate_operand (op0, VOIDmode))
17936 if (!can_create_pseudo_p ())
17938 op0 = force_reg (mode, op0);
17940 *pop = ix86_expand_compare (code, op0, op1);
17941 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
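  /* Worked example (illustrative): a == 0 is rewritten above as
     (unsigned) a < 1, i.e. "cmpl $1, %eax", which sets the carry flag
     exactly when a == 0; the caller can then consume CF directly with
     adc/sbb instead of materializing a 0/1 value.  */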
17946 ix86_expand_int_movcc (rtx operands[])
17948 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17949 rtx compare_seq, compare_op;
17950 enum machine_mode mode = GET_MODE (operands[0]);
17951 bool sign_bit_compare_p = false;
17952 rtx op0 = XEXP (operands[1], 0);
17953 rtx op1 = XEXP (operands[1], 1);
17956 compare_op = ix86_expand_compare (code, op0, op1);
17957 compare_seq = get_insns ();
17960 compare_code = GET_CODE (compare_op);
17962 if ((op1 == const0_rtx && (code == GE || code == LT))
17963 || (op1 == constm1_rtx && (code == GT || code == LE)))
17964 sign_bit_compare_p = true;
17966 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17967 HImode insns, we'd be swallowed in word prefix ops. */
17969 if ((mode != HImode || TARGET_FAST_PREFIX)
17970 && (mode != (TARGET_64BIT ? TImode : DImode))
17971 && CONST_INT_P (operands[2])
17972 && CONST_INT_P (operands[3]))
17974 rtx out = operands[0];
17975 HOST_WIDE_INT ct = INTVAL (operands[2]);
17976 HOST_WIDE_INT cf = INTVAL (operands[3]);
17977 HOST_WIDE_INT diff;
17980 /* Sign bit compares are better done using shifts than by using sbb.  */
17982 if (sign_bit_compare_p
17983 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17985 /* Detect overlap between destination and compare sources. */
17988 if (!sign_bit_compare_p)
17991 bool fpcmp = false;
17993 compare_code = GET_CODE (compare_op);
17995 flags = XEXP (compare_op, 0);
17997 if (GET_MODE (flags) == CCFPmode
17998 || GET_MODE (flags) == CCFPUmode)
18002 = ix86_fp_compare_code_to_integer (compare_code);
18005 /* To simplify the rest of the code, restrict to the GEU case.  */
18006 if (compare_code == LTU)
18008 HOST_WIDE_INT tmp = ct;
18011 compare_code = reverse_condition (compare_code);
18012 code = reverse_condition (code);
18017 PUT_CODE (compare_op,
18018 reverse_condition_maybe_unordered
18019 (GET_CODE (compare_op)));
18021 PUT_CODE (compare_op,
18022 reverse_condition (GET_CODE (compare_op)));
18026 if (reg_overlap_mentioned_p (out, op0)
18027 || reg_overlap_mentioned_p (out, op1))
18028 tmp = gen_reg_rtx (mode);
18030 if (mode == DImode)
18031 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
18033 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
18034 flags, compare_op));
18038 if (code == GT || code == GE)
18039 code = reverse_condition (code);
18042 HOST_WIDE_INT tmp = ct;
18047 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
18060 tmp = expand_simple_binop (mode, PLUS,
18062 copy_rtx (tmp), 1, OPTAB_DIRECT);
18073 tmp = expand_simple_binop (mode, IOR,
18075 copy_rtx (tmp), 1, OPTAB_DIRECT);
18077 else if (diff == -1 && ct)
18087 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18089 tmp = expand_simple_binop (mode, PLUS,
18090 copy_rtx (tmp), GEN_INT (cf),
18091 copy_rtx (tmp), 1, OPTAB_DIRECT);
18099 * andl cf - ct, dest
18109 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
18112 tmp = expand_simple_binop (mode, AND,
18114 gen_int_mode (cf - ct, mode),
18115 copy_rtx (tmp), 1, OPTAB_DIRECT);
18117 tmp = expand_simple_binop (mode, PLUS,
18118 copy_rtx (tmp), GEN_INT (ct),
18119 copy_rtx (tmp), 1, OPTAB_DIRECT);
18122 if (!rtx_equal_p (tmp, out))
18123 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
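	  /* Worked example of the mask arithmetic above (illustrative):
	     the sbb-style pattern leaves tmp == -1 when the carry is set
	     and tmp == 0 otherwise, so (tmp & (cf - ct)) + ct evaluates
	     to cf for tmp == -1 and to ct for tmp == 0, selecting between
	     the two constants without a branch.  */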
18130 enum machine_mode cmp_mode = GET_MODE (op0);
18133 tmp = ct, ct = cf, cf = tmp;
18136 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18138 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18140 /* We may be reversing an unordered compare to a normal compare, which
18141 is not valid in general (we may convert a non-trapping condition
18142 to a trapping one), but on i386 we currently emit all
18143 comparisons unordered.  */
18144 compare_code = reverse_condition_maybe_unordered (compare_code);
18145 code = reverse_condition_maybe_unordered (code);
18149 compare_code = reverse_condition (compare_code);
18150 code = reverse_condition (code);
18154 compare_code = UNKNOWN;
18155 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
18156 && CONST_INT_P (op1))
18158 if (op1 == const0_rtx
18159 && (code == LT || code == GE))
18160 compare_code = code;
18161 else if (op1 == constm1_rtx)
18165 else if (code == GT)
18170 /* Optimize dest = (op0 < 0) ? -1 : cf. */
18171 if (compare_code != UNKNOWN
18172 && GET_MODE (op0) == GET_MODE (out)
18173 && (cf == -1 || ct == -1))
18175 /* If lea code below could be used, only optimize
18176 if it results in a 2 insn sequence. */
18178 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
18179 || diff == 3 || diff == 5 || diff == 9)
18180 || (compare_code == LT && ct == -1)
18181 || (compare_code == GE && cf == -1))
18184 * notl op1 (if necessary)
18192 code = reverse_condition (code);
18195 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18197 out = expand_simple_binop (mode, IOR,
18199 out, 1, OPTAB_DIRECT);
18200 if (out != operands[0])
18201 emit_move_insn (operands[0], out);
18208 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
18209 || diff == 3 || diff == 5 || diff == 9)
18210 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18212 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18218 * lea cf(dest*(ct-cf)),dest
18222 * This also catches the degenerate setcc-only case.
18228 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18231 /* On x86_64 the lea instruction operates on Pmode, so we need
18232 to get the arithmetic done in the proper mode to match.  */
18234 tmp = copy_rtx (out);
18238 out1 = copy_rtx (out);
18239 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18243 tmp = gen_rtx_PLUS (mode, tmp, out1);
18249 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18252 if (!rtx_equal_p (tmp, out))
18255 out = force_operand (tmp, copy_rtx (out));
18257 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18259 if (!rtx_equal_p (out, operands[0]))
18260 emit_move_insn (operands[0], copy_rtx (out));
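	  /* Illustrative instance of the lea path (hypothetical values):
	     with ct = 7 and cf = 3, diff = 4 and the sequence is roughly
	       setcc %al                 ; 0 or 1
	       leal  3(,%eax,4), %eax    ; 0 -> 3, 1 -> 7  */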
18266 * General case:                 Jumpful:
18267 *   xorl dest,dest               cmpl op1, op2
18268 *   cmpl op1, op2                movl ct, dest
18269 *   setcc dest                   jcc 1f
18270 *   decl dest                    movl cf, dest
18271 *   andl (cf-ct),dest            1:
*   addl ct,dest
18274 * Size 20.                       Size 14.
18276 * This is reasonably steep, but branch mispredict costs are
18277 * high on modern cpus, so consider failing only if optimizing for size.
18281 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18282 && BRANCH_COST (optimize_insn_for_speed_p (),
18287 enum machine_mode cmp_mode = GET_MODE (op0);
18292 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18294 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18296 /* We may be reversing an unordered compare to a normal compare,
18297 which is not valid in general (we may convert a non-trapping
18298 condition to a trapping one), but on i386 we currently
18299 emit all comparisons unordered.  */
18300 code = reverse_condition_maybe_unordered (code);
18304 code = reverse_condition (code);
18305 if (compare_code != UNKNOWN)
18306 compare_code = reverse_condition (compare_code);
18310 if (compare_code != UNKNOWN)
18312 /* notl op1 (if needed)
18317 For x < 0 (resp. x <= -1) there will be no notl,
18318 so if possible swap the constants to get rid of the complement.
18320 True/false will be -1/0 while code below (store flag
18321 followed by decrement) is 0/-1, so the constants need
18322 to be exchanged once more. */
18324 if (compare_code == GE || !cf)
18326 code = reverse_condition (code);
18331 HOST_WIDE_INT tmp = cf;
18336 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18340 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18342 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18344 copy_rtx (out), 1, OPTAB_DIRECT);
18347 out = expand_simple_binop (mode, AND, copy_rtx (out),
18348 gen_int_mode (cf - ct, mode),
18349 copy_rtx (out), 1, OPTAB_DIRECT);
18351 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18352 copy_rtx (out), 1, OPTAB_DIRECT);
18353 if (!rtx_equal_p (out, operands[0]))
18354 emit_move_insn (operands[0], copy_rtx (out));
18360 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18362 /* Try a few more things with specific constants and a variable.  */
18365 rtx var, orig_out, out, tmp;
18367 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18370 /* If one of the two operands is an interesting constant, load a
18371 constant using the sequence above and mask it in with a logical operation.  */
18373 if (CONST_INT_P (operands[2]))
18376 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18377 operands[3] = constm1_rtx, op = and_optab;
18378 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18379 operands[3] = const0_rtx, op = ior_optab;
18383 else if (CONST_INT_P (operands[3]))
18386 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18387 operands[2] = constm1_rtx, op = and_optab;
18388 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18389 operands[2] = const0_rtx, op = ior_optab;
18396 orig_out = operands[0];
18397 tmp = gen_reg_rtx (mode);
18400 /* Recurse to get the constant loaded. */
18401 if (ix86_expand_int_movcc (operands) == 0)
18404 /* Mask in the interesting variable. */
18405 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18407 if (!rtx_equal_p (out, orig_out))
18408 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18414 * For comparison with above, the cmov sequence (movl cf,dest; movl ct,tmp; cmpl op1,op2; cmovcc tmp,dest) is size 15.
18424 if (! nonimmediate_operand (operands[2], mode))
18425 operands[2] = force_reg (mode, operands[2]);
18426 if (! nonimmediate_operand (operands[3], mode))
18427 operands[3] = force_reg (mode, operands[3]);
18429 if (! register_operand (operands[2], VOIDmode)
18431 || ! register_operand (operands[3], VOIDmode)))
18432 operands[2] = force_reg (mode, operands[2]);
18435 && ! register_operand (operands[3], VOIDmode))
18436 operands[3] = force_reg (mode, operands[3]);
18438 emit_insn (compare_seq);
18439 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18440 gen_rtx_IF_THEN_ELSE (mode,
18441 compare_op, operands[2],
18446 /* Swap, force into registers, or otherwise massage the two operands
18447 to an sse comparison with a mask result. Thus we differ a bit from
18448 ix86_prepare_fp_compare_args which expects to produce a flags result.
18450 The DEST operand exists to help determine whether to commute commutative
18451 operators. The POP0/POP1 operands are updated in place. The new
18452 comparison code is returned, or UNKNOWN if not implementable. */
18454 static enum rtx_code
18455 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18456 rtx *pop0, rtx *pop1)
18464 /* We have no LTGT as an operator. We could implement it with
18465 NE & ORDERED, but this requires an extra temporary. It's
18466 not clear that it's worth it. */
18473 /* These are supported directly. */
18480 /* For commutative operators, try to canonicalize the destination
18481 operand to be first in the comparison - this helps reload to
18482 avoid extra moves. */
18483 if (!dest || !rtx_equal_p (dest, *pop1))
18491 /* These are not supported directly. Swap the comparison operands
18492 to transform into something that is supported. */
18496 code = swap_condition (code);
18500 gcc_unreachable ();
18506 /* Detect conditional moves that exactly match min/max operational
18507 semantics. Note that this is IEEE safe, as long as we don't
18508 interchange the operands.
18510 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18511 and TRUE if the operation is successful and instructions are emitted. */
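/* Why the operand order matters (illustrative, assuming SSE scalar
   min/max semantics): minss computes "a < b ? a : b" and returns its
   second operand when the comparison is unordered or when the inputs
   are +0.0 and -0.0, so interchanging the operands can change the
   result for NaNs and signed zeros.  */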
18514 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18515 rtx cmp_op1, rtx if_true, rtx if_false)
18517 enum machine_mode mode;
18523 else if (code == UNGE)
18526 if_true = if_false;
18532 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18534 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18539 mode = GET_MODE (dest);
18541 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18542 but MODE may be a vector mode and thus not appropriate. */
18543 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18545 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18548 if_true = force_reg (mode, if_true);
18549 v = gen_rtvec (2, if_true, if_false);
18550 tmp = gen_rtx_UNSPEC (mode, v, u);
18554 code = is_min ? SMIN : SMAX;
18555 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18558 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18562 /* Expand an sse vector comparison. Return the register with the result. */
18565 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18566 rtx op_true, rtx op_false)
18568 enum machine_mode mode = GET_MODE (dest);
18571 cmp_op0 = force_reg (mode, cmp_op0);
18572 if (!nonimmediate_operand (cmp_op1, mode))
18573 cmp_op1 = force_reg (mode, cmp_op1);
18576 || reg_overlap_mentioned_p (dest, op_true)
18577 || reg_overlap_mentioned_p (dest, op_false))
18578 dest = gen_reg_rtx (mode);
18580 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18581 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18586 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18587 operations. This is used for both scalar and vector conditional moves. */
18590 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18592 enum machine_mode mode = GET_MODE (dest);
18595 if (op_false == CONST0_RTX (mode))
18597 op_true = force_reg (mode, op_true);
18598 x = gen_rtx_AND (mode, cmp, op_true);
18599 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18601 else if (op_true == CONST0_RTX (mode))
18603 op_false = force_reg (mode, op_false);
18604 x = gen_rtx_NOT (mode, cmp);
18605 x = gen_rtx_AND (mode, x, op_false);
18606 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18608 else if (TARGET_XOP)
18610 rtx pcmov = gen_rtx_SET (mode, dest,
18611 gen_rtx_IF_THEN_ELSE (mode, cmp,
18618 op_true = force_reg (mode, op_true);
18619 op_false = force_reg (mode, op_false);
18621 t2 = gen_reg_rtx (mode);
18623 t3 = gen_reg_rtx (mode);
18627 x = gen_rtx_AND (mode, op_true, cmp);
18628 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18630 x = gen_rtx_NOT (mode, cmp);
18631 x = gen_rtx_AND (mode, x, op_false);
18632 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18634 x = gen_rtx_IOR (mode, t3, t2);
18635 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
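  /* The identity used above (illustrative): CMP is an all-ones or
     all-zeros mask per element, so
       dest = (op_true & cmp) | (op_false & ~cmp)
     selects op_true where the comparison held and op_false elsewhere,
     giving a branchless vector select.  */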
18639 /* Expand a floating-point conditional move. Return true if successful. */
18642 ix86_expand_fp_movcc (rtx operands[])
18644 enum machine_mode mode = GET_MODE (operands[0]);
18645 enum rtx_code code = GET_CODE (operands[1]);
18646 rtx tmp, compare_op;
18647 rtx op0 = XEXP (operands[1], 0);
18648 rtx op1 = XEXP (operands[1], 1);
18650 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18652 enum machine_mode cmode;
18654 /* Since we've no cmove for sse registers, don't force bad register
18655 allocation just to gain access to it. Deny movcc when the
18656 comparison mode doesn't match the move mode. */
18657 cmode = GET_MODE (op0);
18658 if (cmode == VOIDmode)
18659 cmode = GET_MODE (op1);
18663 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18664 if (code == UNKNOWN)
18667 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18668 operands[2], operands[3]))
18671 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18672 operands[2], operands[3]);
18673 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18677 /* The floating point conditional move instructions don't directly
18678 support conditions resulting from a signed integer comparison. */
18680 compare_op = ix86_expand_compare (code, op0, op1);
18681 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18683 tmp = gen_reg_rtx (QImode);
18684 ix86_expand_setcc (tmp, code, op0, op1);
18686 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18689 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18690 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18691 operands[2], operands[3])));
18696 /* Expand a floating-point vector conditional move; a vcond operation
18697 rather than a movcc operation. */
18700 ix86_expand_fp_vcond (rtx operands[])
18702 enum rtx_code code = GET_CODE (operands[3]);
18705 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18706 &operands[4], &operands[5]);
18707 if (code == UNKNOWN)
18710 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18711 operands[5], operands[1], operands[2]))
18714 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18715 operands[1], operands[2]);
18716 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18720 /* Expand a signed/unsigned integral vector conditional move. */
18723 ix86_expand_int_vcond (rtx operands[])
18725 enum machine_mode mode = GET_MODE (operands[0]);
18726 enum rtx_code code = GET_CODE (operands[3]);
18727 bool negate = false;
18730 cop0 = operands[4];
18731 cop1 = operands[5];
18733 /* XOP supports all of the comparisons on all vector int types. */
18736 /* Canonicalize the comparison to EQ, GT, GTU. */
18747 code = reverse_condition (code);
18753 code = reverse_condition (code);
18759 code = swap_condition (code);
18760 x = cop0, cop0 = cop1, cop1 = x;
18764 gcc_unreachable ();
18767 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18768 if (mode == V2DImode)
18773 /* SSE4.1 supports EQ. */
18774 if (!TARGET_SSE4_1)
18780 /* SSE4.2 supports GT/GTU. */
18781 if (!TARGET_SSE4_2)
18786 gcc_unreachable ();
18790 /* Unsigned parallel compare is not supported by the hardware.
18791 Play some tricks to turn this into a signed comparison against 0.  */
18795 cop0 = force_reg (mode, cop0);
18803 rtx (*gen_sub3) (rtx, rtx, rtx);
18805 /* Subtract (-(INT MAX) - 1) from both operands to make them signed.  */
18807 mask = ix86_build_signbit_mask (mode, true, false);
18808 gen_sub3 = (mode == V4SImode
18809 ? gen_subv4si3 : gen_subv2di3);
18810 t1 = gen_reg_rtx (mode);
18811 emit_insn (gen_sub3 (t1, cop0, mask));
18813 t2 = gen_reg_rtx (mode);
18814 emit_insn (gen_sub3 (t2, cop1, mask));
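	  /* Worked example of the bias trick (illustrative): with 32-bit
	     lanes, a >u b  <==>  (a - 0x80000000) >s (b - 0x80000000).
	     E.g. a = 0xffffffff, b = 1 gives 0x7fffffff >s 0x80000001,
	     which is true, matching the unsigned result.  */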
18824 /* Perform a parallel unsigned saturating subtraction. */
18825 x = gen_reg_rtx (mode);
18826 emit_insn (gen_rtx_SET (VOIDmode, x,
18827 gen_rtx_US_MINUS (mode, cop0, cop1)));
18830 cop1 = CONST0_RTX (mode);
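	  /* Illustrative: for byte/word elements a >u b becomes
	     (a -us b) != 0, since the unsigned saturating subtract is
	     zero exactly when a <=u b (e.g. 5 -us 3 = 2, 3 -us 5 = 0).  */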
18836 gcc_unreachable ();
18841 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18842 operands[1+negate], operands[2-negate]);
18844 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18845 operands[2-negate]);
18849 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18850 true if we should do zero extension, else sign extension. HIGH_P is
18851 true if we want the N/2 high elements, else the low elements. */
18854 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18856 enum machine_mode imode = GET_MODE (operands[1]);
18857 rtx (*unpack)(rtx, rtx, rtx);
18864 unpack = gen_vec_interleave_highv16qi;
18866 unpack = gen_vec_interleave_lowv16qi;
18870 unpack = gen_vec_interleave_highv8hi;
18872 unpack = gen_vec_interleave_lowv8hi;
18876 unpack = gen_vec_interleave_highv4si;
18878 unpack = gen_vec_interleave_lowv4si;
18881 gcc_unreachable ();
18884 dest = gen_lowpart (imode, operands[0]);
18887 se = force_reg (imode, CONST0_RTX (imode));
18889 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18890 operands[1], pc_rtx, pc_rtx);
18892 emit_insn (unpack (dest, operands[1], se));
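/* Illustrative expansion (hypothetical operands): sign-unpacking the
   low half of a V8HI value becomes roughly
     pxor      %xmm1, %xmm1
     pcmpgtw   %xmm0, %xmm1     ; xmm1 = per-element sign mask
     punpcklwd %xmm1, %xmm0     ; interleave elements with their signs
   while the unsigned case interleaves with a zero register instead.  */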
18895 /* This function performs the same task as ix86_expand_sse_unpack,
18896 but with SSE4.1 instructions. */
18899 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18901 enum machine_mode imode = GET_MODE (operands[1]);
18902 rtx (*unpack)(rtx, rtx);
18909 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18911 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18915 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18917 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18921 unpack = gen_sse4_1_zero_extendv2siv2di2;
18923 unpack = gen_sse4_1_sign_extendv2siv2di2;
18926 gcc_unreachable ();
18929 dest = operands[0];
18932 /* Shift higher 8 bytes to lower 8 bytes. */
18933 src = gen_reg_rtx (imode);
18934 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18935 gen_lowpart (V1TImode, operands[1]),
18941 emit_insn (unpack (dest, src));
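/* Illustrative: with SSE4.1 the V4HI -> V4SI widening above is a single
   pmovsxwd (signed) or pmovzxwd (unsigned); for the high half the
   psrldq emitted above first moves the upper 8 bytes down.  */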
18944 /* Expand conditional increment or decrement using adc/sbb instructions.
18945 The default case using setcc followed by the conditional move can be
18946 done by generic code. */
18948 ix86_expand_int_addcc (rtx operands[])
18950 enum rtx_code code = GET_CODE (operands[1]);
18952 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18954 rtx val = const0_rtx;
18955 bool fpcmp = false;
18956 enum machine_mode mode;
18957 rtx op0 = XEXP (operands[1], 0);
18958 rtx op1 = XEXP (operands[1], 1);
18960 if (operands[3] != const1_rtx
18961 && operands[3] != constm1_rtx)
18963 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18965 code = GET_CODE (compare_op);
18967 flags = XEXP (compare_op, 0);
18969 if (GET_MODE (flags) == CCFPmode
18970 || GET_MODE (flags) == CCFPUmode)
18973 code = ix86_fp_compare_code_to_integer (code);
18980 PUT_CODE (compare_op,
18981 reverse_condition_maybe_unordered
18982 (GET_CODE (compare_op)));
18984 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18987 mode = GET_MODE (operands[0]);
18989 /* Construct either adc or sbb insn. */
18990 if ((code == LTU) == (operands[3] == constm1_rtx))
18995 insn = gen_subqi3_carry;
18998 insn = gen_subhi3_carry;
19001 insn = gen_subsi3_carry;
19004 insn = gen_subdi3_carry;
19007 gcc_unreachable ();
19015 insn = gen_addqi3_carry;
19018 insn = gen_addhi3_carry;
19021 insn = gen_addsi3_carry;
19024 insn = gen_adddi3_carry;
19027 gcc_unreachable ();
19030 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
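  /* Illustrative result (hypothetical source): for "x += (a < b);"
     with unsigned operands this comes out roughly as
       cmpl %esi, %edi     ; sets CF when a <u b
       adcl $0, %eax       ; x += CF
     and the decrement case uses sbb the same way.  */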
19036 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
19037 but works for floating point parameters and non-offsettable memories.
19038 For pushes, it returns just stack offsets; the values will be saved
19039 in the right order.  At most four parts are generated.  */
19042 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
19047 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
19049 size = (GET_MODE_SIZE (mode) + 4) / 8;
19051 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
19052 gcc_assert (size >= 2 && size <= 4);
19054 /* Optimize constant pool reference to immediates. This is used by fp
19055 moves, which force all constants to memory to allow combining.  */
19056 if (MEM_P (operand) && MEM_READONLY_P (operand))
19058 rtx tmp = maybe_get_pool_constant (operand);
19063 if (MEM_P (operand) && !offsettable_memref_p (operand))
19065 /* The only non-offsettable memories we handle are pushes.  */
19066 int ok = push_operand (operand, VOIDmode);
19070 operand = copy_rtx (operand);
19071 PUT_MODE (operand, Pmode);
19072 parts[0] = parts[1] = parts[2] = parts[3] = operand;
19076 if (GET_CODE (operand) == CONST_VECTOR)
19078 enum machine_mode imode = int_mode_for_mode (mode);
19079 /* Caution: if we looked through a constant pool memory above,
19080 the operand may actually have a different mode now. That's
19081 ok, since we want to pun this all the way back to an integer. */
19082 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
19083 gcc_assert (operand != NULL);
19089 if (mode == DImode)
19090 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19095 if (REG_P (operand))
19097 gcc_assert (reload_completed);
19098 for (i = 0; i < size; i++)
19099 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
19101 else if (offsettable_memref_p (operand))
19103 operand = adjust_address (operand, SImode, 0);
19104 parts[0] = operand;
19105 for (i = 1; i < size; i++)
19106 parts[i] = adjust_address (operand, SImode, 4 * i);
19108 else if (GET_CODE (operand) == CONST_DOUBLE)
19113 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19117 real_to_target (l, &r, mode);
19118 parts[3] = gen_int_mode (l[3], SImode);
19119 parts[2] = gen_int_mode (l[2], SImode);
19122 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
19123 parts[2] = gen_int_mode (l[2], SImode);
19126 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
19129 gcc_unreachable ();
19131 parts[1] = gen_int_mode (l[1], SImode);
19132 parts[0] = gen_int_mode (l[0], SImode);
19135 gcc_unreachable ();
19140 if (mode == TImode)
19141 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
19142 if (mode == XFmode || mode == TFmode)
19144 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
19145 if (REG_P (operand))
19147 gcc_assert (reload_completed);
19148 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
19149 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
19151 else if (offsettable_memref_p (operand))
19153 operand = adjust_address (operand, DImode, 0);
19154 parts[0] = operand;
19155 parts[1] = adjust_address (operand, upper_mode, 8);
19157 else if (GET_CODE (operand) == CONST_DOUBLE)
19162 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
19163 real_to_target (l, &r, mode);
19165 /* Do not use shift by 32 to avoid warning on 32bit systems. */
19166 if (HOST_BITS_PER_WIDE_INT >= 64)
19169 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
19170 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
19173 parts[0] = immed_double_const (l[0], l[1], DImode);
19175 if (upper_mode == SImode)
19176 parts[1] = gen_int_mode (l[2], SImode);
19177 else if (HOST_BITS_PER_WIDE_INT >= 64)
19180 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
19181 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
19184 parts[1] = immed_double_const (l[2], l[3], DImode);
19187 gcc_unreachable ();
19194 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
19195 Return false when normal moves are needed; true when all required
19196 insns have been emitted.  Operands 2-5 receive the destination parts
19197 in the correct order; operands 6-9 receive the source parts.  */
19200 ix86_split_long_move (rtx operands[])
19205 int collisions = 0;
19206 enum machine_mode mode = GET_MODE (operands[0]);
19207 bool collisionparts[4];
19209 /* The DFmode expanders may ask us to move a double.
19210 For a 64bit target this is a single move.  By hiding the fact
19211 here we simplify i386.md splitters.  */
19212 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19214 /* Optimize constant pool reference to immediates. This is used by
19215 fp moves, which force all constants to memory to allow combining.  */
19217 if (MEM_P (operands[1])
19218 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19219 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19220 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19221 if (push_operand (operands[0], VOIDmode))
19223 operands[0] = copy_rtx (operands[0]);
19224 PUT_MODE (operands[0], Pmode);
19227 operands[0] = gen_lowpart (DImode, operands[0]);
19228 operands[1] = gen_lowpart (DImode, operands[1]);
19229 emit_move_insn (operands[0], operands[1]);
19233 /* The only non-offsettable memory we handle is push. */
19234 if (push_operand (operands[0], VOIDmode))
19237 gcc_assert (!MEM_P (operands[0])
19238 || offsettable_memref_p (operands[0]));
19240 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19241 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19243 /* When emitting a push, take care with source operands on the stack.  */
19244 if (push && MEM_P (operands[1])
19245 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19247 rtx src_base = XEXP (part[1][nparts - 1], 0);
19249 /* Compensate for the stack decrement by 4. */
19250 if (!TARGET_64BIT && nparts == 3
19251 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19252 src_base = plus_constant (src_base, 4);
19254 /* src_base refers to the stack pointer and is
19255 automatically decreased by emitted push. */
19256 for (i = 0; i < nparts; i++)
19257 part[1][i] = change_address (part[1][i],
19258 GET_MODE (part[1][i]), src_base);
19261 /* We need to do the copy in the right order in case an address register
19262 of the source overlaps the destination.  */
19263 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19267 for (i = 0; i < nparts; i++)
19270 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19271 if (collisionparts[i])
19275 /* Collision in the middle part can be handled by reordering. */
19276 if (collisions == 1 && nparts == 3 && collisionparts [1])
19278 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19279 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19281 else if (collisions == 1
19283 && (collisionparts [1] || collisionparts [2]))
19285 if (collisionparts [1])
19287 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19288 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19292 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19293 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19297 /* If there are more collisions, we can't handle them by reordering.
19298 Do an lea to the last part and use only one colliding move.  */
19299 else if (collisions > 1)
19305 base = part[0][nparts - 1];
19307 /* Handle the case when the last part isn't valid for lea.
19308 This happens in 64-bit mode when storing the 12-byte XFmode value.  */
19309 if (GET_MODE (base) != Pmode)
19310 base = gen_rtx_REG (Pmode, REGNO (base));
19312 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19313 part[1][0] = replace_equiv_address (part[1][0], base);
19314 for (i = 1; i < nparts; i++)
19316 tmp = plus_constant (base, UNITS_PER_WORD * i);
19317 part[1][i] = replace_equiv_address (part[1][i], tmp);
19328 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19329 emit_insn (gen_addsi3 (stack_pointer_rtx,
19330 stack_pointer_rtx, GEN_INT (-4)));
19331 emit_move_insn (part[0][2], part[1][2]);
19333 else if (nparts == 4)
19335 emit_move_insn (part[0][3], part[1][3]);
19336 emit_move_insn (part[0][2], part[1][2]);
19341 /* In 64bit mode we don't have a 32bit push available.  In case this is a
19342 register, it is OK; we will just use the larger counterpart.  We also
19343 retype memory; this comes from an attempt to avoid a REX prefix on
19344 the move of the second half of a TFmode value.  */
19345 if (GET_MODE (part[1][1]) == SImode)
19347 switch (GET_CODE (part[1][1]))
19350 part[1][1] = adjust_address (part[1][1], DImode, 0);
19354 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19358 gcc_unreachable ();
19361 if (GET_MODE (part[1][0]) == SImode)
19362 part[1][0] = part[1][1];
19365 emit_move_insn (part[0][1], part[1][1]);
19366 emit_move_insn (part[0][0], part[1][0]);
19370 /* Choose correct order to not overwrite the source before it is copied. */
19371 if ((REG_P (part[0][0])
19372 && REG_P (part[1][1])
19373 && (REGNO (part[0][0]) == REGNO (part[1][1])
19375 && REGNO (part[0][0]) == REGNO (part[1][2]))
19377 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19379 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19381 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19383 operands[2 + i] = part[0][j];
19384 operands[6 + i] = part[1][j];
19389 for (i = 0; i < nparts; i++)
19391 operands[2 + i] = part[0][i];
19392 operands[6 + i] = part[1][i];
19396 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19397 if (optimize_insn_for_size_p ())
19399 for (j = 0; j < nparts - 1; j++)
19400 if (CONST_INT_P (operands[6 + j])
19401 && operands[6 + j] != const0_rtx
19402 && REG_P (operands[2 + j]))
19403 for (i = j; i < nparts - 1; i++)
19404 if (CONST_INT_P (operands[7 + i])
19405 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19406 operands[7 + i] = operands[2 + j];
19409 for (i = 0; i < nparts; i++)
19410 emit_move_insn (operands[2 + i], operands[6 + i]);
19415 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19416 left shift by a constant, either using a single shift or
19417 a sequence of add instructions. */
19420 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19422 rtx (*insn)(rtx, rtx, rtx);
19425 || (count * ix86_cost->add <= ix86_cost->shift_const
19426 && !optimize_insn_for_size_p ()))
19428 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19429 while (count-- > 0)
19430 emit_insn (insn (operand, operand, operand));
19434 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19435 emit_insn (insn (operand, operand, GEN_INT (count)));
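  /* Illustrative: a left shift by 2 of one part can come out as
       addl %eax, %eax
       addl %eax, %eax
     when two adds are cheaper than "shll $2, %eax" on the target, or
     when the count is 1.  */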
19440 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19442 rtx (*gen_ashl3)(rtx, rtx, rtx);
19443 rtx (*gen_shld)(rtx, rtx, rtx);
19444 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19446 rtx low[2], high[2];
19449 if (CONST_INT_P (operands[2]))
19451 split_double_mode (mode, operands, 2, low, high);
19452 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19454 if (count >= half_width)
19456 emit_move_insn (high[0], low[1]);
19457 emit_move_insn (low[0], const0_rtx);
19459 if (count > half_width)
19460 ix86_expand_ashl_const (high[0], count - half_width, mode);
19464 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19466 if (!rtx_equal_p (operands[0], operands[1]))
19467 emit_move_insn (operands[0], operands[1]);
19469 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19470 ix86_expand_ashl_const (low[0], count, mode);
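	  /* Illustrative (hypothetical DImode operand on ia32): for a
	     count of 5 the branch above emits roughly
	       shldl $5, %eax, %edx   ; high = high<<5 | low>>27
	       shll  $5, %eax         ; low <<= 5  */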
19475 split_double_mode (mode, operands, 1, low, high);
19477 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19479 if (operands[1] == const1_rtx)
19481 /* Assuming we've chosen QImode-capable registers, 1 << N
19482 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19483 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19485 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19487 ix86_expand_clear (low[0]);
19488 ix86_expand_clear (high[0]);
19489 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19491 d = gen_lowpart (QImode, low[0]);
19492 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19493 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19494 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19496 d = gen_lowpart (QImode, high[0]);
19497 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19498 s = gen_rtx_NE (QImode, flags, const0_rtx);
19499 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19502 /* Otherwise, we can get the same results by manually performing
19503 a bit extract operation on bit 5/6, and then performing the two
19504 shifts. The two methods of getting 0/1 into low/high are exactly
19505 the same size. Avoiding the shift in the bit extract case helps
19506 pentium4 a bit; no one else seems to care much either way. */
19509 enum machine_mode half_mode;
19510 rtx (*gen_lshr3)(rtx, rtx, rtx);
19511 rtx (*gen_and3)(rtx, rtx, rtx);
19512 rtx (*gen_xor3)(rtx, rtx, rtx);
19513 HOST_WIDE_INT bits;
19516 if (mode == DImode)
19518 half_mode = SImode;
19519 gen_lshr3 = gen_lshrsi3;
19520 gen_and3 = gen_andsi3;
19521 gen_xor3 = gen_xorsi3;
19526 half_mode = DImode;
19527 gen_lshr3 = gen_lshrdi3;
19528 gen_and3 = gen_anddi3;
19529 gen_xor3 = gen_xordi3;
19533 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19534 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19536 x = gen_lowpart (half_mode, operands[2]);
19537 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19539 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19540 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19541 emit_move_insn (low[0], high[0]);
19542 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19545 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19546 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
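	  /* The trick above in scalar terms (illustrative): for 1 << n,
	     bit 5 (resp. 6) of n selects which half receives the 1, so
	       high = (n >> 5) & 1;
	       low = high ^ 1;
	       low <<= n;  high <<= n;
	     where the hardware shifts use n modulo the half width,
	     yielding the full result without branches.  */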
19550 if (operands[1] == constm1_rtx)
19552 /* For -1 << N, we can avoid the shld instruction, because we
19553 know that we're shifting 0...31/63 ones into a -1. */
19554 emit_move_insn (low[0], constm1_rtx);
19555 if (optimize_insn_for_size_p ())
19556 emit_move_insn (high[0], low[0]);
19558 emit_move_insn (high[0], constm1_rtx);
19562 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19564 if (!rtx_equal_p (operands[0], operands[1]))
19565 emit_move_insn (operands[0], operands[1]);
19567 split_double_mode (mode, operands, 1, low, high);
19568 emit_insn (gen_shld (high[0], low[0], operands[2]));
19571 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19573 if (TARGET_CMOVE && scratch)
19575 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19576 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19578 ix86_expand_clear (scratch);
19579 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19583 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19584 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19586 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19591 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19593 rtx (*gen_ashr3)(rtx, rtx, rtx)
19594 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19595 rtx (*gen_shrd)(rtx, rtx, rtx);
19596 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19598 rtx low[2], high[2];
19601 if (CONST_INT_P (operands[2]))
19603 split_double_mode (mode, operands, 2, low, high);
19604 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19606 if (count == GET_MODE_BITSIZE (mode) - 1)
19608 emit_move_insn (high[0], high[1]);
19609 emit_insn (gen_ashr3 (high[0], high[0],
19610 GEN_INT (half_width - 1)));
19611 emit_move_insn (low[0], high[0]);
19614 else if (count >= half_width)
19616 emit_move_insn (low[0], high[1]);
19617 emit_move_insn (high[0], low[0]);
19618 emit_insn (gen_ashr3 (high[0], high[0],
19619 GEN_INT (half_width - 1)));
19621 if (count > half_width)
19622 emit_insn (gen_ashr3 (low[0], low[0],
19623 GEN_INT (count - half_width)));
19627 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19629 if (!rtx_equal_p (operands[0], operands[1]))
19630 emit_move_insn (operands[0], operands[1]);
19632 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19633 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19638 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19640 if (!rtx_equal_p (operands[0], operands[1]))
19641 emit_move_insn (operands[0], operands[1]);
19643 split_double_mode (mode, operands, 1, low, high);
19645 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19646 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19648 if (TARGET_CMOVE && scratch)
19650 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19651 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19653 emit_move_insn (scratch, high[0]);
19654 emit_insn (gen_ashr3 (scratch, scratch,
19655 GEN_INT (half_width - 1)));
19656 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19661 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19662 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19664 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19670 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19672 rtx (*gen_lshr3)(rtx, rtx, rtx)
19673 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19674 rtx (*gen_shrd)(rtx, rtx, rtx);
19675 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19677 rtx low[2], high[2];
19680 if (CONST_INT_P (operands[2]))
19682 split_double_mode (mode, operands, 2, low, high);
19683 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19685 if (count >= half_width)
19687 emit_move_insn (low[0], high[1]);
19688 ix86_expand_clear (high[0]);
19690 if (count > half_width)
19691 emit_insn (gen_lshr3 (low[0], low[0],
19692 GEN_INT (count - half_width)));
19696 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19698 if (!rtx_equal_p (operands[0], operands[1]))
19699 emit_move_insn (operands[0], operands[1]);
19701 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19702 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19707 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19709 if (!rtx_equal_p (operands[0], operands[1]))
19710 emit_move_insn (operands[0], operands[1]);
19712 split_double_mode (mode, operands, 1, low, high);
19714 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19715 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19717 if (TARGET_CMOVE && scratch)
19719 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19720 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19722 ix86_expand_clear (scratch);
19723 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19728 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19729 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19731 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19736 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
19738 predict_jump (int prob)
19740 rtx insn = get_last_insn ();
19741 gcc_assert (JUMP_P (insn));
19742 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19745 /* Helper function for the string operations below.  Test whether VARIABLE
19746 is aligned to VALUE bytes: if (VARIABLE & VALUE) == 0, jump to the returned label.  */
19748 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19750 rtx label = gen_label_rtx ();
19751 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19752 if (GET_MODE (variable) == DImode)
19753 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19755 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19756 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19759 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19761 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19765 /* Adjust COUNTER by the VALUE. */
19767 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19769 rtx (*gen_add)(rtx, rtx, rtx)
19770 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19772 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19775 /* Zero-extend EXP, which may be SImode, into a Pmode register.  */
19777 ix86_zero_extend_to_Pmode (rtx exp)
19780 if (GET_MODE (exp) == VOIDmode)
19781 return force_reg (Pmode, exp);
19782 if (GET_MODE (exp) == Pmode)
19783 return copy_to_mode_reg (Pmode, exp);
19784 r = gen_reg_rtx (Pmode);
19785 emit_insn (gen_zero_extendsidi2 (r, exp));
19789 /* Divide COUNTREG by SCALE. */
19791 scale_counter (rtx countreg, int scale)
19797 if (CONST_INT_P (countreg))
19798 return GEN_INT (INTVAL (countreg) / scale);
19799 gcc_assert (REG_P (countreg));
19801 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19802 GEN_INT (exact_log2 (scale)),
19803 NULL, 1, OPTAB_DIRECT);
19807 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19808 DImode for constant loop counts. */
19810 static enum machine_mode
19811 counter_mode (rtx count_exp)
19813 if (GET_MODE (count_exp) != VOIDmode)
19814 return GET_MODE (count_exp);
19815 if (!CONST_INT_P (count_exp))
19817 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19822 /* When SRCPTR is non-NULL, output a simple loop to move memory from
19823 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
19824 overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
19825 the equivalent loop to set memory to VALUE (assumed to be in MODE).
19827 The size is rounded down to a whole number of chunks moved at once.
19828 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
19832 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19833 rtx destptr, rtx srcptr, rtx value,
19834 rtx count, enum machine_mode mode, int unroll,
19837 rtx out_label, top_label, iter, tmp;
19838 enum machine_mode iter_mode = counter_mode (count);
19839 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19840 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19846 top_label = gen_label_rtx ();
19847 out_label = gen_label_rtx ();
19848 iter = gen_reg_rtx (iter_mode);
19850 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19851 NULL, 1, OPTAB_DIRECT);
19852 /* Those two should combine. */
19853 if (piece_size == const1_rtx)
19855 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19857 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19859 emit_move_insn (iter, const0_rtx);
19861 emit_label (top_label);
19863 tmp = convert_modes (Pmode, iter_mode, iter, true);
19864 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19865 destmem = change_address (destmem, mode, x_addr);
19869 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19870 srcmem = change_address (srcmem, mode, y_addr);
19872 /* When unrolling for chips that reorder memory reads and writes,
19873 we can save registers by using a single temporary.
19874 Also, using 4 temporaries is overkill in 32bit mode.  */
19875 if (!TARGET_64BIT && 0)
19877 for (i = 0; i < unroll; i++)
19882 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19884 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19886 emit_move_insn (destmem, srcmem);
19892 gcc_assert (unroll <= 4);
19893 for (i = 0; i < unroll; i++)
19895 tmpreg[i] = gen_reg_rtx (mode);
19899 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19901 emit_move_insn (tmpreg[i], srcmem);
19903 for (i = 0; i < unroll; i++)
19908 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19910 emit_move_insn (destmem, tmpreg[i]);
19915 for (i = 0; i < unroll; i++)
19919 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19920 emit_move_insn (destmem, value);
19923 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19924 true, OPTAB_LIB_WIDEN);
19926 emit_move_insn (iter, tmp);
19928 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19930 if (expected_size != -1)
19932 expected_size /= GET_MODE_SIZE (mode) * unroll;
19933 if (expected_size == 0)
19935 else if (expected_size > REG_BR_PROB_BASE)
19936 predict_jump (REG_BR_PROB_BASE - 1);
19938 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19941 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19942 iter = ix86_zero_extend_to_Pmode (iter);
19943 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19944 true, OPTAB_LIB_WIDEN);
19945 if (tmp != destptr)
19946 emit_move_insn (destptr, tmp);
19949 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19950 true, OPTAB_LIB_WIDEN);
19952 emit_move_insn (srcptr, tmp);
19954 emit_label (out_label);
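  /* Shape of the emitted loop (illustrative, unroll factor 1):
       size = count & -piece_size;  iter = 0;
     top:
       dest[iter] = src[iter];          (or = value, for memset)
       iter += piece_size;
       if (iter < size) goto top;
     followed by destptr += iter and, for copies, srcptr += iter.  */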
19957 /* Output a "rep; mov" instruction.
19958 Arguments have the same meaning as for the previous function.  */
19960 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19961 rtx destptr, rtx srcptr,
19963 enum machine_mode mode)
19969 /* If the size is known, it is shorter to use rep movs. */
19970 if (mode == QImode && CONST_INT_P (count)
19971 && !(INTVAL (count) & 3))
19974 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19975 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19976 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19977 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19978 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19979 if (mode != QImode)
19981 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19982 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19983 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19984 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19985 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19986 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19990 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19991 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19993 if (CONST_INT_P (count))
19995 count = GEN_INT (INTVAL (count)
19996 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19997 destmem = shallow_copy_rtx (destmem);
19998 srcmem = shallow_copy_rtx (srcmem);
19999 set_mem_size (destmem, count);
20000 set_mem_size (srcmem, count);
20004 if (MEM_SIZE (destmem))
20005 set_mem_size (destmem, NULL_RTX);
20006 if (MEM_SIZE (srcmem))
20007 set_mem_size (srcmem, NULL_RTX);
20009 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
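  /* Illustrative: for a known count that is a multiple of 4 this is
     essentially
       movl $count/4, %ecx
       rep movsl
     with %esi/%edi advancing 4 bytes per iteration.  */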
20013 /* Output a "rep; stos" instruction.
20014 Arguments have the same meaning as for the previous function.  */
20016 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
20017 rtx count, enum machine_mode mode,
20023 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
20024 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
20025 value = force_reg (mode, gen_lowpart (mode, value));
20026 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
20027 if (mode != QImode)
20029 destexp = gen_rtx_ASHIFT (Pmode, countreg,
20030 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
20031 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
20034 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
20035 if (orig_value == const0_rtx && CONST_INT_P (count))
20037 count = GEN_INT (INTVAL (count)
20038 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
20039 destmem = shallow_copy_rtx (destmem);
20040 set_mem_size (destmem, count);
20042 else if (MEM_SIZE (destmem))
20043 set_mem_size (destmem, NULL_RTX);
20044 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
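/* Illustrative sketch, not part of GCC: the gen_rep_stos insn built above
   assembles to a "rep stos{b,l,q}".  The QImode variant corresponds
   roughly to this hypothetical helper, again assuming DF is clear.  */
static void
rep_stosb_sketch (void *dst, unsigned char val, unsigned long n)
{
  __asm__ volatile ("rep stosb"
                    : "+D" (dst), "+c" (n)
                    : "a" (val)
                    : "memory");
}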
20048 emit_strmov (rtx destmem, rtx srcmem,
20049 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
20051 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
20052 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
20053 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20056 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
20058 expand_movmem_epilogue (rtx destmem, rtx srcmem,
20059 rtx destptr, rtx srcptr, rtx count, int max_size)
20062 if (CONST_INT_P (count))
20064 HOST_WIDE_INT countval = INTVAL (count);
20067 if ((countval & 0x10) && max_size > 16)
20071 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20072 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
20075 gcc_unreachable ();
20078 if ((countval & 0x08) && max_size > 8)
20081 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
20084 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20085 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
20089 if ((countval & 0x04) && max_size > 4)
20091 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
20094 if ((countval & 0x02) && max_size > 2)
20096 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
20099 if ((countval & 0x01) && max_size > 1)
20101 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
20108 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
20109 count, 1, OPTAB_DIRECT);
20110 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
20111 count, QImode, 1, 4);
/* When single-operand string insns are available (TARGET_SINGLE_STRINGOP),
   we can cheaply advance the dest and src pointers.  Otherwise we save
   code size by maintaining an offset register (zero is readily available
   from the preceding rep operation) and using x86 addressing modes.  */
20119 if (TARGET_SINGLE_STRINGOP)
20123 rtx label = ix86_expand_aligntest (count, 4, true);
20124 src = change_address (srcmem, SImode, srcptr);
20125 dest = change_address (destmem, SImode, destptr);
20126 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20127 emit_label (label);
20128 LABEL_NUSES (label) = 1;
20132 rtx label = ix86_expand_aligntest (count, 2, true);
20133 src = change_address (srcmem, HImode, srcptr);
20134 dest = change_address (destmem, HImode, destptr);
20135 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20136 emit_label (label);
20137 LABEL_NUSES (label) = 1;
20141 rtx label = ix86_expand_aligntest (count, 1, true);
20142 src = change_address (srcmem, QImode, srcptr);
20143 dest = change_address (destmem, QImode, destptr);
20144 emit_insn (gen_strmov (destptr, dest, srcptr, src));
20145 emit_label (label);
20146 LABEL_NUSES (label) = 1;
20151 rtx offset = force_reg (Pmode, const0_rtx);
20156 rtx label = ix86_expand_aligntest (count, 4, true);
20157 src = change_address (srcmem, SImode, srcptr);
20158 dest = change_address (destmem, SImode, destptr);
20159 emit_move_insn (dest, src);
20160 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
20161 true, OPTAB_LIB_WIDEN);
20163 emit_move_insn (offset, tmp);
20164 emit_label (label);
20165 LABEL_NUSES (label) = 1;
20169 rtx label = ix86_expand_aligntest (count, 2, true);
20170 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20171 src = change_address (srcmem, HImode, tmp);
20172 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20173 dest = change_address (destmem, HImode, tmp);
20174 emit_move_insn (dest, src);
20175 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
20176 true, OPTAB_LIB_WIDEN);
20178 emit_move_insn (offset, tmp);
20179 emit_label (label);
20180 LABEL_NUSES (label) = 1;
20184 rtx label = ix86_expand_aligntest (count, 1, true);
20185 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
20186 src = change_address (srcmem, QImode, tmp);
20187 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
20188 dest = change_address (destmem, QImode, tmp);
20189 emit_move_insn (dest, src);
20190 emit_label (label);
20191 LABEL_NUSES (label) = 1;
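/* Illustrative sketch, not part of GCC: with a constant COUNT the code
   above degenerates into straight-line moves selected by the low bits of
   the count; a remainder of 7 becomes one 4-byte, one 2-byte and one
   1-byte move.  Byte loops stand in for the wider moves here.  */
static void
movmem_epilogue_sketch (unsigned char *dest, const unsigned char *src,
                        unsigned long countval)
{
  unsigned long offset = 0, i;

  if (countval & 4)
    {
      for (i = 0; i < 4; i++)
        dest[offset + i] = src[offset + i];
      offset += 4;
    }
  if (countval & 2)
    {
      dest[offset] = src[offset];
      dest[offset + 1] = src[offset + 1];
      offset += 2;
    }
  if (countval & 1)
    dest[offset] = src[offset];
}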
/* Output code to set at most COUNT & (MAX_SIZE - 1) bytes starting at DEST. */
20198 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
20199 rtx count, int max_size)
20202 expand_simple_binop (counter_mode (count), AND, count,
20203 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
20204 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
20205 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most COUNT & (MAX_SIZE - 1) bytes starting at DEST. */
20211 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20215 if (CONST_INT_P (count))
20217 HOST_WIDE_INT countval = INTVAL (count);
20220 if ((countval & 0x10) && max_size > 16)
20224 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20225 emit_insn (gen_strset (destptr, dest, value));
20226 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
20227 emit_insn (gen_strset (destptr, dest, value));
20230 gcc_unreachable ();
20233 if ((countval & 0x08) && max_size > 8)
20237 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
20238 emit_insn (gen_strset (destptr, dest, value));
20242 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20243 emit_insn (gen_strset (destptr, dest, value));
20244 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
20245 emit_insn (gen_strset (destptr, dest, value));
20249 if ((countval & 0x04) && max_size > 4)
20251 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20252 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20255 if ((countval & 0x02) && max_size > 2)
20257 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20258 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20261 if ((countval & 0x01) && max_size > 1)
20263 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20264 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20271 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20276 rtx label = ix86_expand_aligntest (count, 16, true);
20279 dest = change_address (destmem, DImode, destptr);
20280 emit_insn (gen_strset (destptr, dest, value));
20281 emit_insn (gen_strset (destptr, dest, value));
20285 dest = change_address (destmem, SImode, destptr);
20286 emit_insn (gen_strset (destptr, dest, value));
20287 emit_insn (gen_strset (destptr, dest, value));
20288 emit_insn (gen_strset (destptr, dest, value));
20289 emit_insn (gen_strset (destptr, dest, value));
20291 emit_label (label);
20292 LABEL_NUSES (label) = 1;
20296 rtx label = ix86_expand_aligntest (count, 8, true);
20299 dest = change_address (destmem, DImode, destptr);
20300 emit_insn (gen_strset (destptr, dest, value));
20304 dest = change_address (destmem, SImode, destptr);
20305 emit_insn (gen_strset (destptr, dest, value));
20306 emit_insn (gen_strset (destptr, dest, value));
20308 emit_label (label);
20309 LABEL_NUSES (label) = 1;
20313 rtx label = ix86_expand_aligntest (count, 4, true);
20314 dest = change_address (destmem, SImode, destptr);
20315 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20316 emit_label (label);
20317 LABEL_NUSES (label) = 1;
20321 rtx label = ix86_expand_aligntest (count, 2, true);
20322 dest = change_address (destmem, HImode, destptr);
20323 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20324 emit_label (label);
20325 LABEL_NUSES (label) = 1;
20329 rtx label = ix86_expand_aligntest (count, 1, true);
20330 dest = change_address (destmem, QImode, destptr);
20331 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20332 emit_label (label);
20333 LABEL_NUSES (label) = 1;
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   by ALIGN, to DESIRED_ALIGNMENT.  */
20340 expand_movmem_prologue (rtx destmem, rtx srcmem,
20341 rtx destptr, rtx srcptr, rtx count,
20342 int align, int desired_alignment)
20344 if (align <= 1 && desired_alignment > 1)
20346 rtx label = ix86_expand_aligntest (destptr, 1, false);
20347 srcmem = change_address (srcmem, QImode, srcptr);
20348 destmem = change_address (destmem, QImode, destptr);
20349 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20350 ix86_adjust_counter (count, 1);
20351 emit_label (label);
20352 LABEL_NUSES (label) = 1;
20354 if (align <= 2 && desired_alignment > 2)
20356 rtx label = ix86_expand_aligntest (destptr, 2, false);
20357 srcmem = change_address (srcmem, HImode, srcptr);
20358 destmem = change_address (destmem, HImode, destptr);
20359 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20360 ix86_adjust_counter (count, 2);
20361 emit_label (label);
20362 LABEL_NUSES (label) = 1;
20364 if (align <= 4 && desired_alignment > 4)
20366 rtx label = ix86_expand_aligntest (destptr, 4, false);
20367 srcmem = change_address (srcmem, SImode, srcptr);
20368 destmem = change_address (destmem, SImode, destptr);
20369 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20370 ix86_adjust_counter (count, 4);
20371 emit_label (label);
20372 LABEL_NUSES (label) = 1;
20374 gcc_assert (desired_alignment <= 8);
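/* Illustrative sketch, not part of GCC: the jump tree above peels single
   moves off the front until DEST reaches the desired alignment.  A plain
   loop with the same effect follows (the real code tests each power of
   two exactly once rather than looping; names are hypothetical).  */
static void
align_prologue_sketch (unsigned char **destp, const unsigned char **srcp,
                       unsigned long *countp, unsigned long desired_alignment)
{
  while (((unsigned long) *destp & (desired_alignment - 1)) != 0)
    {
      *(*destp)++ = *(*srcp)++;  /* one QI/HI/SI move in the real code  */
      --*countp;                 /* ix86_adjust_counter  */
    }
}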
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
20380 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20381 int desired_align, int align_bytes)
20384 rtx src_size, dst_size;
20386 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20387 if (src_align_bytes >= 0)
20388 src_align_bytes = desired_align - src_align_bytes;
20389 src_size = MEM_SIZE (src);
20390 dst_size = MEM_SIZE (dst);
20391 if (align_bytes & 1)
20393 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20394 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20396 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20398 if (align_bytes & 2)
20400 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20401 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20402 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20403 set_mem_align (dst, 2 * BITS_PER_UNIT);
20404 if (src_align_bytes >= 0
20405 && (src_align_bytes & 1) == (align_bytes & 1)
20406 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20407 set_mem_align (src, 2 * BITS_PER_UNIT);
20409 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20411 if (align_bytes & 4)
20413 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20414 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20415 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20416 set_mem_align (dst, 4 * BITS_PER_UNIT);
20417 if (src_align_bytes >= 0)
20419 unsigned int src_align = 0;
20420 if ((src_align_bytes & 3) == (align_bytes & 3))
20422 else if ((src_align_bytes & 1) == (align_bytes & 1))
20424 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20425 set_mem_align (src, src_align * BITS_PER_UNIT);
20428 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20430 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20431 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20432 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20433 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20434 if (src_align_bytes >= 0)
20436 unsigned int src_align = 0;
20437 if ((src_align_bytes & 7) == (align_bytes & 7))
20439 else if ((src_align_bytes & 3) == (align_bytes & 3))
20441 else if ((src_align_bytes & 1) == (align_bytes & 1))
20443 if (src_align > (unsigned int) desired_align)
20444 src_align = desired_align;
20445 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20446 set_mem_align (src, src_align * BITS_PER_UNIT);
if (dst_size)
  set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
if (src_size)
  set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
/* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
20459 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20460 int align, int desired_alignment)
20462 if (align <= 1 && desired_alignment > 1)
20464 rtx label = ix86_expand_aligntest (destptr, 1, false);
20465 destmem = change_address (destmem, QImode, destptr);
20466 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20467 ix86_adjust_counter (count, 1);
20468 emit_label (label);
20469 LABEL_NUSES (label) = 1;
20471 if (align <= 2 && desired_alignment > 2)
20473 rtx label = ix86_expand_aligntest (destptr, 2, false);
20474 destmem = change_address (destmem, HImode, destptr);
20475 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20476 ix86_adjust_counter (count, 2);
20477 emit_label (label);
20478 LABEL_NUSES (label) = 1;
20480 if (align <= 4 && desired_alignment > 4)
20482 rtx label = ix86_expand_aligntest (destptr, 4, false);
20483 destmem = change_address (destmem, SImode, destptr);
20484 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20485 ix86_adjust_counter (count, 4);
20486 emit_label (label);
20487 LABEL_NUSES (label) = 1;
20489 gcc_assert (desired_alignment <= 8);
/* Set enough bytes at DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES is
   how many bytes need to be stored.  */
20495 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20496 int desired_align, int align_bytes)
20499 rtx dst_size = MEM_SIZE (dst);
20500 if (align_bytes & 1)
20502 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20504 emit_insn (gen_strset (destreg, dst,
20505 gen_lowpart (QImode, value)));
20507 if (align_bytes & 2)
20509 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20510 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20511 set_mem_align (dst, 2 * BITS_PER_UNIT);
20513 emit_insn (gen_strset (destreg, dst,
20514 gen_lowpart (HImode, value)));
20516 if (align_bytes & 4)
20518 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20519 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20520 set_mem_align (dst, 4 * BITS_PER_UNIT);
20522 emit_insn (gen_strset (destreg, dst,
20523 gen_lowpart (SImode, value)));
20525 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20526 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20527 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20529 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20533 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20534 static enum stringop_alg
20535 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20536 int *dynamic_check)
20538 const struct stringop_algs * algs;
20539 bool optimize_for_speed;
20540 /* Algorithms using the rep prefix want at least edi and ecx;
20541 additionally, memset wants eax and memcpy wants esi. Don't
20542 consider such algorithms if the user has appropriated those
20543 registers for their own purposes. */
20544 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20546 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20548 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20549 || (alg != rep_prefix_1_byte \
20550 && alg != rep_prefix_4_byte \
20551 && alg != rep_prefix_8_byte))
20552 const struct processor_costs *cost;
20554 /* Even if the string operation call is cold, we still might spend a lot
20555 of time processing large blocks. */
20556 if (optimize_function_for_size_p (cfun)
20557 || (optimize_insn_for_size_p ()
20558 && expected_size != -1 && expected_size < 256))
20559 optimize_for_speed = false;
20561 optimize_for_speed = true;
20563 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20565 *dynamic_check = -1;
20567 algs = &cost->memset[TARGET_64BIT != 0];
20569 algs = &cost->memcpy[TARGET_64BIT != 0];
20570 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20571 return stringop_alg;
20572 /* rep; movq or rep; movl is the smallest variant. */
20573 else if (!optimize_for_speed)
20575 if (!count || (count & 3))
20576 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20578 return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive to
   set up.  */
20582 else if (expected_size != -1 && expected_size < 4)
20583 return loop_1_byte;
20584 else if (expected_size != -1)
20587 enum stringop_alg alg = libcall;
20588 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20590 /* We get here if the algorithms that were not libcall-based
20591 were rep-prefix based and we are unable to use rep prefixes
20592 based on global register usage. Break out of the loop and
20593 use the heuristic below. */
20594 if (algs->size[i].max == 0)
20596 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20598 enum stringop_alg candidate = algs->size[i].alg;
20600 if (candidate != libcall && ALG_USABLE_P (candidate))
20602 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20603 last non-libcall inline algorithm. */
20604 if (TARGET_INLINE_ALL_STRINGOPS)
/* When the current size is best copied by a libcall, but we are still
   forced to inline, run the heuristic below that will pick code for
   medium-sized blocks.  */
20609 if (alg != libcall)
20613 else if (ALG_USABLE_P (candidate))
20617 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand,
   and take blocks of at most that size, guessing that the average size
   will be roughly half of that maximum.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
20626 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20627 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20630 enum stringop_alg alg;
20632 bool any_alg_usable_p = true;
20634 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20636 enum stringop_alg candidate = algs->size[i].alg;
20637 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20639 if (candidate != libcall && candidate
20640 && ALG_USABLE_P (candidate))
20641 max = algs->size[i].max;
20643 /* If there aren't any usable algorithms, then recursing on
20644 smaller sizes isn't going to find anything. Just return the
20645 simple byte-at-a-time copy loop. */
20646 if (!any_alg_usable_p)
20648 /* Pick something reasonable. */
20649 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20650 *dynamic_check = 128;
20651 return loop_1_byte;
20655 alg = decide_alg (count, max / 2, memset, dynamic_check);
20656 gcc_assert (*dynamic_check == -1);
20657 gcc_assert (alg != libcall);
20658 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20659 *dynamic_check = max;
20662 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20663 #undef ALG_USABLE_P
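/* Illustrative sketch, not part of GCC: the stringop_algs tables consulted
   above encode a piecewise choice of the form "up to MAX bytes, use ALG".
   This hypothetical helper shows the lookup; the real tables live in the
   per-CPU processor_costs structures.  */
struct alg_entry_sketch { int max; int alg; };  /* max == -1: no limit  */

static int
pick_alg_sketch (const struct alg_entry_sketch *table, int n,
                 long expected_size)
{
  int i;
  for (i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;              /* first bucket that fits  */
  return -1;                            /* fall back to a libcall  */
}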
20666 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20667 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20669 decide_alignment (int align,
20670 enum stringop_alg alg,
20673 int desired_align = 0;
20677 gcc_unreachable ();
20679 case unrolled_loop:
20680 desired_align = GET_MODE_SIZE (Pmode);
20682 case rep_prefix_8_byte:
20685 case rep_prefix_4_byte:
/* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
20688 if (TARGET_PENTIUMPRO)
20693 case rep_prefix_1_byte:
/* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
20696 if (TARGET_PENTIUMPRO)
20710 if (desired_align < align)
20711 desired_align = align;
20712 if (expected_size != -1 && expected_size < 4)
20713 desired_align = align;
20714 return desired_align;
20717 /* Return the smallest power of 2 greater than VAL. */
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
/* Expand a string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code depends
   upon architecture, block size and alignment, but always has the same
   overall structure:

   1) Prologue guard: a conditional that jumps to the epilogue for small
      blocks that can be handled by the epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      An optional dynamic check for size, with a libcall for large blocks,
      is emitted here too when -minline-stringops-dynamically is in use.

   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree of power-of-two sized moves,
      or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the chosen algorithm.

   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to the size guarded by the
      prologue guard).  */
20752 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20753 rtx expected_align_exp, rtx expected_size_exp)
20759 rtx jump_around_label = NULL;
20760 HOST_WIDE_INT align = 1;
20761 unsigned HOST_WIDE_INT count = 0;
20762 HOST_WIDE_INT expected_size = -1;
20763 int size_needed = 0, epilogue_size_needed;
20764 int desired_align = 0, align_bytes = 0;
20765 enum stringop_alg alg;
20767 bool need_zero_guard = false;
20769 if (CONST_INT_P (align_exp))
20770 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
20772 if (CONST_INT_P (expected_align_exp)
20773 && INTVAL (expected_align_exp) > align)
20774 align = INTVAL (expected_align_exp);
20775 /* ALIGN is the minimum of destination and source alignment, but we care here
20776 just about destination alignment. */
20777 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20778 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20780 if (CONST_INT_P (count_exp))
20781 count = expected_size = INTVAL (count_exp);
20782 if (CONST_INT_P (expected_size_exp) && count == 0)
20783 expected_size = INTVAL (expected_size_exp);
20785 /* Make sure we don't need to care about overflow later on. */
20786 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20789 /* Step 0: Decide on preferred algorithm, desired alignment and
20790 size of chunks to be copied by main loop. */
20792 alg = decide_alg (count, expected_size, false, &dynamic_check);
20793 desired_align = decide_alignment (align, alg, expected_size);
20795 if (!TARGET_ALIGN_STRINGOPS)
20796 align = desired_align;
20798 if (alg == libcall)
20800 gcc_assert (alg != no_stringop);
20802 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20803 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20804 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20809 gcc_unreachable ();
20811 need_zero_guard = true;
20812 size_needed = GET_MODE_SIZE (Pmode);
20814 case unrolled_loop:
20815 need_zero_guard = true;
20816 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20818 case rep_prefix_8_byte:
20821 case rep_prefix_4_byte:
20824 case rep_prefix_1_byte:
20828 need_zero_guard = true;
20833 epilogue_size_needed = size_needed;
20835 /* Step 1: Prologue guard. */
20837 /* Alignment code needs count to be in register. */
20838 if (CONST_INT_P (count_exp) && desired_align > align)
20840 if (INTVAL (count_exp) > desired_align
20841 && INTVAL (count_exp) > size_needed)
20844 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20845 if (align_bytes <= 0)
20848 align_bytes = desired_align - align_bytes;
20850 if (align_bytes == 0)
20851 count_exp = force_reg (counter_mode (count_exp), count_exp);
20853 gcc_assert (desired_align >= 1 && align >= 1);
20855 /* Ensure that alignment prologue won't copy past end of block. */
20856 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20858 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
20861 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20865 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20867 /* If main algorithm works on QImode, no epilogue is needed.
20868 For small sizes just don't align anything. */
20869 if (size_needed == 1)
20870 desired_align = align;
20877 label = gen_label_rtx ();
20878 emit_cmp_and_jump_insns (count_exp,
20879 GEN_INT (epilogue_size_needed),
20880 LTU, 0, counter_mode (count_exp), 1, label);
20881 if (expected_size == -1 || expected_size < epilogue_size_needed)
20882 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20884 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
20890 if (dynamic_check != -1)
20892 if (CONST_INT_P (count_exp))
20894 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20896 emit_block_move_via_libcall (dst, src, count_exp, false);
20897 count_exp = const0_rtx;
20903 rtx hot_label = gen_label_rtx ();
20904 jump_around_label = gen_label_rtx ();
20905 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20906 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20907 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20908 emit_block_move_via_libcall (dst, src, count_exp, false);
20909 emit_jump (jump_around_label);
20910 emit_label (hot_label);
20914 /* Step 2: Alignment prologue. */
20916 if (desired_align > align)
20918 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know the
   constant offset in the aliasing info.  It doesn't seem worth the pain
   to maintain it for the first move, so throw away the info early.  */
20924 src = change_address (src, BLKmode, srcreg);
20925 dst = change_address (dst, BLKmode, destreg);
20926 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
/* If we know how many bytes need to be copied before dst is
   sufficiently aligned, maintain aliasing info accurately.  */
20933 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20934 desired_align, align_bytes);
20935 count_exp = plus_constant (count_exp, -align_bytes);
20936 count -= align_bytes;
20938 if (need_zero_guard
20939 && (count < (unsigned HOST_WIDE_INT) size_needed
20940 || (align_bytes == 0
20941 && count < ((unsigned HOST_WIDE_INT) size_needed
20942 + desired_align - align))))
/* It is possible that we copied enough so that the main loop will not
   operate.  */
gcc_assert (size_needed > 1);
20947 if (label == NULL_RTX)
20948 label = gen_label_rtx ();
20949 emit_cmp_and_jump_insns (count_exp,
20950 GEN_INT (size_needed),
20951 LTU, 0, counter_mode (count_exp), 1, label);
20952 if (expected_size == -1
20953 || expected_size < (desired_align - align) / 2 + size_needed)
20954 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20956 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20959 if (label && size_needed == 1)
20961 emit_label (label);
20962 LABEL_NUSES (label) = 1;
20964 epilogue_size_needed = 1;
20966 else if (label == NULL_RTX)
20967 epilogue_size_needed = size_needed;
20969 /* Step 3: Main loop. */
20975 gcc_unreachable ();
20977 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20978 count_exp, QImode, 1, expected_size);
20981 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20982 count_exp, Pmode, 1, expected_size);
20984 case unrolled_loop:
20985 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20986 registers for 4 temporaries anyway. */
20987 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20988 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20991 case rep_prefix_8_byte:
20992 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20995 case rep_prefix_4_byte:
20996 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20999 case rep_prefix_1_byte:
21000 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of the src and dest memory for aliasing.  */
21005 if (CONST_INT_P (count_exp))
21007 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
21008 (count / size_needed) * size_needed);
21009 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21010 (count / size_needed) * size_needed);
21014 src = change_address (src, BLKmode, srcreg);
21015 dst = change_address (dst, BLKmode, destreg);
21018 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
21027 if (size_needed < epilogue_size_needed)
21030 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21031 GEN_INT (size_needed - 1), count_exp, 1,
21033 if (tmp != count_exp)
21034 emit_move_insn (count_exp, tmp);
21036 emit_label (label);
21037 LABEL_NUSES (label) = 1;
21040 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21041 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
21042 epilogue_size_needed);
21043 if (jump_around_label)
21044 emit_label (jump_around_label);
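/* Illustrative sketch, not part of GCC: for ALG == rep_prefix_4_byte the
   expansion built above corresponds roughly to the following C; the
   numbered comments match the four steps described in the comment before
   ix86_expand_movmem.  Names are hypothetical, and the pointer-to-integer
   cast is assumed valid for the target.  */
static void
movmem_shape_sketch (unsigned char *dst, const unsigned char *src,
                     unsigned long count)
{
  unsigned long n = count;

  if (n < 4)                                    /* 1) prologue guard  */
    goto epilogue;
  while (((unsigned long) dst & 3) != 0)        /* 2) alignment prologue  */
    {
      *dst++ = *src++;
      n--;
    }
  {                                             /* 3) main body: rep movsl  */
    unsigned long i, words = n >> 2;
    for (i = 0; i < words; i++)
      {
        dst[0] = src[0]; dst[1] = src[1];
        dst[2] = src[2]; dst[3] = src[3];
        dst += 4; src += 4;
      }
    n &= 3;
  }
 epilogue:                                      /* 4) epilogue for the tail  */
  while (n--)
    *dst++ = *src++;
}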
/* Helper function for memset (see ix86_expand_setmem).  For a QImode
   value 0xXY produce 0xXYXYXYXY of the width specified by MODE.  This is
   essentially a multiplication by 0x01010101, but we can do slightly
   better than synth_mult by unwinding the sequence by hand on CPUs with
   a slow multiply.  */
21054 promote_duplicated_reg (enum machine_mode mode, rtx val)
21056 enum machine_mode valmode = GET_MODE (val);
21058 int nops = mode == DImode ? 3 : 2;
21060 gcc_assert (mode == SImode || mode == DImode);
21061 if (val == const0_rtx)
21062 return copy_to_mode_reg (mode, const0_rtx);
21063 if (CONST_INT_P (val))
21065 HOST_WIDE_INT v = INTVAL (val) & 255;
21069 if (mode == DImode)
21070 v |= (v << 16) << 16;
21071 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
21074 if (valmode == VOIDmode)
21076 if (valmode != QImode)
21077 val = gen_lowpart (QImode, val);
21078 if (mode == QImode)
21080 if (!TARGET_PARTIAL_REG_STALL)
21082 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
21083 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
21084 <= (ix86_cost->shift_const + ix86_cost->add) * nops
21085 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
21087 rtx reg = convert_modes (mode, QImode, val, true);
21088 tmp = promote_duplicated_reg (mode, const1_rtx);
21089 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
21094 rtx reg = convert_modes (mode, QImode, val, true);
21096 if (!TARGET_PARTIAL_REG_STALL)
21097 if (mode == SImode)
21098 emit_insn (gen_movsi_insv_1 (reg, reg));
21100 emit_insn (gen_movdi_insv_1 (reg, reg));
21103 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
21104 NULL, 1, OPTAB_DIRECT);
21106 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
21108 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
21109 NULL, 1, OPTAB_DIRECT);
21110 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
if (mode != SImode)
21113 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
21114 NULL, 1, OPTAB_DIRECT);
21115 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
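/* Illustrative sketch, not part of GCC: the shift/or chain above computes,
   for an 8-bit V, the value V * 0x0101010101010101 without a multiply.  */
static unsigned long long
promote_duplicated_sketch (unsigned char v)
{
  unsigned long long reg = v;
  reg |= reg << 8;      /* 0x00XY -> 0xXYXY  */
  reg |= reg << 16;     /* 0xXYXY -> 0xXYXYXYXY; SImode stops here  */
  reg |= reg << 32;     /* this last step is DImode only  */
  return reg;
}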
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
21124 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
21129 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
21130 promoted_val = promote_duplicated_reg (DImode, val);
21131 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
21132 promoted_val = promote_duplicated_reg (SImode, val);
21133 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
21134 promoted_val = promote_duplicated_reg (HImode, val);
21136 promoted_val = val;
21138 return promoted_val;
/* Expand a string clear operation (bzero).  Use i386 string operations
   when profitable.  See the expand_movmem comment for an explanation of
   the individual steps performed.  */
21145 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
21146 rtx expected_align_exp, rtx expected_size_exp)
21151 rtx jump_around_label = NULL;
21152 HOST_WIDE_INT align = 1;
21153 unsigned HOST_WIDE_INT count = 0;
21154 HOST_WIDE_INT expected_size = -1;
21155 int size_needed = 0, epilogue_size_needed;
21156 int desired_align = 0, align_bytes = 0;
21157 enum stringop_alg alg;
21158 rtx promoted_val = NULL;
21159 bool force_loopy_epilogue = false;
21161 bool need_zero_guard = false;
21163 if (CONST_INT_P (align_exp))
21164 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
21166 if (CONST_INT_P (expected_align_exp)
21167 && INTVAL (expected_align_exp) > align)
21168 align = INTVAL (expected_align_exp);
21169 if (CONST_INT_P (count_exp))
21170 count = expected_size = INTVAL (count_exp);
21171 if (CONST_INT_P (expected_size_exp) && count == 0)
21172 expected_size = INTVAL (expected_size_exp);
21174 /* Make sure we don't need to care about overflow later on. */
21175 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
21178 /* Step 0: Decide on preferred algorithm, desired alignment and
21179 size of chunks to be copied by main loop. */
21181 alg = decide_alg (count, expected_size, true, &dynamic_check);
21182 desired_align = decide_alignment (align, alg, expected_size);
21184 if (!TARGET_ALIGN_STRINGOPS)
21185 align = desired_align;
21187 if (alg == libcall)
21189 gcc_assert (alg != no_stringop);
21191 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
21192 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
21197 gcc_unreachable ();
21199 need_zero_guard = true;
21200 size_needed = GET_MODE_SIZE (Pmode);
21202 case unrolled_loop:
21203 need_zero_guard = true;
21204 size_needed = GET_MODE_SIZE (Pmode) * 4;
21206 case rep_prefix_8_byte:
21209 case rep_prefix_4_byte:
21212 case rep_prefix_1_byte:
21216 need_zero_guard = true;
21220 epilogue_size_needed = size_needed;
21222 /* Step 1: Prologue guard. */
21224 /* Alignment code needs count to be in register. */
21225 if (CONST_INT_P (count_exp) && desired_align > align)
21227 if (INTVAL (count_exp) > desired_align
21228 && INTVAL (count_exp) > size_needed)
21231 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
21232 if (align_bytes <= 0)
21235 align_bytes = desired_align - align_bytes;
21237 if (align_bytes == 0)
21239 enum machine_mode mode = SImode;
21240 if (TARGET_64BIT && (count & ~0xffffffff))
21242 count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion first to allow better CSE across the main loop
   and the epilogue (i.e., one load of the big constant in front of all
   the code).  */
21248 if (CONST_INT_P (val_exp))
21249 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21250 desired_align, align);
21251 /* Ensure that alignment prologue won't copy past end of block. */
21252 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21254 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
21257 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* To improve performance of small blocks, we jump around the VAL
   promoting code.  This means that if the promoted VAL is not a
   constant, we might not use it in the epilogue and have to fall back
   to the byte loop variant.  */
21263 if (epilogue_size_needed > 2 && !promoted_val)
21264 force_loopy_epilogue = true;
21267 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21269 /* If main algorithm works on QImode, no epilogue is needed.
21270 For small sizes just don't align anything. */
21271 if (size_needed == 1)
21272 desired_align = align;
21279 label = gen_label_rtx ();
21280 emit_cmp_and_jump_insns (count_exp,
21281 GEN_INT (epilogue_size_needed),
21282 LTU, 0, counter_mode (count_exp), 1, label);
21283 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21284 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21286 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21289 if (dynamic_check != -1)
21291 rtx hot_label = gen_label_rtx ();
21292 jump_around_label = gen_label_rtx ();
21293 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21294 LEU, 0, counter_mode (count_exp), 1, hot_label);
21295 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21296 set_storage_via_libcall (dst, count_exp, val_exp, false);
21297 emit_jump (jump_around_label);
21298 emit_label (hot_label);
21301 /* Step 2: Alignment prologue. */
21303 /* Do the expensive promotion once we branched off the small blocks. */
21305 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21306 desired_align, align);
21307 gcc_assert (desired_align >= 1 && align >= 1);
21309 if (desired_align > align)
21311 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know the
   constant offset in the aliasing info.  It doesn't seem worth the pain
   to maintain it for the first move, so throw away the info early.  */
21317 dst = change_address (dst, BLKmode, destreg);
21318 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21323 /* If we know how many bytes need to be stored before dst is
21324 sufficiently aligned, maintain aliasing info accurately. */
21325 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21326 desired_align, align_bytes);
21327 count_exp = plus_constant (count_exp, -align_bytes);
21328 count -= align_bytes;
21330 if (need_zero_guard
21331 && (count < (unsigned HOST_WIDE_INT) size_needed
21332 || (align_bytes == 0
21333 && count < ((unsigned HOST_WIDE_INT) size_needed
21334 + desired_align - align))))
/* It is possible that we copied enough so that the main loop will not
   operate.  */
gcc_assert (size_needed > 1);
21339 if (label == NULL_RTX)
21340 label = gen_label_rtx ();
21341 emit_cmp_and_jump_insns (count_exp,
21342 GEN_INT (size_needed),
21343 LTU, 0, counter_mode (count_exp), 1, label);
21344 if (expected_size == -1
21345 || expected_size < (desired_align - align) / 2 + size_needed)
21346 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21348 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21351 if (label && size_needed == 1)
21353 emit_label (label);
21354 LABEL_NUSES (label) = 1;
21356 promoted_val = val_exp;
21357 epilogue_size_needed = 1;
21359 else if (label == NULL_RTX)
21360 epilogue_size_needed = size_needed;
21362 /* Step 3: Main loop. */
21368 gcc_unreachable ();
21370 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21371 count_exp, QImode, 1, expected_size);
21374 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21375 count_exp, Pmode, 1, expected_size);
21377 case unrolled_loop:
21378 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21379 count_exp, Pmode, 4, expected_size);
21381 case rep_prefix_8_byte:
21382 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21385 case rep_prefix_4_byte:
21386 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21389 case rep_prefix_1_byte:
21390 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
/* Properly adjust the offset of the dest memory for aliasing.  */
21395 if (CONST_INT_P (count_exp))
21396 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21397 (count / size_needed) * size_needed);
21399 dst = change_address (dst, BLKmode, destreg);
21401 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
21410 if (size_needed < epilogue_size_needed)
21413 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21414 GEN_INT (size_needed - 1), count_exp, 1,
21416 if (tmp != count_exp)
21417 emit_move_insn (count_exp, tmp);
21419 emit_label (label);
21420 LABEL_NUSES (label) = 1;
21423 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21425 if (force_loopy_epilogue)
21426 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21427 epilogue_size_needed);
21429 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21430 epilogue_size_needed);
21432 if (jump_around_label)
21433 emit_label (jump_around_label);
/* Expand the appropriate insns for computing strlen when not simply doing
   repnz; scasb:

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computation at the end.  These things are done in
   i386.md.  */
21449 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21453 rtx align_2_label = NULL_RTX;
21454 rtx align_3_label = NULL_RTX;
21455 rtx align_4_label = gen_label_rtx ();
21456 rtx end_0_label = gen_label_rtx ();
21458 rtx tmpreg = gen_reg_rtx (SImode);
21459 rtx scratch = gen_reg_rtx (SImode);
21463 if (CONST_INT_P (align_rtx))
21464 align = INTVAL (align_rtx);
21466 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21468 /* Is there a known alignment and is it less than 4? */
21471 rtx scratch1 = gen_reg_rtx (Pmode);
21472 emit_move_insn (scratch1, out);
21473 /* Is there a known alignment and is it not 2? */
21476 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21477 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21479 /* Leave just the 3 lower bits. */
21480 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21481 NULL_RTX, 0, OPTAB_WIDEN);
21483 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21484 Pmode, 1, align_4_label);
21485 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21486 Pmode, 1, align_2_label);
21487 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21488 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether the pointer is already aligned to 4 bytes.  */
21495 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21496 NULL_RTX, 0, OPTAB_WIDEN);
21498 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21499 Pmode, 1, align_4_label);
21502 mem = change_address (src, QImode, out);
21504 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
21507 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21508 QImode, 1, end_0_label);
21510 /* Increment the address. */
21511 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21513 /* Not needed with an alignment of 2 */
21516 emit_label (align_2_label);
21518 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21521 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21523 emit_label (align_3_label);
21526 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21529 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Generate a loop to check 4 bytes at a time.  It is not a good idea to
   align this loop: it only makes the program huge and does not help
   speed it up.  */
21535 emit_label (align_4_label);
21537 mem = change_address (src, SImode, out);
21538 emit_move_insn (scratch, mem);
21539 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles (a plain C
   version of the trick appears after this function).  */
21544 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21545 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21546 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21547 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21548 gen_int_mode (0x80808080, SImode)));
21549 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21554 rtx reg = gen_reg_rtx (SImode);
21555 rtx reg2 = gen_reg_rtx (Pmode);
21556 emit_move_insn (reg, tmpreg);
21557 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21559 /* If zero is not in the first two bytes, move two bytes forward. */
21560 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21561 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21562 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21563 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21564 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21567 /* Emit lea manually to avoid clobbering of flags. */
21568 emit_insn (gen_rtx_SET (SImode, reg2,
21569 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21571 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21572 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21573 emit_insn (gen_rtx_SET (VOIDmode, out,
21574 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21580 rtx end_2_label = gen_label_rtx ();
21581 /* Is zero in the first two bytes? */
21583 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21584 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21585 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21586 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21587 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21589 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21590 JUMP_LABEL (tmp) = end_2_label;
21592 /* Not in the first two. Move two bytes forward. */
21593 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21594 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21596 emit_label (end_2_label);
21600 /* Avoid branch in fixing the byte. */
21601 tmpreg = gen_lowpart (QImode, tmpreg);
21602 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21603 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21604 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21605 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21607 emit_label (end_0_label);
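/* Illustrative sketch, not part of GCC: the zero-byte test used above in
   plain C.  (W - 0x01010101) & ~W & 0x80808080 is nonzero exactly when
   at least one of the four bytes of W is zero.  */
static int
has_zero_byte_sketch (unsigned int w)
{
  return ((w - 0x01010101U) & ~w & 0x80808080U) != 0;
}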
21610 /* Expand strlen. */
21613 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21615 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
21620 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21621 && !TARGET_INLINE_ALL_STRINGOPS
21622 && !optimize_insn_for_size_p ()
21623 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21626 addr = force_reg (Pmode, XEXP (src, 0));
21627 scratch1 = gen_reg_rtx (Pmode);
21629 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21630 && !optimize_insn_for_size_p ())
/* Well, it seems that some optimizers do not combine a call like
   foo (strlen (bar), strlen (bar)); when the move and the subtraction
   are done here: they compute the length just once when these
   instructions are done inside output_strlen_unroll ().  But since
   &bar[strlen (bar)] is often used and this uses one fewer register for
   the lifetime of output_strlen_unroll (), this is better.  */
21640 emit_move_insn (out, addr);
21642 ix86_expand_strlensi_unroll_1 (out, src, align);
21644 /* strlensi_unroll_1 returns the address of the zero at the end of
21645 the string, like memchr(), so compute the length by subtracting
21646 the start address. */
21647 emit_insn (ix86_gen_sub3 (out, out, addr));
21653 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21654 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21657 scratch2 = gen_reg_rtx (Pmode);
21658 scratch3 = gen_reg_rtx (Pmode);
21659 scratch4 = force_reg (Pmode, constm1_rtx);
21661 emit_move_insn (scratch3, addr);
21662 eoschar = force_reg (QImode, eoschar);
21664 src = replace_equiv_address_nv (src, scratch3);
21666 /* If .md starts supporting :P, this can be done in .md. */
21667 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21668 scratch4), UNSPEC_SCAS);
21669 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21670 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21671 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
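/* Illustrative sketch, not part of GCC: the repnz; scasb idiom expanded
   above.  The count register starts at -1 and is decremented once per
   byte scanned, including the terminator, so the length is ~count - 1
   afterwards; that is what the one_cmpl and add insns above compute.
   Hypothetical helper; DF is assumed clear.  */
static unsigned long
strlen_scasb_sketch (const char *s)
{
  unsigned long count = (unsigned long) -1;
  const char *p = s;

  __asm__ volatile ("repnz scasb"
                    : "+D" (p), "+c" (count)
                    : "a" (0)
                    : "cc", "memory");
  return ~count - 1;
}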
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
21679 construct_plt_address (rtx symbol)
21681 rtx tmp = gen_reg_rtx (Pmode);
21682 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21684 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21685 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21687 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21688 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21693 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21695 rtx pop, int sibcall)
21697 rtx use = NULL, call;
21699 if (pop == const0_rtx)
21701 gcc_assert (!TARGET_64BIT || !pop);
21703 if (TARGET_MACHO && !TARGET_64BIT)
21706 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21707 fnaddr = machopic_indirect_call_target (fnaddr);
21712 /* Static functions and indirect calls don't need the pic register. */
21713 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21714 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21715 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21716 use_reg (&use, pic_offset_table_rtx);
21719 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21721 rtx al = gen_rtx_REG (QImode, AX_REG);
21722 emit_move_insn (al, callarg2);
21723 use_reg (&use, al);
21726 if (ix86_cmodel == CM_LARGE_PIC
21728 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21729 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21730 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21732 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21733 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21735 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21736 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21739 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21741 call = gen_rtx_SET (VOIDmode, retval, call);
21744 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21745 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21746 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21749 && ix86_cfun_abi () == MS_ABI
21750 && (!callarg2 || INTVAL (callarg2) != -2))
/* We need to represent that the SI and DI registers are clobbered
   by SYSV calls.  */
21754 static int clobbered_registers[] = {
21755 XMM6_REG, XMM7_REG, XMM8_REG,
21756 XMM9_REG, XMM10_REG, XMM11_REG,
21757 XMM12_REG, XMM13_REG, XMM14_REG,
21758 XMM15_REG, SI_REG, DI_REG
21761 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21762 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21763 UNSPEC_MS_TO_SYSV_CALL);
21767 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21768 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21771 (SSE_REGNO_P (clobbered_registers[i])
21773 clobbered_registers[i]));
21775 call = gen_rtx_PARALLEL (VOIDmode,
21776 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21780 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21781 if (TARGET_VZEROUPPER)
21786 if (cfun->machine->callee_pass_avx256_p)
21788 if (cfun->machine->callee_return_avx256_p)
21789 avx256 = callee_return_pass_avx256;
21791 avx256 = callee_pass_avx256;
21793 else if (cfun->machine->callee_return_avx256_p)
21794 avx256 = callee_return_avx256;
21796 avx256 = call_no_avx256;
21798 if (reload_completed)
21799 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21802 unspec = gen_rtx_UNSPEC (VOIDmode,
21803 gen_rtvec (1, GEN_INT (avx256)),
21804 UNSPEC_CALL_NEEDS_VZEROUPPER);
21805 call = gen_rtx_PARALLEL (VOIDmode,
21806 gen_rtvec (2, call, unspec));
21810 call = emit_call_insn (call);
21812 CALL_INSN_FUNCTION_USAGE (call) = use;
21818 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21820 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21821 emit_insn (gen_avx_vzeroupper (vzeroupper));
21822 emit_call_insn (call);
21825 /* Output the assembly for a call instruction. */
21828 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21830 bool direct_p = constant_call_address_operand (call_op, Pmode);
21831 bool seh_nop_p = false;
21833 gcc_assert (addr_op == 0 || addr_op == 1);
21835 if (SIBLING_CALL_P (insn))
21838 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21839 /* SEH epilogue detection requires the indirect branch case
21840 to include REX.W. */
21841 else if (TARGET_SEH)
21842 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21844 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21847 /* SEH unwinding can require an extra nop to be emitted in several
21848 circumstances. Determine if we have one of those. */
21853 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21855 /* If we get to another real insn, we don't need the nop. */
/* If we get to the epilogue note, prevent a catch region from being
   adjacent to the standard epilogue sequence.  If non-call exceptions
   are enabled, we'll have done this during epilogue emission.  */
21862 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21863 && !flag_non_call_exceptions
21864 && !can_throw_internal (insn))
21871 /* If we didn't find a real insn following the call, prevent the
21872 unwinder from looking into the next function. */
21880 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
21882 return addr_op ? "call\t%P1" : "call\t%P0";
21887 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
21889 return addr_op ? "call\t%A1" : "call\t%A0";
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for
   each function.  */
21898 ix86_init_machine_status (void)
21900 struct machine_function *f;
21902 f = ggc_alloc_cleared_machine_function ();
21903 f->use_fast_prologue_epilogue_nregs = -1;
21904 f->tls_descriptor_call_expanded_p = 0;
21905 f->call_abi = ix86_abi;
21910 /* Return a MEM corresponding to a stack slot with mode MODE.
21911 Allocate a new slot if necessary.
21913 The RTL for a function can have several slots available: N is
21914 which slot to use. */
21917 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21919 struct stack_local_entry *s;
21921 gcc_assert (n < MAX_386_STACK_LOCALS);
21923 /* Virtual slot is valid only before vregs are instantiated. */
21924 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21926 for (s = ix86_stack_locals; s; s = s->next)
21927 if (s->mode == mode && s->n == n)
21928 return copy_rtx (s->rtl);
21930 s = ggc_alloc_stack_local_entry ();
21933 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21935 s->next = ix86_stack_locals;
21936 ix86_stack_locals = s;
21940 /* Construct the SYMBOL_REF for the tls_get_addr function. */
21942 static GTY(()) rtx ix86_tls_symbol;
21944 ix86_tls_get_addr (void)
21947 if (!ix86_tls_symbol)
21949 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
21950 (TARGET_ANY_GNU_TLS
21952 ? "___tls_get_addr"
21953 : "__tls_get_addr");
21956 return ix86_tls_symbol;
21959 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
21961 static GTY(()) rtx ix86_tls_module_base_symbol;
21963 ix86_tls_module_base (void)
21966 if (!ix86_tls_module_base_symbol)
21968 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
21969 "_TLS_MODULE_BASE_");
21970 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
21971 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
21974 return ix86_tls_module_base_symbol;
21977 /* Calculate the length of the memory address in the instruction
21978 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21981 memory_address_length (rtx addr)
21983 struct ix86_address parts;
21984 rtx base, index, disp;
21988 if (GET_CODE (addr) == PRE_DEC
21989 || GET_CODE (addr) == POST_INC
21990 || GET_CODE (addr) == PRE_MODIFY
21991 || GET_CODE (addr) == POST_MODIFY)
21994 ok = ix86_decompose_address (addr, &parts);
21997 if (parts.base && GET_CODE (parts.base) == SUBREG)
21998 parts.base = SUBREG_REG (parts.base);
21999 if (parts.index && GET_CODE (parts.index) == SUBREG)
22000 parts.index = SUBREG_REG (parts.index);
22003 index = parts.index;
/* Rule of thumb:
     - esp as the base always wants an index,
     - ebp as the base always wants a displacement,
     - r12 as the base always wants an index,
     - r13 as the base always wants a displacement.  */
22013 /* Register Indirect. */
22014 if (base && !index && !disp)
22016 /* esp (for its index) and ebp (for its displacement) need
22017 the two-byte modrm form. Similarly for r12 and r13 in 64-bit mode. */
22020 && (addr == arg_pointer_rtx
22021 || addr == frame_pointer_rtx
22022 || REGNO (addr) == SP_REG
22023 || REGNO (addr) == BP_REG
22024 || REGNO (addr) == R12_REG
22025 || REGNO (addr) == R13_REG))
22029 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
22030 is not disp32, but disp32(%rip), so for disp32
22031 SIB byte is needed, unless print_operand_address
22032 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC. */
22034 else if (disp && !base && !index)
22041 if (GET_CODE (disp) == CONST)
22042 symbol = XEXP (disp, 0);
22043 if (GET_CODE (symbol) == PLUS
22044 && CONST_INT_P (XEXP (symbol, 1)))
22045 symbol = XEXP (symbol, 0);
22047 if (GET_CODE (symbol) != LABEL_REF
22048 && (GET_CODE (symbol) != SYMBOL_REF
22049 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
22050 && (GET_CODE (symbol) != UNSPEC
22051 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
22052 && XINT (symbol, 1) != UNSPEC_PCREL
22053 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
22060 /* Find the length of the displacement constant. */
22063 if (base && satisfies_constraint_K (disp))
22068 /* ebp always wants a displacement. Similarly r13. */
22069 else if (base && REG_P (base)
22070 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
22073 /* An index requires the two-byte modrm form.... */
22075 /* ...like esp (or r12), which always wants an index. */
22076 || base == arg_pointer_rtx
22077 || base == frame_pointer_rtx
22078 || (base && REG_P (base)
22079 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
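/* Worked examples of the rules above (an illustrative sketch, assuming the
   standard ModRM/SIB encoding; the returned length counts only the SIB and
   displacement bytes):

     (%eax)           -> 0   bare one-byte modrm, not counted here
     (%esp)           -> 1   SIB byte required
     (%ebp)           -> 1   mod 01 with a zero disp8
     4(%eax)          -> 1   disp8
     4(%eax,%ebx,2)   -> 2   SIB + disp8
     foo              -> 4   disp32 in 32-bit mode  */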
22096 /* Compute default value for "length_immediate" attribute. When SHORTFORM
22097 is set, expect that the insn has an 8-bit immediate alternative. */
22099 ix86_attr_length_immediate_default (rtx insn, int shortform)
22103 extract_insn_cached (insn);
22104 for (i = recog_data.n_operands - 1; i >= 0; --i)
22105 if (CONSTANT_P (recog_data.operand[i]))
22107 enum attr_mode mode = get_attr_mode (insn);
22110 if (shortform && CONST_INT_P (recog_data.operand[i]))
22112 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
22119 ival = trunc_int_for_mode (ival, HImode);
22122 ival = trunc_int_for_mode (ival, SImode);
22127 if (IN_RANGE (ival, -128, 127))
22144 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
22149 fatal_insn ("unknown insn mode", insn);
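/* Examples (illustrative): with SHORTFORM set, "addl $100, %eax" fits the
   sign-extended imm8 alternative, giving length_immediate 1, while
   "addl $1000, %eax" needs a full 4-byte immediate in SImode.  DImode
   immediates never exceed 4 bytes, since they are encoded as 32-bit
   sign-extended values.  */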
22154 /* Compute default value for "length_address" attribute. */
22156 ix86_attr_length_address_default (rtx insn)
22160 if (get_attr_type (insn) == TYPE_LEA)
22162 rtx set = PATTERN (insn), addr;
22164 if (GET_CODE (set) == PARALLEL)
22165 set = XVECEXP (set, 0, 0);
22167 gcc_assert (GET_CODE (set) == SET);
22169 addr = SET_SRC (set);
22170 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
22172 if (GET_CODE (addr) == ZERO_EXTEND)
22173 addr = XEXP (addr, 0);
22174 if (GET_CODE (addr) == SUBREG)
22175 addr = SUBREG_REG (addr);
22178 return memory_address_length (addr);
22181 extract_insn_cached (insn);
22182 for (i = recog_data.n_operands - 1; i >= 0; --i)
22183 if (MEM_P (recog_data.operand[i]))
22185 constrain_operands_cached (reload_completed);
22186 if (which_alternative != -1)
22188 const char *constraints = recog_data.constraints[i];
22189 int alt = which_alternative;
22191 while (*constraints == '=' || *constraints == '+')
22194 while (*constraints++ != ',')
22196 /* Skip ignored operands. */
22197 if (*constraints == 'X')
22200 return memory_address_length (XEXP (recog_data.operand[i], 0));
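/* For example (a sketch): "lea 4(%eax,%ebx), %ecx" yields length_address 2
   (SIB + disp8), computed from the LEA source address, while
   "movl (%esp), %eax" yields 1 for the SIB byte of its memory operand.  */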
22205 /* Compute default value for "length_vex" attribute. It includes
22206 the 2- or 3-byte VEX prefix and 1 opcode byte. */
22209 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
22214 /* Only the 0f opcode can use the 2-byte VEX prefix; setting the VEX W
22215 bit requires the 3-byte VEX prefix. */
22216 if (!has_0f_opcode || has_vex_w)
22219 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
22223 extract_insn_cached (insn);
22225 for (i = recog_data.n_operands - 1; i >= 0; --i)
22226 if (REG_P (recog_data.operand[i]))
22228 /* REX.W bit uses 3 byte VEX prefix. */
22229 if (GET_MODE (recog_data.operand[i]) == DImode
22230 && GENERAL_REG_P (recog_data.operand[i]))
22235 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22236 if (MEM_P (recog_data.operand[i])
22237 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
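/* Examples (illustrative): "vaddps %xmm1, %xmm2, %xmm3" can use the
   two-byte C5 prefix, so length_vex is 2 + 1 = 3 including the opcode
   byte, while "vaddps (%r8), %xmm2, %xmm3" needs REX.B and therefore the
   three-byte C4 form, giving 3 + 1 = 4.  */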
22244 /* Return the maximum number of instructions a cpu can issue. */
22247 ix86_issue_rate (void)
22251 case PROCESSOR_PENTIUM:
22252 case PROCESSOR_ATOM:
22256 case PROCESSOR_PENTIUMPRO:
22257 case PROCESSOR_PENTIUM4:
22258 case PROCESSOR_CORE2_32:
22259 case PROCESSOR_CORE2_64:
22260 case PROCESSOR_COREI7_32:
22261 case PROCESSOR_COREI7_64:
22262 case PROCESSOR_ATHLON:
22264 case PROCESSOR_AMDFAM10:
22265 case PROCESSOR_NOCONA:
22266 case PROCESSOR_GENERIC32:
22267 case PROCESSOR_GENERIC64:
22268 case PROCESSOR_BDVER1:
22269 case PROCESSOR_BTVER1:
22277 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
22278 by DEP_INSN and nothing else set by DEP_INSN. */
22281 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22285 /* Simplify the test for uninteresting insns. */
22286 if (insn_type != TYPE_SETCC
22287 && insn_type != TYPE_ICMOV
22288 && insn_type != TYPE_FCMOV
22289 && insn_type != TYPE_IBR)
22292 if ((set = single_set (dep_insn)) != 0)
22294 set = SET_DEST (set);
22297 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22298 && XVECLEN (PATTERN (dep_insn), 0) == 2
22299 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22300 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22302 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22303 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22308 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22311 /* This test is true if the dependent insn reads the flags but
22312 not any other potentially set register. */
22313 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22316 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
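/* Example of the pairing this detects (a sketch):

     dep_insn:  cmpl %esi, %edi    ; sets only the flags register
     insn:      je   .L2           ; reads the flags, mentions nothing
                                   ; else that dep_insn sets

   so callers such as ix86_adjust_cost can treat the pair specially
   (see the Pentium case below).  */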
22322 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
22326 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22329 extract_insn_cached (use_insn);
22330 for (i = recog_data.n_operands - 1; i >= 0; --i)
22331 if (MEM_P (recog_data.operand[i]))
22333 rtx addr = XEXP (recog_data.operand[i], 0);
22334 return modified_in_p (addr, set_insn) != 0;
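/* E.g. with SET_INSN "movl %eax, %esi" and USE_INSN "movl (%esi), %edx":
   the use insn's address (%esi) is modified by the set insn, so this
   returns true and the caller charges an address-generation stall.  */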
22340 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22342 enum attr_type insn_type, dep_insn_type;
22343 enum attr_memory memory;
22345 int dep_insn_code_number;
22347 /* Anti and output dependencies have zero cost on all CPUs. */
22348 if (REG_NOTE_KIND (link) != 0)
22351 dep_insn_code_number = recog_memoized (dep_insn);
22353 /* If we can't recognize the insns, we can't really do anything. */
22354 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22357 insn_type = get_attr_type (insn);
22358 dep_insn_type = get_attr_type (dep_insn);
22362 case PROCESSOR_PENTIUM:
22363 /* Address Generation Interlock adds a cycle of latency. */
22364 if (insn_type == TYPE_LEA)
22366 rtx addr = PATTERN (insn);
22368 if (GET_CODE (addr) == PARALLEL)
22369 addr = XVECEXP (addr, 0, 0);
22371 gcc_assert (GET_CODE (addr) == SET);
22373 addr = SET_SRC (addr);
22374 if (modified_in_p (addr, dep_insn))
22377 else if (ix86_agi_dependent (dep_insn, insn))
22380 /* ??? Compares pair with jump/setcc. */
22381 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22384 /* Floating point stores require value to be ready one cycle earlier. */
22385 if (insn_type == TYPE_FMOV
22386 && get_attr_memory (insn) == MEMORY_STORE
22387 && !ix86_agi_dependent (dep_insn, insn))
22391 case PROCESSOR_PENTIUMPRO:
22392 memory = get_attr_memory (insn);
22394 /* INT->FP conversion is expensive. */
22395 if (get_attr_fp_int_src (dep_insn))
22398 /* There is one cycle extra latency between an FP op and a store. */
22399 if (insn_type == TYPE_FMOV
22400 && (set = single_set (dep_insn)) != NULL_RTX
22401 && (set2 = single_set (insn)) != NULL_RTX
22402 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22403 && MEM_P (SET_DEST (set2)))
22406 /* Show the ability of the reorder buffer to hide the latency of a load
22407 by executing it in parallel with a previous instruction, when that
22408 instruction is not needed to compute the address. */
22409 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22410 && !ix86_agi_dependent (dep_insn, insn))
22412 /* Claim moves to take one cycle, as the core can issue one load
22413 at a time and the next load can start a cycle later. */
22414 if (dep_insn_type == TYPE_IMOV
22415 || dep_insn_type == TYPE_FMOV)
22423 memory = get_attr_memory (insn);
22425 /* The esp dependency is resolved before the instruction is really finished. */
22427 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22428 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22431 /* INT->FP conversion is expensive. */
22432 if (get_attr_fp_int_src (dep_insn))
22435 /* Show the ability of the reorder buffer to hide the latency of a load
22436 by executing it in parallel with a previous instruction, when that
22437 instruction is not needed to compute the address. */
22438 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22439 && !ix86_agi_dependent (dep_insn, insn))
22441 /* Claim moves to take one cycle, as the core can issue one load
22442 at a time and the next load can start a cycle later. */
22443 if (dep_insn_type == TYPE_IMOV
22444 || dep_insn_type == TYPE_FMOV)
22453 case PROCESSOR_ATHLON:
22455 case PROCESSOR_AMDFAM10:
22456 case PROCESSOR_BDVER1:
22457 case PROCESSOR_BTVER1:
22458 case PROCESSOR_ATOM:
22459 case PROCESSOR_GENERIC32:
22460 case PROCESSOR_GENERIC64:
22461 memory = get_attr_memory (insn);
22463 /* Show ability of reorder buffer to hide latency of load by executing
22464 in parallel with previous instruction in case
22465 previous instruction is not needed to compute the address. */
22466 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22467 && !ix86_agi_dependent (dep_insn, insn))
22469 enum attr_unit unit = get_attr_unit (insn);
22472 /* Because of the difference between the length of integer and
22473 floating unit pipeline preparation stages, the memory operands
22474 for floating point are cheaper.
22476 ??? For Athlon the difference is most probably 2. */
22477 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22480 loadcost = TARGET_ATHLON ? 2 : 0;
22482 if (cost >= loadcost)
22495 /* How many alternative schedules to try. This should be as wide as the
22496 scheduling freedom in the DFA, but no wider. Making this value too
22497 large results in extra work for the scheduler. */
22500 ia32_multipass_dfa_lookahead (void)
22504 case PROCESSOR_PENTIUM:
22507 case PROCESSOR_PENTIUMPRO:
22511 case PROCESSOR_CORE2_32:
22512 case PROCESSOR_CORE2_64:
22513 case PROCESSOR_COREI7_32:
22514 case PROCESSOR_COREI7_64:
22515 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22516 as the number of instructions that can be executed in a cycle, i.e.,
22517 issue_rate. I wonder why tuning for many CPUs does not do this. */
22518 return ix86_issue_rate ();
22527 /* Model decoder of Core 2/i7.
22528 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
22529 track the instruction fetch block boundaries and make sure that long
22530 (9+ bytes) instructions are assigned to D0. */
22532 /* Maximum length of an insn that can be handled by
22533 a secondary decoder unit. '8' for Core 2/i7. */
22534 static int core2i7_secondary_decoder_max_insn_size;
22536 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22537 '16' for Core 2/i7. */
22538 static int core2i7_ifetch_block_size;
22540 /* Maximum number of instructions decoder can handle per cycle.
22541 '6' for Core 2/i7. */
22542 static int core2i7_ifetch_block_max_insns;
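/* Worked illustration of the model (a sketch): if insns of sizes 7 and 6
   have already been issued this cycle (ifetch_block_len == 13), a 4-byte
   candidate is filtered out because 13 + 4 > 16; and when it is not the
   first insn chosen in the cycle (!first_cycle_insn_p), a 9+ byte
   candidate is filtered out because only decoder D0 can handle it.  */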
22544 typedef struct ix86_first_cycle_multipass_data_ *
22545 ix86_first_cycle_multipass_data_t;
22546 typedef const struct ix86_first_cycle_multipass_data_ *
22547 const_ix86_first_cycle_multipass_data_t;
22549 /* A variable to store target state across calls to max_issue within one cycle. */
22551 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22552 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22554 /* Initialize DATA. */
22556 core2i7_first_cycle_multipass_init (void *_data)
22558 ix86_first_cycle_multipass_data_t data
22559 = (ix86_first_cycle_multipass_data_t) _data;
22561 data->ifetch_block_len = 0;
22562 data->ifetch_block_n_insns = 0;
22563 data->ready_try_change = NULL;
22564 data->ready_try_change_size = 0;
22567 /* Advancing the cycle; reset ifetch block counts. */
22569 core2i7_dfa_post_advance_cycle (void)
22571 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22573 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22575 data->ifetch_block_len = 0;
22576 data->ifetch_block_n_insns = 0;
22579 static int min_insn_size (rtx);
22581 /* Filter out insns from ready_try that the core will not be able to issue
22582 on current cycle due to decoder. */
22584 core2i7_first_cycle_multipass_filter_ready_try
22585 (const_ix86_first_cycle_multipass_data_t data,
22586 char *ready_try, int n_ready, bool first_cycle_insn_p)
22593 if (ready_try[n_ready])
22596 insn = get_ready_element (n_ready);
22597 insn_size = min_insn_size (insn);
22599 if (/* If this insn is too long for a secondary decoder ... */
22600 (!first_cycle_insn_p
22601 && insn_size > core2i7_secondary_decoder_max_insn_size)
22602 /* ... or it would not fit into the ifetch block ... */
22603 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22604 /* ... or the decoder is full already ... */
22605 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22606 /* ... mask the insn out. */
22608 ready_try[n_ready] = 1;
22610 if (data->ready_try_change)
22611 SET_BIT (data->ready_try_change, n_ready);
22616 /* Prepare for a new round of multipass lookahead scheduling. */
22618 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22619 bool first_cycle_insn_p)
22621 ix86_first_cycle_multipass_data_t data
22622 = (ix86_first_cycle_multipass_data_t) _data;
22623 const_ix86_first_cycle_multipass_data_t prev_data
22624 = ix86_first_cycle_multipass_data;
22626 /* Restore the state from the end of the previous round. */
22627 data->ifetch_block_len = prev_data->ifetch_block_len;
22628 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22630 /* Filter instructions that cannot be issued on current cycle due to
22631 decoder restrictions. */
22632 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22633 first_cycle_insn_p);
22636 /* INSN is being issued in current solution. Account for its impact on
22637 the decoder model. */
22639 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22640 rtx insn, const void *_prev_data)
22642 ix86_first_cycle_multipass_data_t data
22643 = (ix86_first_cycle_multipass_data_t) _data;
22644 const_ix86_first_cycle_multipass_data_t prev_data
22645 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22647 int insn_size = min_insn_size (insn);
22649 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22650 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22651 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22652 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22654 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22655 if (!data->ready_try_change)
22657 data->ready_try_change = sbitmap_alloc (n_ready);
22658 data->ready_try_change_size = n_ready;
22660 else if (data->ready_try_change_size < n_ready)
22662 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22664 data->ready_try_change_size = n_ready;
22666 sbitmap_zero (data->ready_try_change);
22668 /* Filter out insns from ready_try that the core will not be able to issue
22669 on current cycle due to decoder. */
22670 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22674 /* Revert the effect on ready_try. */
22676 core2i7_first_cycle_multipass_backtrack (const void *_data,
22678 int n_ready ATTRIBUTE_UNUSED)
22680 const_ix86_first_cycle_multipass_data_t data
22681 = (const_ix86_first_cycle_multipass_data_t) _data;
22682 unsigned int i = 0;
22683 sbitmap_iterator sbi;
22685 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22686 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22692 /* Save the result of multipass lookahead scheduling for the next round. */
22694 core2i7_first_cycle_multipass_end (const void *_data)
22696 const_ix86_first_cycle_multipass_data_t data
22697 = (const_ix86_first_cycle_multipass_data_t) _data;
22698 ix86_first_cycle_multipass_data_t next_data
22699 = ix86_first_cycle_multipass_data;
22703 next_data->ifetch_block_len = data->ifetch_block_len;
22704 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22708 /* Deallocate target data. */
22710 core2i7_first_cycle_multipass_fini (void *_data)
22712 ix86_first_cycle_multipass_data_t data
22713 = (ix86_first_cycle_multipass_data_t) _data;
22715 if (data->ready_try_change)
22717 sbitmap_free (data->ready_try_change);
22718 data->ready_try_change = NULL;
22719 data->ready_try_change_size = 0;
22723 /* Prepare for scheduling pass. */
22725 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22726 int verbose ATTRIBUTE_UNUSED,
22727 int max_uid ATTRIBUTE_UNUSED)
22729 /* Install scheduling hooks for current CPU. Some of these hooks are used
22730 in time-critical parts of the scheduler, so we only set them up when
22731 they are actually used. */
22734 case PROCESSOR_CORE2_32:
22735 case PROCESSOR_CORE2_64:
22736 case PROCESSOR_COREI7_32:
22737 case PROCESSOR_COREI7_64:
22738 targetm.sched.dfa_post_advance_cycle
22739 = core2i7_dfa_post_advance_cycle;
22740 targetm.sched.first_cycle_multipass_init
22741 = core2i7_first_cycle_multipass_init;
22742 targetm.sched.first_cycle_multipass_begin
22743 = core2i7_first_cycle_multipass_begin;
22744 targetm.sched.first_cycle_multipass_issue
22745 = core2i7_first_cycle_multipass_issue;
22746 targetm.sched.first_cycle_multipass_backtrack
22747 = core2i7_first_cycle_multipass_backtrack;
22748 targetm.sched.first_cycle_multipass_end
22749 = core2i7_first_cycle_multipass_end;
22750 targetm.sched.first_cycle_multipass_fini
22751 = core2i7_first_cycle_multipass_fini;
22753 /* Set decoder parameters. */
22754 core2i7_secondary_decoder_max_insn_size = 8;
22755 core2i7_ifetch_block_size = 16;
22756 core2i7_ifetch_block_max_insns = 6;
22760 targetm.sched.dfa_post_advance_cycle = NULL;
22761 targetm.sched.first_cycle_multipass_init = NULL;
22762 targetm.sched.first_cycle_multipass_begin = NULL;
22763 targetm.sched.first_cycle_multipass_issue = NULL;
22764 targetm.sched.first_cycle_multipass_backtrack = NULL;
22765 targetm.sched.first_cycle_multipass_end = NULL;
22766 targetm.sched.first_cycle_multipass_fini = NULL;
22772 /* Compute the alignment given to a constant that is being placed in memory.
22773 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
22775 The value of this function is used instead of that alignment to align the object. */
22779 ix86_constant_alignment (tree exp, int align)
22781 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22782 || TREE_CODE (exp) == INTEGER_CST)
22784 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22786 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22789 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22790 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22791 return BITS_PER_WORD;
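/* Examples (illustrative): a double constant that would ordinarily get
   32-bit alignment is bumped to 64-bit alignment here, and a string
   constant of length 31 or more is word-aligned when not optimizing for
   size, which helps word-at-a-time string code.  */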
22796 /* Compute the alignment for a static variable.
22797 TYPE is the data type, and ALIGN is the alignment that
22798 the object would ordinarily have. The value of this function is used
22799 instead of that alignment to align the object. */
22802 ix86_data_alignment (tree type, int align)
22804 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22806 if (AGGREGATE_TYPE_P (type)
22807 && TYPE_SIZE (type)
22808 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22809 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22810 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22811 && align < max_align)
22814 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22815 to a 16-byte boundary. */
22818 if (AGGREGATE_TYPE_P (type)
22819 && TYPE_SIZE (type)
22820 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22821 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22822 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22826 if (TREE_CODE (type) == ARRAY_TYPE)
22828 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22830 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22833 else if (TREE_CODE (type) == COMPLEX_TYPE)
22836 if (TYPE_MODE (type) == DCmode && align < 64)
22838 if ((TYPE_MODE (type) == XCmode
22839 || TYPE_MODE (type) == TCmode) && align < 128)
22842 else if ((TREE_CODE (type) == RECORD_TYPE
22843 || TREE_CODE (type) == UNION_TYPE
22844 || TREE_CODE (type) == QUAL_UNION_TYPE)
22845 && TYPE_FIELDS (type))
22847 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22849 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22852 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22853 || TREE_CODE (type) == INTEGER_TYPE)
22855 if (TYPE_MODE (type) == DFmode && align < 64)
22857 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22864 /* Compute the alignment for a local variable or a stack slot. EXP is
22865 the data type or decl itself, MODE is the widest mode available and
22866 ALIGN is the alignment that the object would ordinarily have. The
22867 value of this macro is used instead of that alignment to align the
22871 ix86_local_alignment (tree exp, enum machine_mode mode,
22872 unsigned int align)
22876 if (exp && DECL_P (exp))
22878 type = TREE_TYPE (exp);
22887 /* Don't do dynamic stack realignment for long long objects with
22888 -mpreferred-stack-boundary=2. */
22891 && ix86_preferred_stack_boundary < 64
22892 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22893 && (!type || !TYPE_USER_ALIGN (type))
22894 && (!decl || !DECL_USER_ALIGN (decl)))
22897 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22898 register in MODE. We will return the largest alignment of XF and DF modes. */
22902 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22903 align = GET_MODE_ALIGNMENT (DFmode);
22907 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22908 to a 16-byte boundary. The exact wording is:
22910 An array uses the same alignment as its elements, except that a local or
22911 global array variable of length at least 16 bytes or
22912 a C99 variable-length array variable always has alignment of at least 16 bytes.
22914 This was added to allow use of aligned SSE instructions on arrays. The
22915 rule is meant for static storage (where the compiler cannot do the
22916 analysis by itself). We follow it for automatic variables only when
22917 convenient. We fully control everything in the function being compiled,
22918 and functions from other units cannot rely on the alignment.
22920 Exclude the va_list type. It is the common case of a local array where
22921 we cannot benefit from the alignment. */
22922 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22925 if (AGGREGATE_TYPE_P (type)
22926 && (TYPE_MAIN_VARIANT (type)
22927 != TYPE_MAIN_VARIANT (va_list_type_node))
22928 && TYPE_SIZE (type)
22929 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22930 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22931 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22934 if (TREE_CODE (type) == ARRAY_TYPE)
22936 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22938 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22941 else if (TREE_CODE (type) == COMPLEX_TYPE)
22943 if (TYPE_MODE (type) == DCmode && align < 64)
22945 if ((TYPE_MODE (type) == XCmode
22946 || TYPE_MODE (type) == TCmode) && align < 128)
22949 else if ((TREE_CODE (type) == RECORD_TYPE
22950 || TREE_CODE (type) == UNION_TYPE
22951 || TREE_CODE (type) == QUAL_UNION_TYPE)
22952 && TYPE_FIELDS (type))
22954 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22956 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22959 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22960 || TREE_CODE (type) == INTEGER_TYPE)
22963 if (TYPE_MODE (type) == DFmode && align < 64)
22965 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22971 /* Compute the minimum required alignment for dynamic stack realignment
22972 purposes for a local variable, parameter or a stack slot. EXP is
22973 the data type or decl itself, MODE is its mode and ALIGN is the
22974 alignment that the object would ordinarily have. */
22977 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22978 unsigned int align)
22982 if (exp && DECL_P (exp))
22984 type = TREE_TYPE (exp);
22993 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22996 /* Don't do dynamic stack realignment for long long objects with
22997 -mpreferred-stack-boundary=2. */
22998 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22999 && (!type || !TYPE_USER_ALIGN (type))
23000 && (!decl || !DECL_USER_ALIGN (decl)))
23006 /* Find a location for the static chain incoming to a nested function.
23007 This is a register, unless all free registers are used by arguments. */
23010 ix86_static_chain (const_tree fndecl, bool incoming_p)
23014 if (!DECL_STATIC_CHAIN (fndecl))
23019 /* We always use R10 in 64-bit mode. */
23025 /* By default in 32-bit mode we use ECX to pass the static chain. */
23028 fntype = TREE_TYPE (fndecl);
23029 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
23031 /* Fastcall functions use ecx/edx for arguments, which leaves
23032 us with EAX for the static chain. */
23035 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
23037 /* Thiscall functions use ecx for arguments, which leaves
23038 us with EAX for the static chain. */
23041 else if (ix86_function_regparm (fntype, fndecl) == 3)
23043 /* For regparm 3, we have no free call-clobbered registers in
23044 which to store the static chain. In order to implement this,
23045 we have the trampoline push the static chain to the stack.
23046 However, we can't push a value below the return address when
23047 we call the nested function directly, so we have to use an
23048 alternate entry point. For this we use ESI, and have the
23049 alternate entry point push ESI, so that things appear the
23050 same once we're executing the nested function. */
23053 if (fndecl == current_function_decl)
23054 ix86_static_chain_on_stack = true;
23055 return gen_frame_mem (SImode,
23056 plus_constant (arg_pointer_rtx, -8));
23062 return gen_rtx_REG (Pmode, regno);
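/* Summary of the cases above for 32-bit code: the default is %ecx;
   fastcall and thiscall functions get %eax, since %ecx (and %edx) carry
   arguments; regparm(3) functions use a stack slot at -8 from the arg
   pointer, filled through the %esi alternate entry point.  64-bit code
   always uses %r10.  */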
23065 /* Emit RTL insns to initialize the variable parts of a trampoline.
23066 FNDECL is the decl of the target address; M_TRAMP is a MEM for
23067 the trampoline, and CHAIN_VALUE is an RTX for the static chain
23068 to be passed to the target function. */
23071 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
23075 fnaddr = XEXP (DECL_RTL (fndecl), 0);
23082 /* Depending on the static chain location, either load a register
23083 with a constant, or push the constant to the stack. All of the
23084 instructions are the same size. */
23085 chain = ix86_static_chain (fndecl, true);
23088 if (REGNO (chain) == CX_REG)
23090 else if (REGNO (chain) == AX_REG)
23093 gcc_unreachable ();
23098 mem = adjust_address (m_tramp, QImode, 0);
23099 emit_move_insn (mem, gen_int_mode (opcode, QImode));
23101 mem = adjust_address (m_tramp, SImode, 1);
23102 emit_move_insn (mem, chain_value);
23104 /* Compute offset from the end of the jmp to the target function.
23105 In the case in which the trampoline stores the static chain on
23106 the stack, we need to skip the first insn which pushes the
23107 (call-saved) register static chain; this push is 1 byte. */
23108 disp = expand_binop (SImode, sub_optab, fnaddr,
23109 plus_constant (XEXP (m_tramp, 0),
23110 MEM_P (chain) ? 9 : 10),
23111 NULL_RTX, 1, OPTAB_DIRECT);
23113 mem = adjust_address (m_tramp, QImode, 5);
23114 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
23116 mem = adjust_address (m_tramp, SImode, 6);
23117 emit_move_insn (mem, disp);
23123 /* Load the function address to r11. Try to load the address using
23124 the shorter movl instead of movabs. We may want to support
23125 movq for kernel mode, but the kernel does not use trampolines at the moment. */
23127 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
23129 fnaddr = copy_to_mode_reg (DImode, fnaddr);
23131 mem = adjust_address (m_tramp, HImode, offset);
23132 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
23134 mem = adjust_address (m_tramp, SImode, offset + 2);
23135 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
23140 mem = adjust_address (m_tramp, HImode, offset);
23141 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
23143 mem = adjust_address (m_tramp, DImode, offset + 2);
23144 emit_move_insn (mem, fnaddr);
23148 /* Load static chain using movabs to r10. */
23149 mem = adjust_address (m_tramp, HImode, offset);
23150 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
23152 mem = adjust_address (m_tramp, DImode, offset + 2);
23153 emit_move_insn (mem, chain_value);
23156 /* Jump to r11; the last (unused) byte is a nop, only there to
23157 pad the write out to a single 32-bit store. */
23158 mem = adjust_address (m_tramp, SImode, offset);
23159 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
23162 gcc_assert (offset <= TRAMPOLINE_SIZE);
23165 #ifdef ENABLE_EXECUTE_STACK
23166 #ifdef CHECK_EXECUTE_STACK_ENABLED
23167 if (CHECK_EXECUTE_STACK_ENABLED)
23169 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
23170 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
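/* Resulting trampoline images (an illustrative sketch, assembled from the
   stores above).  32-bit, chain in a register:

     b9 <chain:4>      movl $chain, %ecx   (b8 for %eax; the on-stack
     e9 <disp:4>       jmp  <target>        case uses 68 = push imm32,
                                            and the jmp then skips the
                                            1-byte push at the callee)
   64-bit:

     41 bb <addr:4>  or  49 bb <addr:8>    mov    $fnaddr, %r11
     49 ba <chain:8>                       movabs $chain, %r10
     49 ff e3 90                           jmpq   *%r11; nop  */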
23174 /* The following file contains several enumerations and data structures
23175 built from the definitions in i386-builtin-types.def. */
23177 #include "i386-builtin-types.inc"
23179 /* Table for the ix86 builtin non-function types. */
23180 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
23182 /* Retrieve an element from the above table, building some of
23183 the types lazily. */
23186 ix86_get_builtin_type (enum ix86_builtin_type tcode)
23188 unsigned int index;
23191 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
23193 type = ix86_builtin_type_tab[(int) tcode];
23197 gcc_assert (tcode > IX86_BT_LAST_PRIM);
23198 if (tcode <= IX86_BT_LAST_VECT)
23200 enum machine_mode mode;
23202 index = tcode - IX86_BT_LAST_PRIM - 1;
23203 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
23204 mode = ix86_builtin_type_vect_mode[index];
23206 type = build_vector_type_for_mode (itype, mode);
23212 index = tcode - IX86_BT_LAST_VECT - 1;
23213 if (tcode <= IX86_BT_LAST_PTR)
23214 quals = TYPE_UNQUALIFIED;
23216 quals = TYPE_QUAL_CONST;
23218 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
23219 if (quals != TYPE_UNQUALIFIED)
23220 itype = build_qualified_type (itype, quals);
23222 type = build_pointer_type (itype);
23225 ix86_builtin_type_tab[(int) tcode] = type;
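/* For example, the first request for a vector type such as IX86_BT_V4SF
   (assuming that name from i386-builtin-types.inc) builds it from its
   scalar base type and V4SFmode via build_vector_type_for_mode, then
   caches it in ix86_builtin_type_tab so later lookups are a plain table
   read.  */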
23229 /* Table for the ix86 builtin function types. */
23230 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23232 /* Retrieve an element from the above table, building some of
23233 the types lazily. */
23236 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23240 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23242 type = ix86_builtin_func_type_tab[(int) tcode];
23246 if (tcode <= IX86_BT_LAST_FUNC)
23248 unsigned start = ix86_builtin_func_start[(int) tcode];
23249 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23250 tree rtype, atype, args = void_list_node;
23253 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23254 for (i = after - 1; i > start; --i)
23256 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23257 args = tree_cons (NULL, atype, args);
23260 type = build_function_type (rtype, args);
23264 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23265 enum ix86_builtin_func_type icode;
23267 icode = ix86_builtin_func_alias_base[index];
23268 type = ix86_get_builtin_func_type (icode);
23271 ix86_builtin_func_type_tab[(int) tcode] = type;
23276 /* Codes for all the SSE/MMX builtins. */
23279 IX86_BUILTIN_ADDPS,
23280 IX86_BUILTIN_ADDSS,
23281 IX86_BUILTIN_DIVPS,
23282 IX86_BUILTIN_DIVSS,
23283 IX86_BUILTIN_MULPS,
23284 IX86_BUILTIN_MULSS,
23285 IX86_BUILTIN_SUBPS,
23286 IX86_BUILTIN_SUBSS,
23288 IX86_BUILTIN_CMPEQPS,
23289 IX86_BUILTIN_CMPLTPS,
23290 IX86_BUILTIN_CMPLEPS,
23291 IX86_BUILTIN_CMPGTPS,
23292 IX86_BUILTIN_CMPGEPS,
23293 IX86_BUILTIN_CMPNEQPS,
23294 IX86_BUILTIN_CMPNLTPS,
23295 IX86_BUILTIN_CMPNLEPS,
23296 IX86_BUILTIN_CMPNGTPS,
23297 IX86_BUILTIN_CMPNGEPS,
23298 IX86_BUILTIN_CMPORDPS,
23299 IX86_BUILTIN_CMPUNORDPS,
23300 IX86_BUILTIN_CMPEQSS,
23301 IX86_BUILTIN_CMPLTSS,
23302 IX86_BUILTIN_CMPLESS,
23303 IX86_BUILTIN_CMPNEQSS,
23304 IX86_BUILTIN_CMPNLTSS,
23305 IX86_BUILTIN_CMPNLESS,
23306 IX86_BUILTIN_CMPNGTSS,
23307 IX86_BUILTIN_CMPNGESS,
23308 IX86_BUILTIN_CMPORDSS,
23309 IX86_BUILTIN_CMPUNORDSS,
23311 IX86_BUILTIN_COMIEQSS,
23312 IX86_BUILTIN_COMILTSS,
23313 IX86_BUILTIN_COMILESS,
23314 IX86_BUILTIN_COMIGTSS,
23315 IX86_BUILTIN_COMIGESS,
23316 IX86_BUILTIN_COMINEQSS,
23317 IX86_BUILTIN_UCOMIEQSS,
23318 IX86_BUILTIN_UCOMILTSS,
23319 IX86_BUILTIN_UCOMILESS,
23320 IX86_BUILTIN_UCOMIGTSS,
23321 IX86_BUILTIN_UCOMIGESS,
23322 IX86_BUILTIN_UCOMINEQSS,
23324 IX86_BUILTIN_CVTPI2PS,
23325 IX86_BUILTIN_CVTPS2PI,
23326 IX86_BUILTIN_CVTSI2SS,
23327 IX86_BUILTIN_CVTSI642SS,
23328 IX86_BUILTIN_CVTSS2SI,
23329 IX86_BUILTIN_CVTSS2SI64,
23330 IX86_BUILTIN_CVTTPS2PI,
23331 IX86_BUILTIN_CVTTSS2SI,
23332 IX86_BUILTIN_CVTTSS2SI64,
23334 IX86_BUILTIN_MAXPS,
23335 IX86_BUILTIN_MAXSS,
23336 IX86_BUILTIN_MINPS,
23337 IX86_BUILTIN_MINSS,
23339 IX86_BUILTIN_LOADUPS,
23340 IX86_BUILTIN_STOREUPS,
23341 IX86_BUILTIN_MOVSS,
23343 IX86_BUILTIN_MOVHLPS,
23344 IX86_BUILTIN_MOVLHPS,
23345 IX86_BUILTIN_LOADHPS,
23346 IX86_BUILTIN_LOADLPS,
23347 IX86_BUILTIN_STOREHPS,
23348 IX86_BUILTIN_STORELPS,
23350 IX86_BUILTIN_MASKMOVQ,
23351 IX86_BUILTIN_MOVMSKPS,
23352 IX86_BUILTIN_PMOVMSKB,
23354 IX86_BUILTIN_MOVNTPS,
23355 IX86_BUILTIN_MOVNTQ,
23357 IX86_BUILTIN_LOADDQU,
23358 IX86_BUILTIN_STOREDQU,
23360 IX86_BUILTIN_PACKSSWB,
23361 IX86_BUILTIN_PACKSSDW,
23362 IX86_BUILTIN_PACKUSWB,
23364 IX86_BUILTIN_PADDB,
23365 IX86_BUILTIN_PADDW,
23366 IX86_BUILTIN_PADDD,
23367 IX86_BUILTIN_PADDQ,
23368 IX86_BUILTIN_PADDSB,
23369 IX86_BUILTIN_PADDSW,
23370 IX86_BUILTIN_PADDUSB,
23371 IX86_BUILTIN_PADDUSW,
23372 IX86_BUILTIN_PSUBB,
23373 IX86_BUILTIN_PSUBW,
23374 IX86_BUILTIN_PSUBD,
23375 IX86_BUILTIN_PSUBQ,
23376 IX86_BUILTIN_PSUBSB,
23377 IX86_BUILTIN_PSUBSW,
23378 IX86_BUILTIN_PSUBUSB,
23379 IX86_BUILTIN_PSUBUSW,
23382 IX86_BUILTIN_PANDN,
23386 IX86_BUILTIN_PAVGB,
23387 IX86_BUILTIN_PAVGW,
23389 IX86_BUILTIN_PCMPEQB,
23390 IX86_BUILTIN_PCMPEQW,
23391 IX86_BUILTIN_PCMPEQD,
23392 IX86_BUILTIN_PCMPGTB,
23393 IX86_BUILTIN_PCMPGTW,
23394 IX86_BUILTIN_PCMPGTD,
23396 IX86_BUILTIN_PMADDWD,
23398 IX86_BUILTIN_PMAXSW,
23399 IX86_BUILTIN_PMAXUB,
23400 IX86_BUILTIN_PMINSW,
23401 IX86_BUILTIN_PMINUB,
23403 IX86_BUILTIN_PMULHUW,
23404 IX86_BUILTIN_PMULHW,
23405 IX86_BUILTIN_PMULLW,
23407 IX86_BUILTIN_PSADBW,
23408 IX86_BUILTIN_PSHUFW,
23410 IX86_BUILTIN_PSLLW,
23411 IX86_BUILTIN_PSLLD,
23412 IX86_BUILTIN_PSLLQ,
23413 IX86_BUILTIN_PSRAW,
23414 IX86_BUILTIN_PSRAD,
23415 IX86_BUILTIN_PSRLW,
23416 IX86_BUILTIN_PSRLD,
23417 IX86_BUILTIN_PSRLQ,
23418 IX86_BUILTIN_PSLLWI,
23419 IX86_BUILTIN_PSLLDI,
23420 IX86_BUILTIN_PSLLQI,
23421 IX86_BUILTIN_PSRAWI,
23422 IX86_BUILTIN_PSRADI,
23423 IX86_BUILTIN_PSRLWI,
23424 IX86_BUILTIN_PSRLDI,
23425 IX86_BUILTIN_PSRLQI,
23427 IX86_BUILTIN_PUNPCKHBW,
23428 IX86_BUILTIN_PUNPCKHWD,
23429 IX86_BUILTIN_PUNPCKHDQ,
23430 IX86_BUILTIN_PUNPCKLBW,
23431 IX86_BUILTIN_PUNPCKLWD,
23432 IX86_BUILTIN_PUNPCKLDQ,
23434 IX86_BUILTIN_SHUFPS,
23436 IX86_BUILTIN_RCPPS,
23437 IX86_BUILTIN_RCPSS,
23438 IX86_BUILTIN_RSQRTPS,
23439 IX86_BUILTIN_RSQRTPS_NR,
23440 IX86_BUILTIN_RSQRTSS,
23441 IX86_BUILTIN_RSQRTF,
23442 IX86_BUILTIN_SQRTPS,
23443 IX86_BUILTIN_SQRTPS_NR,
23444 IX86_BUILTIN_SQRTSS,
23446 IX86_BUILTIN_UNPCKHPS,
23447 IX86_BUILTIN_UNPCKLPS,
23449 IX86_BUILTIN_ANDPS,
23450 IX86_BUILTIN_ANDNPS,
23452 IX86_BUILTIN_XORPS,
23455 IX86_BUILTIN_LDMXCSR,
23456 IX86_BUILTIN_STMXCSR,
23457 IX86_BUILTIN_SFENCE,
23459 /* 3DNow! Original */
23460 IX86_BUILTIN_FEMMS,
23461 IX86_BUILTIN_PAVGUSB,
23462 IX86_BUILTIN_PF2ID,
23463 IX86_BUILTIN_PFACC,
23464 IX86_BUILTIN_PFADD,
23465 IX86_BUILTIN_PFCMPEQ,
23466 IX86_BUILTIN_PFCMPGE,
23467 IX86_BUILTIN_PFCMPGT,
23468 IX86_BUILTIN_PFMAX,
23469 IX86_BUILTIN_PFMIN,
23470 IX86_BUILTIN_PFMUL,
23471 IX86_BUILTIN_PFRCP,
23472 IX86_BUILTIN_PFRCPIT1,
23473 IX86_BUILTIN_PFRCPIT2,
23474 IX86_BUILTIN_PFRSQIT1,
23475 IX86_BUILTIN_PFRSQRT,
23476 IX86_BUILTIN_PFSUB,
23477 IX86_BUILTIN_PFSUBR,
23478 IX86_BUILTIN_PI2FD,
23479 IX86_BUILTIN_PMULHRW,
23481 /* 3DNow! Athlon Extensions */
23482 IX86_BUILTIN_PF2IW,
23483 IX86_BUILTIN_PFNACC,
23484 IX86_BUILTIN_PFPNACC,
23485 IX86_BUILTIN_PI2FW,
23486 IX86_BUILTIN_PSWAPDSI,
23487 IX86_BUILTIN_PSWAPDSF,
23490 IX86_BUILTIN_ADDPD,
23491 IX86_BUILTIN_ADDSD,
23492 IX86_BUILTIN_DIVPD,
23493 IX86_BUILTIN_DIVSD,
23494 IX86_BUILTIN_MULPD,
23495 IX86_BUILTIN_MULSD,
23496 IX86_BUILTIN_SUBPD,
23497 IX86_BUILTIN_SUBSD,
23499 IX86_BUILTIN_CMPEQPD,
23500 IX86_BUILTIN_CMPLTPD,
23501 IX86_BUILTIN_CMPLEPD,
23502 IX86_BUILTIN_CMPGTPD,
23503 IX86_BUILTIN_CMPGEPD,
23504 IX86_BUILTIN_CMPNEQPD,
23505 IX86_BUILTIN_CMPNLTPD,
23506 IX86_BUILTIN_CMPNLEPD,
23507 IX86_BUILTIN_CMPNGTPD,
23508 IX86_BUILTIN_CMPNGEPD,
23509 IX86_BUILTIN_CMPORDPD,
23510 IX86_BUILTIN_CMPUNORDPD,
23511 IX86_BUILTIN_CMPEQSD,
23512 IX86_BUILTIN_CMPLTSD,
23513 IX86_BUILTIN_CMPLESD,
23514 IX86_BUILTIN_CMPNEQSD,
23515 IX86_BUILTIN_CMPNLTSD,
23516 IX86_BUILTIN_CMPNLESD,
23517 IX86_BUILTIN_CMPORDSD,
23518 IX86_BUILTIN_CMPUNORDSD,
23520 IX86_BUILTIN_COMIEQSD,
23521 IX86_BUILTIN_COMILTSD,
23522 IX86_BUILTIN_COMILESD,
23523 IX86_BUILTIN_COMIGTSD,
23524 IX86_BUILTIN_COMIGESD,
23525 IX86_BUILTIN_COMINEQSD,
23526 IX86_BUILTIN_UCOMIEQSD,
23527 IX86_BUILTIN_UCOMILTSD,
23528 IX86_BUILTIN_UCOMILESD,
23529 IX86_BUILTIN_UCOMIGTSD,
23530 IX86_BUILTIN_UCOMIGESD,
23531 IX86_BUILTIN_UCOMINEQSD,
23533 IX86_BUILTIN_MAXPD,
23534 IX86_BUILTIN_MAXSD,
23535 IX86_BUILTIN_MINPD,
23536 IX86_BUILTIN_MINSD,
23538 IX86_BUILTIN_ANDPD,
23539 IX86_BUILTIN_ANDNPD,
23541 IX86_BUILTIN_XORPD,
23543 IX86_BUILTIN_SQRTPD,
23544 IX86_BUILTIN_SQRTSD,
23546 IX86_BUILTIN_UNPCKHPD,
23547 IX86_BUILTIN_UNPCKLPD,
23549 IX86_BUILTIN_SHUFPD,
23551 IX86_BUILTIN_LOADUPD,
23552 IX86_BUILTIN_STOREUPD,
23553 IX86_BUILTIN_MOVSD,
23555 IX86_BUILTIN_LOADHPD,
23556 IX86_BUILTIN_LOADLPD,
23558 IX86_BUILTIN_CVTDQ2PD,
23559 IX86_BUILTIN_CVTDQ2PS,
23561 IX86_BUILTIN_CVTPD2DQ,
23562 IX86_BUILTIN_CVTPD2PI,
23563 IX86_BUILTIN_CVTPD2PS,
23564 IX86_BUILTIN_CVTTPD2DQ,
23565 IX86_BUILTIN_CVTTPD2PI,
23567 IX86_BUILTIN_CVTPI2PD,
23568 IX86_BUILTIN_CVTSI2SD,
23569 IX86_BUILTIN_CVTSI642SD,
23571 IX86_BUILTIN_CVTSD2SI,
23572 IX86_BUILTIN_CVTSD2SI64,
23573 IX86_BUILTIN_CVTSD2SS,
23574 IX86_BUILTIN_CVTSS2SD,
23575 IX86_BUILTIN_CVTTSD2SI,
23576 IX86_BUILTIN_CVTTSD2SI64,
23578 IX86_BUILTIN_CVTPS2DQ,
23579 IX86_BUILTIN_CVTPS2PD,
23580 IX86_BUILTIN_CVTTPS2DQ,
23582 IX86_BUILTIN_MOVNTI,
23583 IX86_BUILTIN_MOVNTPD,
23584 IX86_BUILTIN_MOVNTDQ,
23586 IX86_BUILTIN_MOVQ128,
23589 IX86_BUILTIN_MASKMOVDQU,
23590 IX86_BUILTIN_MOVMSKPD,
23591 IX86_BUILTIN_PMOVMSKB128,
23593 IX86_BUILTIN_PACKSSWB128,
23594 IX86_BUILTIN_PACKSSDW128,
23595 IX86_BUILTIN_PACKUSWB128,
23597 IX86_BUILTIN_PADDB128,
23598 IX86_BUILTIN_PADDW128,
23599 IX86_BUILTIN_PADDD128,
23600 IX86_BUILTIN_PADDQ128,
23601 IX86_BUILTIN_PADDSB128,
23602 IX86_BUILTIN_PADDSW128,
23603 IX86_BUILTIN_PADDUSB128,
23604 IX86_BUILTIN_PADDUSW128,
23605 IX86_BUILTIN_PSUBB128,
23606 IX86_BUILTIN_PSUBW128,
23607 IX86_BUILTIN_PSUBD128,
23608 IX86_BUILTIN_PSUBQ128,
23609 IX86_BUILTIN_PSUBSB128,
23610 IX86_BUILTIN_PSUBSW128,
23611 IX86_BUILTIN_PSUBUSB128,
23612 IX86_BUILTIN_PSUBUSW128,
23614 IX86_BUILTIN_PAND128,
23615 IX86_BUILTIN_PANDN128,
23616 IX86_BUILTIN_POR128,
23617 IX86_BUILTIN_PXOR128,
23619 IX86_BUILTIN_PAVGB128,
23620 IX86_BUILTIN_PAVGW128,
23622 IX86_BUILTIN_PCMPEQB128,
23623 IX86_BUILTIN_PCMPEQW128,
23624 IX86_BUILTIN_PCMPEQD128,
23625 IX86_BUILTIN_PCMPGTB128,
23626 IX86_BUILTIN_PCMPGTW128,
23627 IX86_BUILTIN_PCMPGTD128,
23629 IX86_BUILTIN_PMADDWD128,
23631 IX86_BUILTIN_PMAXSW128,
23632 IX86_BUILTIN_PMAXUB128,
23633 IX86_BUILTIN_PMINSW128,
23634 IX86_BUILTIN_PMINUB128,
23636 IX86_BUILTIN_PMULUDQ,
23637 IX86_BUILTIN_PMULUDQ128,
23638 IX86_BUILTIN_PMULHUW128,
23639 IX86_BUILTIN_PMULHW128,
23640 IX86_BUILTIN_PMULLW128,
23642 IX86_BUILTIN_PSADBW128,
23643 IX86_BUILTIN_PSHUFHW,
23644 IX86_BUILTIN_PSHUFLW,
23645 IX86_BUILTIN_PSHUFD,
23647 IX86_BUILTIN_PSLLDQI128,
23648 IX86_BUILTIN_PSLLWI128,
23649 IX86_BUILTIN_PSLLDI128,
23650 IX86_BUILTIN_PSLLQI128,
23651 IX86_BUILTIN_PSRAWI128,
23652 IX86_BUILTIN_PSRADI128,
23653 IX86_BUILTIN_PSRLDQI128,
23654 IX86_BUILTIN_PSRLWI128,
23655 IX86_BUILTIN_PSRLDI128,
23656 IX86_BUILTIN_PSRLQI128,
23658 IX86_BUILTIN_PSLLDQ128,
23659 IX86_BUILTIN_PSLLW128,
23660 IX86_BUILTIN_PSLLD128,
23661 IX86_BUILTIN_PSLLQ128,
23662 IX86_BUILTIN_PSRAW128,
23663 IX86_BUILTIN_PSRAD128,
23664 IX86_BUILTIN_PSRLW128,
23665 IX86_BUILTIN_PSRLD128,
23666 IX86_BUILTIN_PSRLQ128,
23668 IX86_BUILTIN_PUNPCKHBW128,
23669 IX86_BUILTIN_PUNPCKHWD128,
23670 IX86_BUILTIN_PUNPCKHDQ128,
23671 IX86_BUILTIN_PUNPCKHQDQ128,
23672 IX86_BUILTIN_PUNPCKLBW128,
23673 IX86_BUILTIN_PUNPCKLWD128,
23674 IX86_BUILTIN_PUNPCKLDQ128,
23675 IX86_BUILTIN_PUNPCKLQDQ128,
23677 IX86_BUILTIN_CLFLUSH,
23678 IX86_BUILTIN_MFENCE,
23679 IX86_BUILTIN_LFENCE,
23681 IX86_BUILTIN_BSRSI,
23682 IX86_BUILTIN_BSRDI,
23683 IX86_BUILTIN_RDPMC,
23684 IX86_BUILTIN_RDTSC,
23685 IX86_BUILTIN_RDTSCP,
23686 IX86_BUILTIN_ROLQI,
23687 IX86_BUILTIN_ROLHI,
23688 IX86_BUILTIN_RORQI,
23689 IX86_BUILTIN_RORHI,
23692 IX86_BUILTIN_ADDSUBPS,
23693 IX86_BUILTIN_HADDPS,
23694 IX86_BUILTIN_HSUBPS,
23695 IX86_BUILTIN_MOVSHDUP,
23696 IX86_BUILTIN_MOVSLDUP,
23697 IX86_BUILTIN_ADDSUBPD,
23698 IX86_BUILTIN_HADDPD,
23699 IX86_BUILTIN_HSUBPD,
23700 IX86_BUILTIN_LDDQU,
23702 IX86_BUILTIN_MONITOR,
23703 IX86_BUILTIN_MWAIT,
23706 IX86_BUILTIN_PHADDW,
23707 IX86_BUILTIN_PHADDD,
23708 IX86_BUILTIN_PHADDSW,
23709 IX86_BUILTIN_PHSUBW,
23710 IX86_BUILTIN_PHSUBD,
23711 IX86_BUILTIN_PHSUBSW,
23712 IX86_BUILTIN_PMADDUBSW,
23713 IX86_BUILTIN_PMULHRSW,
23714 IX86_BUILTIN_PSHUFB,
23715 IX86_BUILTIN_PSIGNB,
23716 IX86_BUILTIN_PSIGNW,
23717 IX86_BUILTIN_PSIGND,
23718 IX86_BUILTIN_PALIGNR,
23719 IX86_BUILTIN_PABSB,
23720 IX86_BUILTIN_PABSW,
23721 IX86_BUILTIN_PABSD,
23723 IX86_BUILTIN_PHADDW128,
23724 IX86_BUILTIN_PHADDD128,
23725 IX86_BUILTIN_PHADDSW128,
23726 IX86_BUILTIN_PHSUBW128,
23727 IX86_BUILTIN_PHSUBD128,
23728 IX86_BUILTIN_PHSUBSW128,
23729 IX86_BUILTIN_PMADDUBSW128,
23730 IX86_BUILTIN_PMULHRSW128,
23731 IX86_BUILTIN_PSHUFB128,
23732 IX86_BUILTIN_PSIGNB128,
23733 IX86_BUILTIN_PSIGNW128,
23734 IX86_BUILTIN_PSIGND128,
23735 IX86_BUILTIN_PALIGNR128,
23736 IX86_BUILTIN_PABSB128,
23737 IX86_BUILTIN_PABSW128,
23738 IX86_BUILTIN_PABSD128,
23740 /* AMDFAM10 - SSE4A New Instructions. */
23741 IX86_BUILTIN_MOVNTSD,
23742 IX86_BUILTIN_MOVNTSS,
23743 IX86_BUILTIN_EXTRQI,
23744 IX86_BUILTIN_EXTRQ,
23745 IX86_BUILTIN_INSERTQI,
23746 IX86_BUILTIN_INSERTQ,
23749 IX86_BUILTIN_BLENDPD,
23750 IX86_BUILTIN_BLENDPS,
23751 IX86_BUILTIN_BLENDVPD,
23752 IX86_BUILTIN_BLENDVPS,
23753 IX86_BUILTIN_PBLENDVB128,
23754 IX86_BUILTIN_PBLENDW128,
23759 IX86_BUILTIN_INSERTPS128,
23761 IX86_BUILTIN_MOVNTDQA,
23762 IX86_BUILTIN_MPSADBW128,
23763 IX86_BUILTIN_PACKUSDW128,
23764 IX86_BUILTIN_PCMPEQQ,
23765 IX86_BUILTIN_PHMINPOSUW128,
23767 IX86_BUILTIN_PMAXSB128,
23768 IX86_BUILTIN_PMAXSD128,
23769 IX86_BUILTIN_PMAXUD128,
23770 IX86_BUILTIN_PMAXUW128,
23772 IX86_BUILTIN_PMINSB128,
23773 IX86_BUILTIN_PMINSD128,
23774 IX86_BUILTIN_PMINUD128,
23775 IX86_BUILTIN_PMINUW128,
23777 IX86_BUILTIN_PMOVSXBW128,
23778 IX86_BUILTIN_PMOVSXBD128,
23779 IX86_BUILTIN_PMOVSXBQ128,
23780 IX86_BUILTIN_PMOVSXWD128,
23781 IX86_BUILTIN_PMOVSXWQ128,
23782 IX86_BUILTIN_PMOVSXDQ128,
23784 IX86_BUILTIN_PMOVZXBW128,
23785 IX86_BUILTIN_PMOVZXBD128,
23786 IX86_BUILTIN_PMOVZXBQ128,
23787 IX86_BUILTIN_PMOVZXWD128,
23788 IX86_BUILTIN_PMOVZXWQ128,
23789 IX86_BUILTIN_PMOVZXDQ128,
23791 IX86_BUILTIN_PMULDQ128,
23792 IX86_BUILTIN_PMULLD128,
23794 IX86_BUILTIN_ROUNDPD,
23795 IX86_BUILTIN_ROUNDPS,
23796 IX86_BUILTIN_ROUNDSD,
23797 IX86_BUILTIN_ROUNDSS,
23799 IX86_BUILTIN_PTESTZ,
23800 IX86_BUILTIN_PTESTC,
23801 IX86_BUILTIN_PTESTNZC,
23803 IX86_BUILTIN_VEC_INIT_V2SI,
23804 IX86_BUILTIN_VEC_INIT_V4HI,
23805 IX86_BUILTIN_VEC_INIT_V8QI,
23806 IX86_BUILTIN_VEC_EXT_V2DF,
23807 IX86_BUILTIN_VEC_EXT_V2DI,
23808 IX86_BUILTIN_VEC_EXT_V4SF,
23809 IX86_BUILTIN_VEC_EXT_V4SI,
23810 IX86_BUILTIN_VEC_EXT_V8HI,
23811 IX86_BUILTIN_VEC_EXT_V2SI,
23812 IX86_BUILTIN_VEC_EXT_V4HI,
23813 IX86_BUILTIN_VEC_EXT_V16QI,
23814 IX86_BUILTIN_VEC_SET_V2DI,
23815 IX86_BUILTIN_VEC_SET_V4SF,
23816 IX86_BUILTIN_VEC_SET_V4SI,
23817 IX86_BUILTIN_VEC_SET_V8HI,
23818 IX86_BUILTIN_VEC_SET_V4HI,
23819 IX86_BUILTIN_VEC_SET_V16QI,
23821 IX86_BUILTIN_VEC_PACK_SFIX,
23824 IX86_BUILTIN_CRC32QI,
23825 IX86_BUILTIN_CRC32HI,
23826 IX86_BUILTIN_CRC32SI,
23827 IX86_BUILTIN_CRC32DI,
23829 IX86_BUILTIN_PCMPESTRI128,
23830 IX86_BUILTIN_PCMPESTRM128,
23831 IX86_BUILTIN_PCMPESTRA128,
23832 IX86_BUILTIN_PCMPESTRC128,
23833 IX86_BUILTIN_PCMPESTRO128,
23834 IX86_BUILTIN_PCMPESTRS128,
23835 IX86_BUILTIN_PCMPESTRZ128,
23836 IX86_BUILTIN_PCMPISTRI128,
23837 IX86_BUILTIN_PCMPISTRM128,
23838 IX86_BUILTIN_PCMPISTRA128,
23839 IX86_BUILTIN_PCMPISTRC128,
23840 IX86_BUILTIN_PCMPISTRO128,
23841 IX86_BUILTIN_PCMPISTRS128,
23842 IX86_BUILTIN_PCMPISTRZ128,
23844 IX86_BUILTIN_PCMPGTQ,
23846 /* AES instructions */
23847 IX86_BUILTIN_AESENC128,
23848 IX86_BUILTIN_AESENCLAST128,
23849 IX86_BUILTIN_AESDEC128,
23850 IX86_BUILTIN_AESDECLAST128,
23851 IX86_BUILTIN_AESIMC128,
23852 IX86_BUILTIN_AESKEYGENASSIST128,
23854 /* PCLMUL instruction */
23855 IX86_BUILTIN_PCLMULQDQ128,
23858 IX86_BUILTIN_ADDPD256,
23859 IX86_BUILTIN_ADDPS256,
23860 IX86_BUILTIN_ADDSUBPD256,
23861 IX86_BUILTIN_ADDSUBPS256,
23862 IX86_BUILTIN_ANDPD256,
23863 IX86_BUILTIN_ANDPS256,
23864 IX86_BUILTIN_ANDNPD256,
23865 IX86_BUILTIN_ANDNPS256,
23866 IX86_BUILTIN_BLENDPD256,
23867 IX86_BUILTIN_BLENDPS256,
23868 IX86_BUILTIN_BLENDVPD256,
23869 IX86_BUILTIN_BLENDVPS256,
23870 IX86_BUILTIN_DIVPD256,
23871 IX86_BUILTIN_DIVPS256,
23872 IX86_BUILTIN_DPPS256,
23873 IX86_BUILTIN_HADDPD256,
23874 IX86_BUILTIN_HADDPS256,
23875 IX86_BUILTIN_HSUBPD256,
23876 IX86_BUILTIN_HSUBPS256,
23877 IX86_BUILTIN_MAXPD256,
23878 IX86_BUILTIN_MAXPS256,
23879 IX86_BUILTIN_MINPD256,
23880 IX86_BUILTIN_MINPS256,
23881 IX86_BUILTIN_MULPD256,
23882 IX86_BUILTIN_MULPS256,
23883 IX86_BUILTIN_ORPD256,
23884 IX86_BUILTIN_ORPS256,
23885 IX86_BUILTIN_SHUFPD256,
23886 IX86_BUILTIN_SHUFPS256,
23887 IX86_BUILTIN_SUBPD256,
23888 IX86_BUILTIN_SUBPS256,
23889 IX86_BUILTIN_XORPD256,
23890 IX86_BUILTIN_XORPS256,
23891 IX86_BUILTIN_CMPSD,
23892 IX86_BUILTIN_CMPSS,
23893 IX86_BUILTIN_CMPPD,
23894 IX86_BUILTIN_CMPPS,
23895 IX86_BUILTIN_CMPPD256,
23896 IX86_BUILTIN_CMPPS256,
23897 IX86_BUILTIN_CVTDQ2PD256,
23898 IX86_BUILTIN_CVTDQ2PS256,
23899 IX86_BUILTIN_CVTPD2PS256,
23900 IX86_BUILTIN_CVTPS2DQ256,
23901 IX86_BUILTIN_CVTPS2PD256,
23902 IX86_BUILTIN_CVTTPD2DQ256,
23903 IX86_BUILTIN_CVTPD2DQ256,
23904 IX86_BUILTIN_CVTTPS2DQ256,
23905 IX86_BUILTIN_EXTRACTF128PD256,
23906 IX86_BUILTIN_EXTRACTF128PS256,
23907 IX86_BUILTIN_EXTRACTF128SI256,
23908 IX86_BUILTIN_VZEROALL,
23909 IX86_BUILTIN_VZEROUPPER,
23910 IX86_BUILTIN_VPERMILVARPD,
23911 IX86_BUILTIN_VPERMILVARPS,
23912 IX86_BUILTIN_VPERMILVARPD256,
23913 IX86_BUILTIN_VPERMILVARPS256,
23914 IX86_BUILTIN_VPERMILPD,
23915 IX86_BUILTIN_VPERMILPS,
23916 IX86_BUILTIN_VPERMILPD256,
23917 IX86_BUILTIN_VPERMILPS256,
23918 IX86_BUILTIN_VPERMIL2PD,
23919 IX86_BUILTIN_VPERMIL2PS,
23920 IX86_BUILTIN_VPERMIL2PD256,
23921 IX86_BUILTIN_VPERMIL2PS256,
23922 IX86_BUILTIN_VPERM2F128PD256,
23923 IX86_BUILTIN_VPERM2F128PS256,
23924 IX86_BUILTIN_VPERM2F128SI256,
23925 IX86_BUILTIN_VBROADCASTSS,
23926 IX86_BUILTIN_VBROADCASTSD256,
23927 IX86_BUILTIN_VBROADCASTSS256,
23928 IX86_BUILTIN_VBROADCASTPD256,
23929 IX86_BUILTIN_VBROADCASTPS256,
23930 IX86_BUILTIN_VINSERTF128PD256,
23931 IX86_BUILTIN_VINSERTF128PS256,
23932 IX86_BUILTIN_VINSERTF128SI256,
23933 IX86_BUILTIN_LOADUPD256,
23934 IX86_BUILTIN_LOADUPS256,
23935 IX86_BUILTIN_STOREUPD256,
23936 IX86_BUILTIN_STOREUPS256,
23937 IX86_BUILTIN_LDDQU256,
23938 IX86_BUILTIN_MOVNTDQ256,
23939 IX86_BUILTIN_MOVNTPD256,
23940 IX86_BUILTIN_MOVNTPS256,
23941 IX86_BUILTIN_LOADDQU256,
23942 IX86_BUILTIN_STOREDQU256,
23943 IX86_BUILTIN_MASKLOADPD,
23944 IX86_BUILTIN_MASKLOADPS,
23945 IX86_BUILTIN_MASKSTOREPD,
23946 IX86_BUILTIN_MASKSTOREPS,
23947 IX86_BUILTIN_MASKLOADPD256,
23948 IX86_BUILTIN_MASKLOADPS256,
23949 IX86_BUILTIN_MASKSTOREPD256,
23950 IX86_BUILTIN_MASKSTOREPS256,
23951 IX86_BUILTIN_MOVSHDUP256,
23952 IX86_BUILTIN_MOVSLDUP256,
23953 IX86_BUILTIN_MOVDDUP256,
23955 IX86_BUILTIN_SQRTPD256,
23956 IX86_BUILTIN_SQRTPS256,
23957 IX86_BUILTIN_SQRTPS_NR256,
23958 IX86_BUILTIN_RSQRTPS256,
23959 IX86_BUILTIN_RSQRTPS_NR256,
23961 IX86_BUILTIN_RCPPS256,
23963 IX86_BUILTIN_ROUNDPD256,
23964 IX86_BUILTIN_ROUNDPS256,
23966 IX86_BUILTIN_UNPCKHPD256,
23967 IX86_BUILTIN_UNPCKLPD256,
23968 IX86_BUILTIN_UNPCKHPS256,
23969 IX86_BUILTIN_UNPCKLPS256,
23971 IX86_BUILTIN_SI256_SI,
23972 IX86_BUILTIN_PS256_PS,
23973 IX86_BUILTIN_PD256_PD,
23974 IX86_BUILTIN_SI_SI256,
23975 IX86_BUILTIN_PS_PS256,
23976 IX86_BUILTIN_PD_PD256,
23978 IX86_BUILTIN_VTESTZPD,
23979 IX86_BUILTIN_VTESTCPD,
23980 IX86_BUILTIN_VTESTNZCPD,
23981 IX86_BUILTIN_VTESTZPS,
23982 IX86_BUILTIN_VTESTCPS,
23983 IX86_BUILTIN_VTESTNZCPS,
23984 IX86_BUILTIN_VTESTZPD256,
23985 IX86_BUILTIN_VTESTCPD256,
23986 IX86_BUILTIN_VTESTNZCPD256,
23987 IX86_BUILTIN_VTESTZPS256,
23988 IX86_BUILTIN_VTESTCPS256,
23989 IX86_BUILTIN_VTESTNZCPS256,
23990 IX86_BUILTIN_PTESTZ256,
23991 IX86_BUILTIN_PTESTC256,
23992 IX86_BUILTIN_PTESTNZC256,
23994 IX86_BUILTIN_MOVMSKPD256,
23995 IX86_BUILTIN_MOVMSKPS256,
23997 /* TFmode support builtins. */
23999 IX86_BUILTIN_HUGE_VALQ,
24000 IX86_BUILTIN_FABSQ,
24001 IX86_BUILTIN_COPYSIGNQ,
24003 /* Vectorizer support builtins. */
24004 IX86_BUILTIN_CPYSGNPS,
24005 IX86_BUILTIN_CPYSGNPD,
24006 IX86_BUILTIN_CPYSGNPS256,
24007 IX86_BUILTIN_CPYSGNPD256,
24009 IX86_BUILTIN_CVTUDQ2PS,
24011 IX86_BUILTIN_VEC_PERM_V2DF,
24012 IX86_BUILTIN_VEC_PERM_V4SF,
24013 IX86_BUILTIN_VEC_PERM_V2DI,
24014 IX86_BUILTIN_VEC_PERM_V4SI,
24015 IX86_BUILTIN_VEC_PERM_V8HI,
24016 IX86_BUILTIN_VEC_PERM_V16QI,
24017 IX86_BUILTIN_VEC_PERM_V2DI_U,
24018 IX86_BUILTIN_VEC_PERM_V4SI_U,
24019 IX86_BUILTIN_VEC_PERM_V8HI_U,
24020 IX86_BUILTIN_VEC_PERM_V16QI_U,
24021 IX86_BUILTIN_VEC_PERM_V4DF,
24022 IX86_BUILTIN_VEC_PERM_V8SF,
24024 /* FMA4 and XOP instructions. */
24025 IX86_BUILTIN_VFMADDSS,
24026 IX86_BUILTIN_VFMADDSD,
24027 IX86_BUILTIN_VFMADDPS,
24028 IX86_BUILTIN_VFMADDPD,
24029 IX86_BUILTIN_VFMADDPS256,
24030 IX86_BUILTIN_VFMADDPD256,
24031 IX86_BUILTIN_VFMADDSUBPS,
24032 IX86_BUILTIN_VFMADDSUBPD,
24033 IX86_BUILTIN_VFMADDSUBPS256,
24034 IX86_BUILTIN_VFMADDSUBPD256,
24036 IX86_BUILTIN_VPCMOV,
24037 IX86_BUILTIN_VPCMOV_V2DI,
24038 IX86_BUILTIN_VPCMOV_V4SI,
24039 IX86_BUILTIN_VPCMOV_V8HI,
24040 IX86_BUILTIN_VPCMOV_V16QI,
24041 IX86_BUILTIN_VPCMOV_V4SF,
24042 IX86_BUILTIN_VPCMOV_V2DF,
24043 IX86_BUILTIN_VPCMOV256,
24044 IX86_BUILTIN_VPCMOV_V4DI256,
24045 IX86_BUILTIN_VPCMOV_V8SI256,
24046 IX86_BUILTIN_VPCMOV_V16HI256,
24047 IX86_BUILTIN_VPCMOV_V32QI256,
24048 IX86_BUILTIN_VPCMOV_V8SF256,
24049 IX86_BUILTIN_VPCMOV_V4DF256,
24051 IX86_BUILTIN_VPPERM,
24053 IX86_BUILTIN_VPMACSSWW,
24054 IX86_BUILTIN_VPMACSWW,
24055 IX86_BUILTIN_VPMACSSWD,
24056 IX86_BUILTIN_VPMACSWD,
24057 IX86_BUILTIN_VPMACSSDD,
24058 IX86_BUILTIN_VPMACSDD,
24059 IX86_BUILTIN_VPMACSSDQL,
24060 IX86_BUILTIN_VPMACSSDQH,
24061 IX86_BUILTIN_VPMACSDQL,
24062 IX86_BUILTIN_VPMACSDQH,
24063 IX86_BUILTIN_VPMADCSSWD,
24064 IX86_BUILTIN_VPMADCSWD,
24066 IX86_BUILTIN_VPHADDBW,
24067 IX86_BUILTIN_VPHADDBD,
24068 IX86_BUILTIN_VPHADDBQ,
24069 IX86_BUILTIN_VPHADDWD,
24070 IX86_BUILTIN_VPHADDWQ,
24071 IX86_BUILTIN_VPHADDDQ,
24072 IX86_BUILTIN_VPHADDUBW,
24073 IX86_BUILTIN_VPHADDUBD,
24074 IX86_BUILTIN_VPHADDUBQ,
24075 IX86_BUILTIN_VPHADDUWD,
24076 IX86_BUILTIN_VPHADDUWQ,
24077 IX86_BUILTIN_VPHADDUDQ,
24078 IX86_BUILTIN_VPHSUBBW,
24079 IX86_BUILTIN_VPHSUBWD,
24080 IX86_BUILTIN_VPHSUBDQ,
24082 IX86_BUILTIN_VPROTB,
24083 IX86_BUILTIN_VPROTW,
24084 IX86_BUILTIN_VPROTD,
24085 IX86_BUILTIN_VPROTQ,
24086 IX86_BUILTIN_VPROTB_IMM,
24087 IX86_BUILTIN_VPROTW_IMM,
24088 IX86_BUILTIN_VPROTD_IMM,
24089 IX86_BUILTIN_VPROTQ_IMM,
24091 IX86_BUILTIN_VPSHLB,
24092 IX86_BUILTIN_VPSHLW,
24093 IX86_BUILTIN_VPSHLD,
24094 IX86_BUILTIN_VPSHLQ,
24095 IX86_BUILTIN_VPSHAB,
24096 IX86_BUILTIN_VPSHAW,
24097 IX86_BUILTIN_VPSHAD,
24098 IX86_BUILTIN_VPSHAQ,
24100 IX86_BUILTIN_VFRCZSS,
24101 IX86_BUILTIN_VFRCZSD,
24102 IX86_BUILTIN_VFRCZPS,
24103 IX86_BUILTIN_VFRCZPD,
24104 IX86_BUILTIN_VFRCZPS256,
24105 IX86_BUILTIN_VFRCZPD256,
24107 IX86_BUILTIN_VPCOMEQUB,
24108 IX86_BUILTIN_VPCOMNEUB,
24109 IX86_BUILTIN_VPCOMLTUB,
24110 IX86_BUILTIN_VPCOMLEUB,
24111 IX86_BUILTIN_VPCOMGTUB,
24112 IX86_BUILTIN_VPCOMGEUB,
24113 IX86_BUILTIN_VPCOMFALSEUB,
24114 IX86_BUILTIN_VPCOMTRUEUB,
24116 IX86_BUILTIN_VPCOMEQUW,
24117 IX86_BUILTIN_VPCOMNEUW,
24118 IX86_BUILTIN_VPCOMLTUW,
24119 IX86_BUILTIN_VPCOMLEUW,
24120 IX86_BUILTIN_VPCOMGTUW,
24121 IX86_BUILTIN_VPCOMGEUW,
24122 IX86_BUILTIN_VPCOMFALSEUW,
24123 IX86_BUILTIN_VPCOMTRUEUW,
24125 IX86_BUILTIN_VPCOMEQUD,
24126 IX86_BUILTIN_VPCOMNEUD,
24127 IX86_BUILTIN_VPCOMLTUD,
24128 IX86_BUILTIN_VPCOMLEUD,
24129 IX86_BUILTIN_VPCOMGTUD,
24130 IX86_BUILTIN_VPCOMGEUD,
24131 IX86_BUILTIN_VPCOMFALSEUD,
24132 IX86_BUILTIN_VPCOMTRUEUD,
24134 IX86_BUILTIN_VPCOMEQUQ,
24135 IX86_BUILTIN_VPCOMNEUQ,
24136 IX86_BUILTIN_VPCOMLTUQ,
24137 IX86_BUILTIN_VPCOMLEUQ,
24138 IX86_BUILTIN_VPCOMGTUQ,
24139 IX86_BUILTIN_VPCOMGEUQ,
24140 IX86_BUILTIN_VPCOMFALSEUQ,
24141 IX86_BUILTIN_VPCOMTRUEUQ,
24143 IX86_BUILTIN_VPCOMEQB,
24144 IX86_BUILTIN_VPCOMNEB,
24145 IX86_BUILTIN_VPCOMLTB,
24146 IX86_BUILTIN_VPCOMLEB,
24147 IX86_BUILTIN_VPCOMGTB,
24148 IX86_BUILTIN_VPCOMGEB,
24149 IX86_BUILTIN_VPCOMFALSEB,
24150 IX86_BUILTIN_VPCOMTRUEB,
24152 IX86_BUILTIN_VPCOMEQW,
24153 IX86_BUILTIN_VPCOMNEW,
24154 IX86_BUILTIN_VPCOMLTW,
24155 IX86_BUILTIN_VPCOMLEW,
24156 IX86_BUILTIN_VPCOMGTW,
24157 IX86_BUILTIN_VPCOMGEW,
24158 IX86_BUILTIN_VPCOMFALSEW,
24159 IX86_BUILTIN_VPCOMTRUEW,
24161 IX86_BUILTIN_VPCOMEQD,
24162 IX86_BUILTIN_VPCOMNED,
24163 IX86_BUILTIN_VPCOMLTD,
24164 IX86_BUILTIN_VPCOMLED,
24165 IX86_BUILTIN_VPCOMGTD,
24166 IX86_BUILTIN_VPCOMGED,
24167 IX86_BUILTIN_VPCOMFALSED,
24168 IX86_BUILTIN_VPCOMTRUED,
24170 IX86_BUILTIN_VPCOMEQQ,
24171 IX86_BUILTIN_VPCOMNEQ,
24172 IX86_BUILTIN_VPCOMLTQ,
24173 IX86_BUILTIN_VPCOMLEQ,
24174 IX86_BUILTIN_VPCOMGTQ,
24175 IX86_BUILTIN_VPCOMGEQ,
24176 IX86_BUILTIN_VPCOMFALSEQ,
24177 IX86_BUILTIN_VPCOMTRUEQ,
24179 /* LWP instructions. */
24180 IX86_BUILTIN_LLWPCB,
24181 IX86_BUILTIN_SLWPCB,
24182 IX86_BUILTIN_LWPVAL32,
24183 IX86_BUILTIN_LWPVAL64,
24184 IX86_BUILTIN_LWPINS32,
24185 IX86_BUILTIN_LWPINS64,
24189 /* BMI instructions. */
24190 IX86_BUILTIN_BEXTR32,
24191 IX86_BUILTIN_BEXTR64,
24194 /* TBM instructions. */
24195 IX86_BUILTIN_BEXTRI32,
24196 IX86_BUILTIN_BEXTRI64,
24199 /* FSGSBASE instructions. */
24200 IX86_BUILTIN_RDFSBASE32,
24201 IX86_BUILTIN_RDFSBASE64,
24202 IX86_BUILTIN_RDGSBASE32,
24203 IX86_BUILTIN_RDGSBASE64,
24204 IX86_BUILTIN_WRFSBASE32,
24205 IX86_BUILTIN_WRFSBASE64,
24206 IX86_BUILTIN_WRGSBASE32,
24207 IX86_BUILTIN_WRGSBASE64,
24209 /* RDRND instructions. */
24210 IX86_BUILTIN_RDRAND16_STEP,
24211 IX86_BUILTIN_RDRAND32_STEP,
24212 IX86_BUILTIN_RDRAND64_STEP,
24214 /* F16C instructions. */
24215 IX86_BUILTIN_CVTPH2PS,
24216 IX86_BUILTIN_CVTPH2PS256,
24217 IX86_BUILTIN_CVTPS2PH,
24218 IX86_BUILTIN_CVTPS2PH256,
24223 /* Table for the ix86 builtin decls. */
24224 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
24226 /* Table of all of the builtin functions that are possible with different ISAs
24227 but are waiting to be built until a function is declared to use that ISA. */
24229 struct builtin_isa {
24230 const char *name; /* function name */
24231 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
24232 int isa; /* isa_flags this builtin is defined for */
24233 bool const_p; /* true if the declaration is constant */
24234 bool set_and_not_built_p;
24237 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
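
/* Editorial sketch (hypothetical names, not in the original source): a
   caller registers one entry point per builtin code, e.g.

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_EXAMPLE);

   If SSE2 is not enabled at this point and the front end lacks the
   ext_scope hook, nothing is built yet: the name/type/code triple is
   merely parked in ix86_builtins_isa and materialized later by
   ix86_add_new_builtins.  */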

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
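
/* Editorial note: when the decl is deferred (def_builtin returned
   NULL_TREE), TREE_READONLY cannot be set on a tree node yet, so the
   "const" property is recorded in ix86_builtins_isa[].const_p and replayed
   by ix86_add_new_builtins once the decl exists.  For a builtin that is
   buildable immediately, e.g. (hypothetical)

     tree d = def_builtin_const (OPTION_MASK_ISA_SSE,
				 "__builtin_ia32_example2",
				 FLOAT_FTYPE_FLOAT, IX86_BUILTIN_EXAMPLE2);

   TREE_READONLY (d) is 1 on return, letting the middle end treat calls to
   the builtin as pure value computations and CSE them.  */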

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
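
/* Editorial sketch: this runs when the effective ISA grows, typically
   because a function definition carries a target attribute.  A hypothetical
   caller, on entering a function compiled with target("avx"), would do

     ix86_add_new_builtins (ix86_isa_flags);

   so that deferred AVX builtins become visible at file scope.  Every
   recorded entry whose .isa mask intersects ISA and that is still
   set_and_not_built_p gets a real decl here, exactly once.  */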

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
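
/* Editorial note: the COMPARISON and FLAG members cooperate.  In the
   argument tables below, __builtin_ia32_cmpgtps is encoded as LT together
   with the V4SF_FTYPE_V4SF_V4SF_SWAP function type: the expander exchanges
   the two operands and emits the natively supported LT compare, the same
   operand-swapping idea that BUILTIN_DESC_SWAP_OPERANDS expresses as a
   flag bit.  For most entries FLAG simply carries the entry's
   ix86_builtin_func_type cast to int.  */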

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
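
/* Editorial sketch: a bdesc_comi entry is expanded by emitting the
   (u)comiss/(u)comisd pattern named by ICODE, which sets the FP flags,
   and then testing the flags with the predicate stored in COMPARISON,
   roughly

     pat = GEN_FCN (d->icode) (op0, op1);
     emit_insn (pat);
     ... gen_rtx_fmt_ee (d->comparison, QImode, SET_DEST (pat),
			 const0_rtx) ...

   (see ix86_expand_sse_comi in this file for the exact emission).  */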

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
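
/* Editorial note: for the pcmp[ei]stri/pcmp[ei]strm forms the FLAG member
   is 0 and the builtin's value is the index or mask produced by the string
   instruction itself.  For the a/c/o/s/z flavors FLAG names a CC mode
   (CCAmode, CCCmode, ...), and the expander instead materializes the
   corresponding EFLAGS bit, along the lines of

     gen_rtx_fmt_ee (EQ, QImode,
		     gen_rtx_REG ((enum machine_mode) d->flag, FLAGS_REG),
		     const0_rtx)

   (see ix86_expand_sse_pcmpestr/ix86_expand_sse_pcmpistr for details).  */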

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
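
/* Editorial sketch: this table is walked once at builtin-initialization
   time; each FLAG carries the entry's ix86_builtin_func_type, so the
   registration loop is essentially

     for (i = 0, d = bdesc_special_args;
	  i < ARRAY_SIZE (bdesc_special_args);
	  i++, d++)
       def_builtin (d->mask, d->name,
		    (enum ix86_builtin_func_type) d->flag, d->code);

   matching the loop in ix86_init_mmx_sse_builtins later in the file.  */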

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
24702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24704 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24705 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24706 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24707 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24708 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24709 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24710 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24711 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24712 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24713 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24714 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24715 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24734 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24735 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24741 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24742 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24743 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24744 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
24755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24766 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24768 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24771 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24772 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24776 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24778 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24779 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24781 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24784 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24785 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24787 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24789 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24790 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24791 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24792 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24793 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24794 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24795 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24796 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24800 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24801 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
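
/* Illustrative note, not part of the table: the two PMULUDQ entries
   are widening multiplies, which is why their signatures return a
   wider element type (V2DI from V4SI operands); only the even 32-bit
   elements participate.  emmintrin.h's _mm_mul_epu32 reaches the
   128-bit entry roughly like this:

     __m128i
     mul_even_u32 (__m128i a, __m128i b)
     {
       return (__m128i) __builtin_ia32_pmuludq128 ((__v4si) a,
						   (__v4si) b);
     }
*/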
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
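
/* Illustrative note, not part of the table: the _COUNT suffixes above
   distinguish the two SSE2 shift interfaces.  The ..._SI_COUNT
   entries (psllwi128 and friends) take the count as a plain integer,
   while the vector ..._COUNT entries (psllw128 and friends) take it
   in the low quadword of an XMM operand, matching the two hardware
   encodings.  Sketch of both, via the builtins defined here:

     __m128i shift_by_int (__m128i a, int n)
     {
       return (__m128i) __builtin_ia32_psllwi128 ((__v8hi) a, n);
     }

     __m128i shift_by_vec (__m128i a, __m128i n)
     {
       return (__m128i) __builtin_ia32_psllw128 ((__v8hi) a, (__v8hi) n);
     }
*/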
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

/* SSE3 */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

/* SSSE3 */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
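
/* Illustrative note, not part of the table: the _INT_CONVERT
   signatures on the PALIGNR entries mean the expander remaps the
   V2DI/V1DI operands onto the underlying TImode/DImode patterns, and
   the shift operand counts bits rather than bytes, so the user-level
   wrapper scales the byte count by eight.  Roughly:

     __m128i align_by_4_bytes (__m128i a, __m128i b)
     {
       return (__m128i) __builtin_ia32_palignr128 ((__v2di) a,
						   (__v2di) b, 4 * 8);
     }
*/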

/* SSE4.1 */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
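
/* Illustrative note, not part of the table: the three PTEST entries
   share a single insn code and differ only in the comparison code,
   which tells the expander which flag to read back: EQ reads ZF
   (ptestz), LTU reads CF (ptestc), and GTU requires both clear
   (ptestnzc).  A user-level sketch via the first entry:

     int all_zero_under_mask (__m128i m, __m128i v)
     {
       return __builtin_ia32_ptestz128 ((__v2di) m, (__v2di) v);
     }
*/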

/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
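
/* Illustrative note, not part of the table: the CRC32 signatures name
   plain scalar builtins; UINT_FTYPE_UINT_UCHAR, for instance, is
   unsigned int (unsigned int, unsigned char).  A sketch of folding a
   buffer through the byte-wide entry:

     unsigned int
     crc32_bytes (unsigned int crc, const unsigned char *p, int n)
     {
       while (n-- > 0)
	 crc = __builtin_ia32_crc32qi (crc, *p++);
       return crc;
     }
*/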

/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* AES */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
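
/* Illustrative note, not part of the table: the single PCLMUL entry
   takes an immediate whose bits 0 and 4 select the low or high
   quadword of each source for the 64x64->128 carry-less multiply.
   Assuming the usual wmmintrin.h wrapper on top, the low/low form
   looks roughly like:

     __m128i clmul_low (__m128i x, __m128i y)
     {
       return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di) x,
						     (__v2di) y, 0x00);
     }
*/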

/* AVX */
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
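
/* Illustrative note, not part of the table: the VEXTRACTF128 and
   VINSERTF128 entries move one 128-bit lane of a 256-bit vector, with
   the trailing immediate selecting the lane; avxintrin.h builds
   _mm256_extractf128_ps and friends on top.  Roughly:

     __m128 upper_half (__m256 x)
     {
       return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf) x, 1);
     }
*/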

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
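
/* Illustrative note, not part of the table: the si256_si/ps256_ps/
   pd256_pd entries and their vec_extract_lo counterparts implement
   the 128<->256 cast intrinsics.  The widening direction is not a
   zero-extension; the upper 128 bits of the result are undefined, as
   in:

     __m256 widen (__m128 x)
     {
       return (__m256) __builtin_ia32_ps256_ps ((__v4sf) x);
     }
*/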

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

{ OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

/* BMI */
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

/* TBM */
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
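
/* Illustrative note, not part of the table: both the BMI and TBM
   bit-extract builtins take a packed control word, start bit in bits
   0-7 and field length in bits 8-15; the TBM bextri form additionally
   requires the control to be a compile-time constant.  For example:

     unsigned int
     extract_field (unsigned int x, unsigned int start, unsigned int len)
     {
       return __builtin_ia32_bextr_u32 (x, start | (len << 8));
     }
*/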

/* F16C */
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
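
/* Illustrative note, not part of the table: the F16C entries convert
   between half floats held in V8HI vectors and single precision; the
   vcvtps2ph direction additionally takes a rounding-control
   immediate.  f16cintrin.h wraps them roughly as:

     __m128 halves_to_floats (__m128i h)
     {
       return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) h);
     }
*/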

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
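
/* Reading aid for the aliases above (descriptive only): each
   MULTI_ARG_<n>_... name abbreviates the function type used by an
   <n>-operand FMA4/XOP builtin.  A trailing 2 on the element code
   picks the 256-bit form (MULTI_ARG_3_SF2 is
   V8SF_FTYPE_V8SF_V8SF_V8SF), _I1 plays that role in the four-operand
   forms, and suffixes such as _IMM, _CMP and _TF flag an immediate,
   comparison or PCOM_TRUE/PCOM_FALSE operand.  */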

static const struct builtin_description bdesc_multi_arg[] =
{
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
  "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
  UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
  "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
  UNKNOWN, (int)MULTI_ARG_3_DF },

{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
  "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
  UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
  "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
  UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
  "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
  UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
  "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
  UNKNOWN, (int)MULTI_ARG_3_DF2 },

{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
  "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
  UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
  "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
  UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
  "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
  UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
  "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
  UNKNOWN, (int)MULTI_ARG_3_DF2 },
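
/* Illustrative note, not part of the table: the vmfmadd entries are
   the scalar (merging) forms, while the fmadd/fmaddsub entries are
   packed and shared between FMA and FMA4.  Assuming the usual
   fma4intrin.h wrapper, the packed single form is reached roughly as:

     __m128 mul_add (__m128 a, __m128 b, __m128 c)
     {
       return (__m128) __builtin_ia32_vfmaddps ((__v4sf) a,
						(__v4sf) b, (__v4sf) c);
     }
*/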

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
25341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
25342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
25344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
25345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
25346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
25347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
25349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
25350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
25352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
25353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
25354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
25355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
25357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
25358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
25360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
25361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
25362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
25363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
25365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
25366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
25368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
25369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
25370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
25371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
25373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
25378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
25379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
25380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
25382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
25387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
25388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
25389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
25391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
25392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
25393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
25394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
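/* Editorial usage sketch (not GCC source): each row above binds one
   two-operand XOP comparison to a builtin, with the rtx_code column
   (LT, LE, ...) selecting the condition the expander emits.  Assuming
   -mxop and the __v4si typedef from the intrinsic headers:

       __v4si lt_mask (__v4si a, __v4si b)
       {
         return __builtin_ia32_vpcomltd (a, b);    all-ones lanes where a < b
       }
*/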
25398 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
25399 in the current target ISA to allow the user to compile particular modules
25400    with different target specific options that differ from the command line
25401    options.  */
25403 ix86_init_mmx_sse_builtins (void)
25405 const struct builtin_description * d;
25406 enum ix86_builtin_func_type ftype;
25409   /* Add all special builtins with a variable number of operands.  */
25410 for (i = 0, d = bdesc_special_args;
25411 i < ARRAY_SIZE (bdesc_special_args);
25417 ftype = (enum ix86_builtin_func_type) d->flag;
25418 def_builtin (d->mask, d->name, ftype, d->code);
25421   /* Add all builtins with a variable number of operands.  */
25422 for (i = 0, d = bdesc_args;
25423 i < ARRAY_SIZE (bdesc_args);
25429 ftype = (enum ix86_builtin_func_type) d->flag;
25430 def_builtin_const (d->mask, d->name, ftype, d->code);
25433 /* pcmpestr[im] insns. */
25434 for (i = 0, d = bdesc_pcmpestr;
25435 i < ARRAY_SIZE (bdesc_pcmpestr);
25438 if (d->code == IX86_BUILTIN_PCMPESTRM128)
25439 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
25441 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
25442 def_builtin_const (d->mask, d->name, ftype, d->code);
25445 /* pcmpistr[im] insns. */
25446 for (i = 0, d = bdesc_pcmpistr;
25447 i < ARRAY_SIZE (bdesc_pcmpistr);
25450 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25451 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25453 ftype = INT_FTYPE_V16QI_V16QI_INT;
25454 def_builtin_const (d->mask, d->name, ftype, d->code);
25457 /* comi/ucomi insns. */
25458 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25460 if (d->mask == OPTION_MASK_ISA_SSE2)
25461 ftype = INT_FTYPE_V2DF_V2DF;
25463 ftype = INT_FTYPE_V4SF_V4SF;
25464 def_builtin_const (d->mask, d->name, ftype, d->code);
25468 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25469 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25470 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25471 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25473 /* SSE or 3DNow!A */
25474 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25475 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25476 IX86_BUILTIN_MASKMOVQ);
25479 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25480 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25482 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25483 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25484 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25485 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25488 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25489 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25490 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25491 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25494 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25495 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25496 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25497 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25498 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25499 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25500 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25501 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25502 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25503 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25504 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25505 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25508 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25509 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25512 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
25513 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
25514 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
25515 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
25516 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
25517 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
25518 IX86_BUILTIN_RDRAND64_STEP);
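/* Editorial usage sketch: the *_step builtins mirror the RDRAND
   carry-flag protocol (see the conditional-move expansion in
   ix86_expand_builtin below): they return nonzero on success and store
   the random value through the pointer argument.  Assuming -mrdrnd:

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         ;    retry until the hardware delivers a valid value
*/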
25520 /* MMX access to the vec_init patterns. */
25521 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25522 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25524 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25525 V4HI_FTYPE_HI_HI_HI_HI,
25526 IX86_BUILTIN_VEC_INIT_V4HI);
25528 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25529 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25530 IX86_BUILTIN_VEC_INIT_V8QI);
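/* Editorial usage sketch: mmintrin.h layers _mm_set_pi32 and friends
   over these vec_init builtins, so MMX code is emitted only on explicit
   request.  Assuming -mmmx and a __v2si typedef:

       __v2si pair = __builtin_ia32_vec_init_v2si (lo, hi);
*/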
25532 /* Access to the vec_extract patterns. */
25533 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25534 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25535 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25536 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25537 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25538 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25539 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25540 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25541 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25542 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25544 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25545 "__builtin_ia32_vec_ext_v4hi",
25546 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25548 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25549 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25551 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25552 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
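/* Editorial usage sketch: the vec_ext builtins take the vector and a
   constant lane selector, which get_element_number below range-checks
   at expand time.  Assuming -msse and a __v4sf typedef:

       float x = __builtin_ia32_vec_ext_v4sf (v, 2);    lane 2 of v
*/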
25554 /* Access to the vec_set patterns. */
25555 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25556 "__builtin_ia32_vec_set_v2di",
25557 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25559 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25560 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25562 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25563 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25565 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25566 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25568 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25569 "__builtin_ia32_vec_set_v4hi",
25570 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25572 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25573 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
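/* Editorial usage sketch: the vec_set builtins return an updated copy
   rather than modifying their input (see ix86_expand_vec_set_builtin
   below).  Assuming -msse4.1 and a __v4si typedef:

       __v4si w = __builtin_ia32_vec_set_v4si (v, 42, 0);    v with lane 0 = 42
*/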
25575   /* Add FMA4 multi-arg builtin instructions.  */
25576 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25581 ftype = (enum ix86_builtin_func_type) d->flag;
25582 def_builtin_const (d->mask, d->name, ftype, d->code);
25586 /* Internal method for ix86_init_builtins. */
25589 ix86_init_builtins_va_builtins_abi (void)
25591 tree ms_va_ref, sysv_va_ref;
25592 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25593 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25594 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25595 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25599 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25600 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25601 ms_va_ref = build_reference_type (ms_va_list_type_node);
25603 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25606 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25607 fnvoid_va_start_ms =
25608 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25609 fnvoid_va_end_sysv =
25610 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25611 fnvoid_va_start_sysv =
25612 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25614 fnvoid_va_copy_ms =
25615 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25617 fnvoid_va_copy_sysv =
25618 build_function_type_list (void_type_node, sysv_va_ref,
25619 sysv_va_ref, NULL_TREE);
25621 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25622 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25623 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25624 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25625 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25626 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25627 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25628 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25629 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25630 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25631 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25632 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
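/* Editorial usage sketch (hypothetical fragment; needs a target where
   the ms_abi attribute exists): these builtins let one function walk a
   va_list of the other calling convention:

       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, last_named_arg);
       ... consume arguments ...
       __builtin_ms_va_end (ap);
*/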
25636 ix86_init_builtin_types (void)
25638 tree float128_type_node, float80_type_node;
25640 /* The __float80 type. */
25641 float80_type_node = long_double_type_node;
25642 if (TYPE_MODE (float80_type_node) != XFmode)
25644 /* The __float80 type. */
25645 float80_type_node = make_node (REAL_TYPE);
25647 TYPE_PRECISION (float80_type_node) = 80;
25648 layout_type (float80_type_node);
25650 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25652 /* The __float128 type. */
25653 float128_type_node = make_node (REAL_TYPE);
25654 TYPE_PRECISION (float128_type_node) = 128;
25655 layout_type (float128_type_node);
25656 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25658 /* This macro is built by i386-builtin-types.awk. */
25659 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25663 ix86_init_builtins (void)
25667 ix86_init_builtin_types ();
25669 /* TFmode support builtins. */
25670 def_builtin_const (0, "__builtin_infq",
25671 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25672 def_builtin_const (0, "__builtin_huge_valq",
25673 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25675   /* We will expand these to normal calls if SSE2 isn't available, since
25676      they are used by libgcc.  */
25677 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25678 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25679 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25680 TREE_READONLY (t) = 1;
25681 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25683 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25684 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25685 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25686 TREE_READONLY (t) = 1;
25687 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
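/* Editorial usage sketch: because the decls above carry the __fabstf2
   and __copysigntf3 libgcc names, a call such as

       __float128 m = __builtin_fabsq (x);

   can expand inline when SSE2 is available and otherwise becomes a
   normal libgcc call, as handled in ix86_expand_builtin below.  */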
25689 ix86_init_mmx_sse_builtins ();
25692 ix86_init_builtins_va_builtins_abi ();
25694 #ifdef SUBTARGET_INIT_BUILTINS
25695 SUBTARGET_INIT_BUILTINS;
25699 /* Return the ix86 builtin for CODE. */
25702 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25704 if (code >= IX86_BUILTIN_MAX)
25705 return error_mark_node;
25707 return ix86_builtins[code];
25710 /* Errors in the source file can cause expand_expr to return const0_rtx
25711 where we expect a vector. To avoid crashing, use one of the vector
25712 clear instructions. */
25714 safe_vector_operand (rtx x, enum machine_mode mode)
25716 if (x == const0_rtx)
25717 x = CONST0_RTX (mode);
25721 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25724 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25727 tree arg0 = CALL_EXPR_ARG (exp, 0);
25728 tree arg1 = CALL_EXPR_ARG (exp, 1);
25729 rtx op0 = expand_normal (arg0);
25730 rtx op1 = expand_normal (arg1);
25731 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25732 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25733 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25735 if (VECTOR_MODE_P (mode0))
25736 op0 = safe_vector_operand (op0, mode0);
25737 if (VECTOR_MODE_P (mode1))
25738 op1 = safe_vector_operand (op1, mode1);
25740 if (optimize || !target
25741 || GET_MODE (target) != tmode
25742 || !insn_data[icode].operand[0].predicate (target, tmode))
25743 target = gen_reg_rtx (tmode);
25745 if (GET_MODE (op1) == SImode && mode1 == TImode)
25747 rtx x = gen_reg_rtx (V4SImode);
25748 emit_insn (gen_sse2_loadd (x, op1));
25749 op1 = gen_lowpart (TImode, x);
25752 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25753 op0 = copy_to_mode_reg (mode0, op0);
25754 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25755 op1 = copy_to_mode_reg (mode1, op1);
25757 pat = GEN_FCN (icode) (target, op0, op1);
25766 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25769 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25770 enum ix86_builtin_func_type m_type,
25771 enum rtx_code sub_code)
25776 bool comparison_p = false;
25778 bool last_arg_constant = false;
25779 int num_memory = 0;
25782 enum machine_mode mode;
25785 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25789 case MULTI_ARG_4_DF2_DI_I:
25790 case MULTI_ARG_4_DF2_DI_I1:
25791 case MULTI_ARG_4_SF2_SI_I:
25792 case MULTI_ARG_4_SF2_SI_I1:
25794 last_arg_constant = true;
25797 case MULTI_ARG_3_SF:
25798 case MULTI_ARG_3_DF:
25799 case MULTI_ARG_3_SF2:
25800 case MULTI_ARG_3_DF2:
25801 case MULTI_ARG_3_DI:
25802 case MULTI_ARG_3_SI:
25803 case MULTI_ARG_3_SI_DI:
25804 case MULTI_ARG_3_HI:
25805 case MULTI_ARG_3_HI_SI:
25806 case MULTI_ARG_3_QI:
25807 case MULTI_ARG_3_DI2:
25808 case MULTI_ARG_3_SI2:
25809 case MULTI_ARG_3_HI2:
25810 case MULTI_ARG_3_QI2:
25814 case MULTI_ARG_2_SF:
25815 case MULTI_ARG_2_DF:
25816 case MULTI_ARG_2_DI:
25817 case MULTI_ARG_2_SI:
25818 case MULTI_ARG_2_HI:
25819 case MULTI_ARG_2_QI:
25823 case MULTI_ARG_2_DI_IMM:
25824 case MULTI_ARG_2_SI_IMM:
25825 case MULTI_ARG_2_HI_IMM:
25826 case MULTI_ARG_2_QI_IMM:
25828 last_arg_constant = true;
25831 case MULTI_ARG_1_SF:
25832 case MULTI_ARG_1_DF:
25833 case MULTI_ARG_1_SF2:
25834 case MULTI_ARG_1_DF2:
25835 case MULTI_ARG_1_DI:
25836 case MULTI_ARG_1_SI:
25837 case MULTI_ARG_1_HI:
25838 case MULTI_ARG_1_QI:
25839 case MULTI_ARG_1_SI_DI:
25840 case MULTI_ARG_1_HI_DI:
25841 case MULTI_ARG_1_HI_SI:
25842 case MULTI_ARG_1_QI_DI:
25843 case MULTI_ARG_1_QI_SI:
25844 case MULTI_ARG_1_QI_HI:
25848 case MULTI_ARG_2_DI_CMP:
25849 case MULTI_ARG_2_SI_CMP:
25850 case MULTI_ARG_2_HI_CMP:
25851 case MULTI_ARG_2_QI_CMP:
25853 comparison_p = true;
25856 case MULTI_ARG_2_SF_TF:
25857 case MULTI_ARG_2_DF_TF:
25858 case MULTI_ARG_2_DI_TF:
25859 case MULTI_ARG_2_SI_TF:
25860 case MULTI_ARG_2_HI_TF:
25861 case MULTI_ARG_2_QI_TF:
25867 gcc_unreachable ();
25870 if (optimize || !target
25871 || GET_MODE (target) != tmode
25872 || !insn_data[icode].operand[0].predicate (target, tmode))
25873 target = gen_reg_rtx (tmode);
25875 gcc_assert (nargs <= 4);
25877 for (i = 0; i < nargs; i++)
25879 tree arg = CALL_EXPR_ARG (exp, i);
25880 rtx op = expand_normal (arg);
25881 int adjust = (comparison_p) ? 1 : 0;
25882 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25884 if (last_arg_constant && i == nargs-1)
25886 if (!CONST_INT_P (op))
25888 error ("last argument must be an immediate");
25889 return gen_reg_rtx (tmode);
25894 if (VECTOR_MODE_P (mode))
25895 op = safe_vector_operand (op, mode);
25897       /* If we aren't optimizing, only allow one memory operand to be
25898          generated.  */
25899 if (memory_operand (op, mode))
25902 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25905 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25907 op = force_reg (mode, op);
25911 args[i].mode = mode;
25917 pat = GEN_FCN (icode) (target, args[0].op);
25922 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25923 GEN_INT ((int)sub_code));
25924 else if (! comparison_p)
25925 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25928 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25932 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25937 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25941 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25945 gcc_unreachable ();
25955 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25956 insns with vec_merge. */
25959 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25963 tree arg0 = CALL_EXPR_ARG (exp, 0);
25964 rtx op1, op0 = expand_normal (arg0);
25965 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25966 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25968 if (optimize || !target
25969 || GET_MODE (target) != tmode
25970 || !insn_data[icode].operand[0].predicate (target, tmode))
25971 target = gen_reg_rtx (tmode);
25973 if (VECTOR_MODE_P (mode0))
25974 op0 = safe_vector_operand (op0, mode0);
25976 if ((optimize && !register_operand (op0, mode0))
25977 || !insn_data[icode].operand[1].predicate (op0, mode0))
25978 op0 = copy_to_mode_reg (mode0, op0);
25981 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25982 op1 = copy_to_mode_reg (mode0, op1);
25984 pat = GEN_FCN (icode) (target, op0, op1);
25991 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25994 ix86_expand_sse_compare (const struct builtin_description *d,
25995 tree exp, rtx target, bool swap)
25998 tree arg0 = CALL_EXPR_ARG (exp, 0);
25999 tree arg1 = CALL_EXPR_ARG (exp, 1);
26000 rtx op0 = expand_normal (arg0);
26001 rtx op1 = expand_normal (arg1);
26003 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
26004 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
26005 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
26006 enum rtx_code comparison = d->comparison;
26008 if (VECTOR_MODE_P (mode0))
26009 op0 = safe_vector_operand (op0, mode0);
26010 if (VECTOR_MODE_P (mode1))
26011 op1 = safe_vector_operand (op1, mode1);
26013   /* Swap operands if we have a comparison that isn't available in
26014      hardware.  */
26017 rtx tmp = gen_reg_rtx (mode1);
26018 emit_move_insn (tmp, op1);
26023 if (optimize || !target
26024 || GET_MODE (target) != tmode
26025 || !insn_data[d->icode].operand[0].predicate (target, tmode))
26026 target = gen_reg_rtx (tmode);
26028 if ((optimize && !register_operand (op0, mode0))
26029 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
26030 op0 = copy_to_mode_reg (mode0, op0);
26031 if ((optimize && !register_operand (op1, mode1))
26032 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
26033 op1 = copy_to_mode_reg (mode1, op1);
26035 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
26036 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
26043 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
26046 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
26050 tree arg0 = CALL_EXPR_ARG (exp, 0);
26051 tree arg1 = CALL_EXPR_ARG (exp, 1);
26052 rtx op0 = expand_normal (arg0);
26053 rtx op1 = expand_normal (arg1);
26054 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26055 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26056 enum rtx_code comparison = d->comparison;
26058 if (VECTOR_MODE_P (mode0))
26059 op0 = safe_vector_operand (op0, mode0);
26060 if (VECTOR_MODE_P (mode1))
26061 op1 = safe_vector_operand (op1, mode1);
26063   /* Swap operands if we have a comparison that isn't available in
26064      hardware.  */
26065 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
26072 target = gen_reg_rtx (SImode);
26073 emit_move_insn (target, const0_rtx);
26074 target = gen_rtx_SUBREG (QImode, target, 0);
26076 if ((optimize && !register_operand (op0, mode0))
26077 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26078 op0 = copy_to_mode_reg (mode0, op0);
26079 if ((optimize && !register_operand (op1, mode1))
26080 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26081 op1 = copy_to_mode_reg (mode1, op1);
26083 pat = GEN_FCN (d->icode) (op0, op1);
26087 emit_insn (gen_rtx_SET (VOIDmode,
26088 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26089 gen_rtx_fmt_ee (comparison, QImode,
26093 return SUBREG_REG (target);
26096 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
26099 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
26103 tree arg0 = CALL_EXPR_ARG (exp, 0);
26104 tree arg1 = CALL_EXPR_ARG (exp, 1);
26105 rtx op0 = expand_normal (arg0);
26106 rtx op1 = expand_normal (arg1);
26107 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
26108 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
26109 enum rtx_code comparison = d->comparison;
26111 if (VECTOR_MODE_P (mode0))
26112 op0 = safe_vector_operand (op0, mode0);
26113 if (VECTOR_MODE_P (mode1))
26114 op1 = safe_vector_operand (op1, mode1);
26116 target = gen_reg_rtx (SImode);
26117 emit_move_insn (target, const0_rtx);
26118 target = gen_rtx_SUBREG (QImode, target, 0);
26120 if ((optimize && !register_operand (op0, mode0))
26121 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
26122 op0 = copy_to_mode_reg (mode0, op0);
26123 if ((optimize && !register_operand (op1, mode1))
26124 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
26125 op1 = copy_to_mode_reg (mode1, op1);
26127 pat = GEN_FCN (d->icode) (op0, op1);
26131 emit_insn (gen_rtx_SET (VOIDmode,
26132 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26133 gen_rtx_fmt_ee (comparison, QImode,
26137 return SUBREG_REG (target);
26140 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
26143 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
26144 tree exp, rtx target)
26147 tree arg0 = CALL_EXPR_ARG (exp, 0);
26148 tree arg1 = CALL_EXPR_ARG (exp, 1);
26149 tree arg2 = CALL_EXPR_ARG (exp, 2);
26150 tree arg3 = CALL_EXPR_ARG (exp, 3);
26151 tree arg4 = CALL_EXPR_ARG (exp, 4);
26152 rtx scratch0, scratch1;
26153 rtx op0 = expand_normal (arg0);
26154 rtx op1 = expand_normal (arg1);
26155 rtx op2 = expand_normal (arg2);
26156 rtx op3 = expand_normal (arg3);
26157 rtx op4 = expand_normal (arg4);
26158 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
26160 tmode0 = insn_data[d->icode].operand[0].mode;
26161 tmode1 = insn_data[d->icode].operand[1].mode;
26162 modev2 = insn_data[d->icode].operand[2].mode;
26163 modei3 = insn_data[d->icode].operand[3].mode;
26164 modev4 = insn_data[d->icode].operand[4].mode;
26165 modei5 = insn_data[d->icode].operand[5].mode;
26166 modeimm = insn_data[d->icode].operand[6].mode;
26168 if (VECTOR_MODE_P (modev2))
26169 op0 = safe_vector_operand (op0, modev2);
26170 if (VECTOR_MODE_P (modev4))
26171 op2 = safe_vector_operand (op2, modev4);
26173 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26174 op0 = copy_to_mode_reg (modev2, op0);
26175 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
26176 op1 = copy_to_mode_reg (modei3, op1);
26177 if ((optimize && !register_operand (op2, modev4))
26178 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
26179 op2 = copy_to_mode_reg (modev4, op2);
26180 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
26181 op3 = copy_to_mode_reg (modei5, op3);
26183 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
26185       error ("the fifth argument must be an 8-bit immediate");
26189 if (d->code == IX86_BUILTIN_PCMPESTRI128)
26191 if (optimize || !target
26192 || GET_MODE (target) != tmode0
26193 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26194 target = gen_reg_rtx (tmode0);
26196 scratch1 = gen_reg_rtx (tmode1);
26198 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
26200 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
26202 if (optimize || !target
26203 || GET_MODE (target) != tmode1
26204 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26205 target = gen_reg_rtx (tmode1);
26207 scratch0 = gen_reg_rtx (tmode0);
26209 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
26213 gcc_assert (d->flag);
26215 scratch0 = gen_reg_rtx (tmode0);
26216 scratch1 = gen_reg_rtx (tmode1);
26218 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
26228 target = gen_reg_rtx (SImode);
26229 emit_move_insn (target, const0_rtx);
26230 target = gen_rtx_SUBREG (QImode, target, 0);
26233 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26234 gen_rtx_fmt_ee (EQ, QImode,
26235 gen_rtx_REG ((enum machine_mode) d->flag,
26238 return SUBREG_REG (target);
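/* Editorial usage sketch: smmintrin.h's _mm_cmpestri boils down to a
   call through this expander, e.g. (assuming -msse4.2, with 0x0c as a
   typical mode byte):

       int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);

   The mode byte must be an 8-bit literal, as enforced above.  */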
26245 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
26248 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
26249 tree exp, rtx target)
26252 tree arg0 = CALL_EXPR_ARG (exp, 0);
26253 tree arg1 = CALL_EXPR_ARG (exp, 1);
26254 tree arg2 = CALL_EXPR_ARG (exp, 2);
26255 rtx scratch0, scratch1;
26256 rtx op0 = expand_normal (arg0);
26257 rtx op1 = expand_normal (arg1);
26258 rtx op2 = expand_normal (arg2);
26259 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26261 tmode0 = insn_data[d->icode].operand[0].mode;
26262 tmode1 = insn_data[d->icode].operand[1].mode;
26263 modev2 = insn_data[d->icode].operand[2].mode;
26264 modev3 = insn_data[d->icode].operand[3].mode;
26265 modeimm = insn_data[d->icode].operand[4].mode;
26267 if (VECTOR_MODE_P (modev2))
26268 op0 = safe_vector_operand (op0, modev2);
26269 if (VECTOR_MODE_P (modev3))
26270 op1 = safe_vector_operand (op1, modev3);
26272 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26273 op0 = copy_to_mode_reg (modev2, op0);
26274 if ((optimize && !register_operand (op1, modev3))
26275 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26276 op1 = copy_to_mode_reg (modev3, op1);
26278 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26280       error ("the third argument must be an 8-bit immediate");
26284 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26286 if (optimize || !target
26287 || GET_MODE (target) != tmode0
26288 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26289 target = gen_reg_rtx (tmode0);
26291 scratch1 = gen_reg_rtx (tmode1);
26293 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26295 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26297 if (optimize || !target
26298 || GET_MODE (target) != tmode1
26299 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26300 target = gen_reg_rtx (tmode1);
26302 scratch0 = gen_reg_rtx (tmode0);
26304 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26308 gcc_assert (d->flag);
26310 scratch0 = gen_reg_rtx (tmode0);
26311 scratch1 = gen_reg_rtx (tmode1);
26313 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26323 target = gen_reg_rtx (SImode);
26324 emit_move_insn (target, const0_rtx);
26325 target = gen_rtx_SUBREG (QImode, target, 0);
26328 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26329 gen_rtx_fmt_ee (EQ, QImode,
26330 gen_rtx_REG ((enum machine_mode) d->flag,
26333 return SUBREG_REG (target);
26339 /* Subroutine of ix86_expand_builtin to take care of insns with a
26340    variable number of operands.  */
26343 ix86_expand_args_builtin (const struct builtin_description *d,
26344 tree exp, rtx target)
26346 rtx pat, real_target;
26347 unsigned int i, nargs;
26348 unsigned int nargs_constant = 0;
26349 int num_memory = 0;
26353 enum machine_mode mode;
26355 bool last_arg_count = false;
26356 enum insn_code icode = d->icode;
26357 const struct insn_data_d *insn_p = &insn_data[icode];
26358 enum machine_mode tmode = insn_p->operand[0].mode;
26359 enum machine_mode rmode = VOIDmode;
26361 enum rtx_code comparison = d->comparison;
26363 switch ((enum ix86_builtin_func_type) d->flag)
26365 case INT_FTYPE_V8SF_V8SF_PTEST:
26366 case INT_FTYPE_V4DI_V4DI_PTEST:
26367 case INT_FTYPE_V4DF_V4DF_PTEST:
26368 case INT_FTYPE_V4SF_V4SF_PTEST:
26369 case INT_FTYPE_V2DI_V2DI_PTEST:
26370 case INT_FTYPE_V2DF_V2DF_PTEST:
26371 return ix86_expand_sse_ptest (d, exp, target);
26372 case FLOAT128_FTYPE_FLOAT128:
26373 case FLOAT_FTYPE_FLOAT:
26374 case INT_FTYPE_INT:
26375 case UINT64_FTYPE_INT:
26376 case UINT16_FTYPE_UINT16:
26377 case INT64_FTYPE_INT64:
26378 case INT64_FTYPE_V4SF:
26379 case INT64_FTYPE_V2DF:
26380 case INT_FTYPE_V16QI:
26381 case INT_FTYPE_V8QI:
26382 case INT_FTYPE_V8SF:
26383 case INT_FTYPE_V4DF:
26384 case INT_FTYPE_V4SF:
26385 case INT_FTYPE_V2DF:
26386 case V16QI_FTYPE_V16QI:
26387 case V8SI_FTYPE_V8SF:
26388 case V8SI_FTYPE_V4SI:
26389 case V8HI_FTYPE_V8HI:
26390 case V8HI_FTYPE_V16QI:
26391 case V8QI_FTYPE_V8QI:
26392 case V8SF_FTYPE_V8SF:
26393 case V8SF_FTYPE_V8SI:
26394 case V8SF_FTYPE_V4SF:
26395 case V8SF_FTYPE_V8HI:
26396 case V4SI_FTYPE_V4SI:
26397 case V4SI_FTYPE_V16QI:
26398 case V4SI_FTYPE_V4SF:
26399 case V4SI_FTYPE_V8SI:
26400 case V4SI_FTYPE_V8HI:
26401 case V4SI_FTYPE_V4DF:
26402 case V4SI_FTYPE_V2DF:
26403 case V4HI_FTYPE_V4HI:
26404 case V4DF_FTYPE_V4DF:
26405 case V4DF_FTYPE_V4SI:
26406 case V4DF_FTYPE_V4SF:
26407 case V4DF_FTYPE_V2DF:
26408 case V4SF_FTYPE_V4SF:
26409 case V4SF_FTYPE_V4SI:
26410 case V4SF_FTYPE_V8SF:
26411 case V4SF_FTYPE_V4DF:
26412 case V4SF_FTYPE_V8HI:
26413 case V4SF_FTYPE_V2DF:
26414 case V2DI_FTYPE_V2DI:
26415 case V2DI_FTYPE_V16QI:
26416 case V2DI_FTYPE_V8HI:
26417 case V2DI_FTYPE_V4SI:
26418 case V2DF_FTYPE_V2DF:
26419 case V2DF_FTYPE_V4SI:
26420 case V2DF_FTYPE_V4DF:
26421 case V2DF_FTYPE_V4SF:
26422 case V2DF_FTYPE_V2SI:
26423 case V2SI_FTYPE_V2SI:
26424 case V2SI_FTYPE_V4SF:
26425 case V2SI_FTYPE_V2SF:
26426 case V2SI_FTYPE_V2DF:
26427 case V2SF_FTYPE_V2SF:
26428 case V2SF_FTYPE_V2SI:
26431 case V4SF_FTYPE_V4SF_VEC_MERGE:
26432 case V2DF_FTYPE_V2DF_VEC_MERGE:
26433 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26434 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26435 case V16QI_FTYPE_V16QI_V16QI:
26436 case V16QI_FTYPE_V8HI_V8HI:
26437 case V8QI_FTYPE_V8QI_V8QI:
26438 case V8QI_FTYPE_V4HI_V4HI:
26439 case V8HI_FTYPE_V8HI_V8HI:
26440 case V8HI_FTYPE_V16QI_V16QI:
26441 case V8HI_FTYPE_V4SI_V4SI:
26442 case V8SF_FTYPE_V8SF_V8SF:
26443 case V8SF_FTYPE_V8SF_V8SI:
26444 case V4SI_FTYPE_V4SI_V4SI:
26445 case V4SI_FTYPE_V8HI_V8HI:
26446 case V4SI_FTYPE_V4SF_V4SF:
26447 case V4SI_FTYPE_V2DF_V2DF:
26448 case V4HI_FTYPE_V4HI_V4HI:
26449 case V4HI_FTYPE_V8QI_V8QI:
26450 case V4HI_FTYPE_V2SI_V2SI:
26451 case V4DF_FTYPE_V4DF_V4DF:
26452 case V4DF_FTYPE_V4DF_V4DI:
26453 case V4SF_FTYPE_V4SF_V4SF:
26454 case V4SF_FTYPE_V4SF_V4SI:
26455 case V4SF_FTYPE_V4SF_V2SI:
26456 case V4SF_FTYPE_V4SF_V2DF:
26457 case V4SF_FTYPE_V4SF_DI:
26458 case V4SF_FTYPE_V4SF_SI:
26459 case V2DI_FTYPE_V2DI_V2DI:
26460 case V2DI_FTYPE_V16QI_V16QI:
26461 case V2DI_FTYPE_V4SI_V4SI:
26462 case V2DI_FTYPE_V2DI_V16QI:
26463 case V2DI_FTYPE_V2DF_V2DF:
26464 case V2SI_FTYPE_V2SI_V2SI:
26465 case V2SI_FTYPE_V4HI_V4HI:
26466 case V2SI_FTYPE_V2SF_V2SF:
26467 case V2DF_FTYPE_V2DF_V2DF:
26468 case V2DF_FTYPE_V2DF_V4SF:
26469 case V2DF_FTYPE_V2DF_V2DI:
26470 case V2DF_FTYPE_V2DF_DI:
26471 case V2DF_FTYPE_V2DF_SI:
26472 case V2SF_FTYPE_V2SF_V2SF:
26473 case V1DI_FTYPE_V1DI_V1DI:
26474 case V1DI_FTYPE_V8QI_V8QI:
26475 case V1DI_FTYPE_V2SI_V2SI:
26476 if (comparison == UNKNOWN)
26477 return ix86_expand_binop_builtin (icode, exp, target);
26480 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26481 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26482 gcc_assert (comparison != UNKNOWN);
26486 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26487 case V8HI_FTYPE_V8HI_SI_COUNT:
26488 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26489 case V4SI_FTYPE_V4SI_SI_COUNT:
26490 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26491 case V4HI_FTYPE_V4HI_SI_COUNT:
26492 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26493 case V2DI_FTYPE_V2DI_SI_COUNT:
26494 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26495 case V2SI_FTYPE_V2SI_SI_COUNT:
26496 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26497 case V1DI_FTYPE_V1DI_SI_COUNT:
26499 last_arg_count = true;
26501 case UINT64_FTYPE_UINT64_UINT64:
26502 case UINT_FTYPE_UINT_UINT:
26503 case UINT_FTYPE_UINT_USHORT:
26504 case UINT_FTYPE_UINT_UCHAR:
26505 case UINT16_FTYPE_UINT16_INT:
26506 case UINT8_FTYPE_UINT8_INT:
26509 case V2DI_FTYPE_V2DI_INT_CONVERT:
26512 nargs_constant = 1;
26514 case V8HI_FTYPE_V8HI_INT:
26515 case V8HI_FTYPE_V8SF_INT:
26516 case V8HI_FTYPE_V4SF_INT:
26517 case V8SF_FTYPE_V8SF_INT:
26518 case V4SI_FTYPE_V4SI_INT:
26519 case V4SI_FTYPE_V8SI_INT:
26520 case V4HI_FTYPE_V4HI_INT:
26521 case V4DF_FTYPE_V4DF_INT:
26522 case V4SF_FTYPE_V4SF_INT:
26523 case V4SF_FTYPE_V8SF_INT:
26524 case V2DI_FTYPE_V2DI_INT:
26525 case V2DF_FTYPE_V2DF_INT:
26526 case V2DF_FTYPE_V4DF_INT:
26528 nargs_constant = 1;
26530 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26531 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26532 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26533 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26534 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26537 case V16QI_FTYPE_V16QI_V16QI_INT:
26538 case V8HI_FTYPE_V8HI_V8HI_INT:
26539 case V8SI_FTYPE_V8SI_V8SI_INT:
26540 case V8SI_FTYPE_V8SI_V4SI_INT:
26541 case V8SF_FTYPE_V8SF_V8SF_INT:
26542 case V8SF_FTYPE_V8SF_V4SF_INT:
26543 case V4SI_FTYPE_V4SI_V4SI_INT:
26544 case V4DF_FTYPE_V4DF_V4DF_INT:
26545 case V4DF_FTYPE_V4DF_V2DF_INT:
26546 case V4SF_FTYPE_V4SF_V4SF_INT:
26547 case V2DI_FTYPE_V2DI_V2DI_INT:
26548 case V2DF_FTYPE_V2DF_V2DF_INT:
26550 nargs_constant = 1;
26552 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26555 nargs_constant = 1;
26557 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26560 nargs_constant = 1;
26562 case V2DI_FTYPE_V2DI_UINT_UINT:
26564 nargs_constant = 2;
26566 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26567 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26568 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26569 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26571 nargs_constant = 1;
26573 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26575 nargs_constant = 2;
26578 gcc_unreachable ();
26581 gcc_assert (nargs <= ARRAY_SIZE (args));
26583 if (comparison != UNKNOWN)
26585 gcc_assert (nargs == 2);
26586 return ix86_expand_sse_compare (d, exp, target, swap);
26589 if (rmode == VOIDmode || rmode == tmode)
26593 || GET_MODE (target) != tmode
26594 || !insn_p->operand[0].predicate (target, tmode))
26595 target = gen_reg_rtx (tmode);
26596 real_target = target;
26600 target = gen_reg_rtx (rmode);
26601 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26604 for (i = 0; i < nargs; i++)
26606 tree arg = CALL_EXPR_ARG (exp, i);
26607 rtx op = expand_normal (arg);
26608 enum machine_mode mode = insn_p->operand[i + 1].mode;
26609 bool match = insn_p->operand[i + 1].predicate (op, mode);
26611 if (last_arg_count && (i + 1) == nargs)
26613           /* SIMD shift insns take either an 8-bit immediate or a register
26614              as the count.  But the builtin functions take int as the
26615              count.  If the count doesn't match, we put it in a register.  */
26618 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26619 if (!insn_p->operand[i + 1].predicate (op, mode))
26620 op = copy_to_reg (op);
26623 else if ((nargs - i) <= nargs_constant)
26628 case CODE_FOR_sse4_1_roundpd:
26629 case CODE_FOR_sse4_1_roundps:
26630 case CODE_FOR_sse4_1_roundsd:
26631 case CODE_FOR_sse4_1_roundss:
26632 case CODE_FOR_sse4_1_blendps:
26633 case CODE_FOR_avx_blendpd256:
26634 case CODE_FOR_avx_vpermilv4df:
26635 case CODE_FOR_avx_roundpd256:
26636 case CODE_FOR_avx_roundps256:
26637 error ("the last argument must be a 4-bit immediate");
26640 case CODE_FOR_sse4_1_blendpd:
26641 case CODE_FOR_avx_vpermilv2df:
26642 case CODE_FOR_xop_vpermil2v2df3:
26643 case CODE_FOR_xop_vpermil2v4sf3:
26644 case CODE_FOR_xop_vpermil2v4df3:
26645 case CODE_FOR_xop_vpermil2v8sf3:
26646 error ("the last argument must be a 2-bit immediate");
26649 case CODE_FOR_avx_vextractf128v4df:
26650 case CODE_FOR_avx_vextractf128v8sf:
26651 case CODE_FOR_avx_vextractf128v8si:
26652 case CODE_FOR_avx_vinsertf128v4df:
26653 case CODE_FOR_avx_vinsertf128v8sf:
26654 case CODE_FOR_avx_vinsertf128v8si:
26655 error ("the last argument must be a 1-bit immediate");
26658 case CODE_FOR_avx_cmpsdv2df3:
26659 case CODE_FOR_avx_cmpssv4sf3:
26660 case CODE_FOR_avx_cmppdv2df3:
26661 case CODE_FOR_avx_cmppsv4sf3:
26662 case CODE_FOR_avx_cmppdv4df3:
26663 case CODE_FOR_avx_cmppsv8sf3:
26664 error ("the last argument must be a 5-bit immediate");
26668 switch (nargs_constant)
26671 if ((nargs - i) == nargs_constant)
26673 error ("the next to last argument must be an 8-bit immediate");
26677 error ("the last argument must be an 8-bit immediate");
26680 gcc_unreachable ();
26687 if (VECTOR_MODE_P (mode))
26688 op = safe_vector_operand (op, mode);
26690       /* If we aren't optimizing, only allow one memory operand to
26691          be generated.  */
26692 if (memory_operand (op, mode))
26695 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26697 if (optimize || !match || num_memory > 1)
26698 op = copy_to_mode_reg (mode, op);
26702 op = copy_to_reg (op);
26703 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26708 args[i].mode = mode;
26714 pat = GEN_FCN (icode) (real_target, args[0].op);
26717 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26720 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26724 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26725 args[2].op, args[3].op);
26728 gcc_unreachable ();
26738 /* Subroutine of ix86_expand_builtin to take care of special insns
26739    with a variable number of operands.  */
26742 ix86_expand_special_args_builtin (const struct builtin_description *d,
26743 tree exp, rtx target)
26747 unsigned int i, nargs, arg_adjust, memory;
26751 enum machine_mode mode;
26753 enum insn_code icode = d->icode;
26754 bool last_arg_constant = false;
26755 const struct insn_data_d *insn_p = &insn_data[icode];
26756 enum machine_mode tmode = insn_p->operand[0].mode;
26757 enum { load, store } klass;
26759 switch ((enum ix86_builtin_func_type) d->flag)
26761 case VOID_FTYPE_VOID:
26762 if (icode == CODE_FOR_avx_vzeroupper)
26763 target = GEN_INT (vzeroupper_intrinsic);
26764 emit_insn (GEN_FCN (icode) (target));
26766 case VOID_FTYPE_UINT64:
26767 case VOID_FTYPE_UNSIGNED:
26773 case UINT64_FTYPE_VOID:
26774 case UNSIGNED_FTYPE_VOID:
26779 case UINT64_FTYPE_PUNSIGNED:
26780 case V2DI_FTYPE_PV2DI:
26781 case V32QI_FTYPE_PCCHAR:
26782 case V16QI_FTYPE_PCCHAR:
26783 case V8SF_FTYPE_PCV4SF:
26784 case V8SF_FTYPE_PCFLOAT:
26785 case V4SF_FTYPE_PCFLOAT:
26786 case V4DF_FTYPE_PCV2DF:
26787 case V4DF_FTYPE_PCDOUBLE:
26788 case V2DF_FTYPE_PCDOUBLE:
26789 case VOID_FTYPE_PVOID:
26794 case VOID_FTYPE_PV2SF_V4SF:
26795 case VOID_FTYPE_PV4DI_V4DI:
26796 case VOID_FTYPE_PV2DI_V2DI:
26797 case VOID_FTYPE_PCHAR_V32QI:
26798 case VOID_FTYPE_PCHAR_V16QI:
26799 case VOID_FTYPE_PFLOAT_V8SF:
26800 case VOID_FTYPE_PFLOAT_V4SF:
26801 case VOID_FTYPE_PDOUBLE_V4DF:
26802 case VOID_FTYPE_PDOUBLE_V2DF:
26803 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26804 case VOID_FTYPE_PINT_INT:
26807 /* Reserve memory operand for target. */
26808 memory = ARRAY_SIZE (args);
26810 case V4SF_FTYPE_V4SF_PCV2SF:
26811 case V2DF_FTYPE_V2DF_PCDOUBLE:
26816 case V8SF_FTYPE_PCV8SF_V8SF:
26817 case V4DF_FTYPE_PCV4DF_V4DF:
26818 case V4SF_FTYPE_PCV4SF_V4SF:
26819 case V2DF_FTYPE_PCV2DF_V2DF:
26824 case VOID_FTYPE_PV8SF_V8SF_V8SF:
26825 case VOID_FTYPE_PV4DF_V4DF_V4DF:
26826 case VOID_FTYPE_PV4SF_V4SF_V4SF:
26827 case VOID_FTYPE_PV2DF_V2DF_V2DF:
26830 /* Reserve memory operand for target. */
26831 memory = ARRAY_SIZE (args);
26833 case VOID_FTYPE_UINT_UINT_UINT:
26834 case VOID_FTYPE_UINT64_UINT_UINT:
26835 case UCHAR_FTYPE_UINT_UINT_UINT:
26836 case UCHAR_FTYPE_UINT64_UINT_UINT:
26839 memory = ARRAY_SIZE (args);
26840 last_arg_constant = true;
26843 gcc_unreachable ();
26846 gcc_assert (nargs <= ARRAY_SIZE (args));
26848 if (klass == store)
26850 arg = CALL_EXPR_ARG (exp, 0);
26851 op = expand_normal (arg);
26852 gcc_assert (target == 0);
26854 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26856 target = force_reg (tmode, op);
26864 || GET_MODE (target) != tmode
26865 || !insn_p->operand[0].predicate (target, tmode))
26866 target = gen_reg_rtx (tmode);
26869 for (i = 0; i < nargs; i++)
26871 enum machine_mode mode = insn_p->operand[i + 1].mode;
26874 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26875 op = expand_normal (arg);
26876 match = insn_p->operand[i + 1].predicate (op, mode);
26878 if (last_arg_constant && (i + 1) == nargs)
26882 if (icode == CODE_FOR_lwp_lwpvalsi3
26883 || icode == CODE_FOR_lwp_lwpinssi3
26884 || icode == CODE_FOR_lwp_lwpvaldi3
26885 || icode == CODE_FOR_lwp_lwpinsdi3)
26886 error ("the last argument must be a 32-bit immediate");
26888 error ("the last argument must be an 8-bit immediate");
26896 /* This must be the memory operand. */
26897 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
26898 gcc_assert (GET_MODE (op) == mode
26899 || GET_MODE (op) == VOIDmode);
26903           /* This must be a register.  */
26904 if (VECTOR_MODE_P (mode))
26905 op = safe_vector_operand (op, mode);
26907 gcc_assert (GET_MODE (op) == mode
26908 || GET_MODE (op) == VOIDmode);
26909 op = copy_to_mode_reg (mode, op);
26914 args[i].mode = mode;
26920 pat = GEN_FCN (icode) (target);
26923 pat = GEN_FCN (icode) (target, args[0].op);
26926 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26929 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26932 gcc_unreachable ();
26938 return klass == store ? 0 : target;
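/* Editorial usage sketch: a store-class builtin from the list above is
   the non-temporal move with signature VOID_FTYPE_PFLOAT_V4SF; its
   first argument becomes the MEM target and the expander returns 0:

       __builtin_ia32_movntps (dst, v);    hypothetical fragment, -msse
*/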
26941 /* Return the integer constant in ARG. Constrain it to be in the range
26942 of the subparts of VEC_TYPE; issue an error if not. */
26945 get_element_number (tree vec_type, tree arg)
26947 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26949 if (!host_integerp (arg, 1)
26950 || (elt = tree_low_cst (arg, 1), elt > max))
26952 error ("selector must be an integer constant in the range 0..%wi", max);
26959 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26960 ix86_expand_vector_init. We DO have language-level syntax for this, in
26961 the form of (type){ init-list }. Except that since we can't place emms
26962 instructions from inside the compiler, we can't allow the use of MMX
26963 registers unless the user explicitly asks for it. So we do *not* define
26964 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26965    we have builtins invoked by mmintrin.h that give us license to emit
26966 these sorts of instructions. */
26969 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26971 enum machine_mode tmode = TYPE_MODE (type);
26972 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26973 int i, n_elt = GET_MODE_NUNITS (tmode);
26974 rtvec v = rtvec_alloc (n_elt);
26976 gcc_assert (VECTOR_MODE_P (tmode));
26977 gcc_assert (call_expr_nargs (exp) == n_elt);
26979 for (i = 0; i < n_elt; ++i)
26981 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26982 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26985 if (!target || !register_operand (target, tmode))
26986 target = gen_reg_rtx (tmode);
26988 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26992 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26993 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26994 had a language-level syntax for referencing vector elements. */
26997 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26999 enum machine_mode tmode, mode0;
27004 arg0 = CALL_EXPR_ARG (exp, 0);
27005 arg1 = CALL_EXPR_ARG (exp, 1);
27007 op0 = expand_normal (arg0);
27008 elt = get_element_number (TREE_TYPE (arg0), arg1);
27010 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27011 mode0 = TYPE_MODE (TREE_TYPE (arg0));
27012 gcc_assert (VECTOR_MODE_P (mode0));
27014 op0 = force_reg (mode0, op0);
27016 if (optimize || !target || !register_operand (target, tmode))
27017 target = gen_reg_rtx (tmode);
27019 ix86_expand_vector_extract (true, target, op0, elt);
27024 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
27025 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
27026 a language-level syntax for referencing vector elements. */
27029 ix86_expand_vec_set_builtin (tree exp)
27031 enum machine_mode tmode, mode1;
27032 tree arg0, arg1, arg2;
27034 rtx op0, op1, target;
27036 arg0 = CALL_EXPR_ARG (exp, 0);
27037 arg1 = CALL_EXPR_ARG (exp, 1);
27038 arg2 = CALL_EXPR_ARG (exp, 2);
27040 tmode = TYPE_MODE (TREE_TYPE (arg0));
27041 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
27042 gcc_assert (VECTOR_MODE_P (tmode));
27044 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
27045 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
27046 elt = get_element_number (TREE_TYPE (arg0), arg2);
27048 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
27049 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
27051 op0 = force_reg (tmode, op0);
27052 op1 = force_reg (mode1, op1);
27054 /* OP0 is the source of these builtin functions and shouldn't be
27055 modified. Create a copy, use it and return it as target. */
27056 target = gen_reg_rtx (tmode);
27057 emit_move_insn (target, op0);
27058 ix86_expand_vector_set (true, target, op1, elt);
27063 /* Expand an expression EXP that calls a built-in function,
27064 with result going to TARGET if that's convenient
27065 (and in mode MODE if that's convenient).
27066 SUBTARGET may be used as the target for computing one of EXP's operands.
27067 IGNORE is nonzero if the value is to be ignored. */
27070 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
27071 enum machine_mode mode ATTRIBUTE_UNUSED,
27072 int ignore ATTRIBUTE_UNUSED)
27074 const struct builtin_description *d;
27076 enum insn_code icode;
27077 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
27078 tree arg0, arg1, arg2;
27079 rtx op0, op1, op2, pat;
27080 enum machine_mode mode0, mode1, mode2;
27081 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
27083 /* Determine whether the builtin function is available under the current ISA.
27084 Originally the builtin was not created if it wasn't applicable to the
27085 current ISA based on the command line switches. With function specific
27086 options, we need to check in the context of the function making the call
27087 whether it is supported. */
27088 if (ix86_builtins_isa[fcode].isa
27089 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
27091 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
27092 NULL, NULL, false);
27095 error ("%qE needs unknown isa option", fndecl);
27098 gcc_assert (opts != NULL);
27099 error ("%qE needs isa option %s", fndecl, opts);
27107 case IX86_BUILTIN_MASKMOVQ:
27108 case IX86_BUILTIN_MASKMOVDQU:
27109 icode = (fcode == IX86_BUILTIN_MASKMOVQ
27110 ? CODE_FOR_mmx_maskmovq
27111 : CODE_FOR_sse2_maskmovdqu);
27112 /* Note the arg order is different from the operand order. */
27113 arg1 = CALL_EXPR_ARG (exp, 0);
27114 arg2 = CALL_EXPR_ARG (exp, 1);
27115 arg0 = CALL_EXPR_ARG (exp, 2);
27116 op0 = expand_normal (arg0);
27117 op1 = expand_normal (arg1);
27118 op2 = expand_normal (arg2);
27119 mode0 = insn_data[icode].operand[0].mode;
27120 mode1 = insn_data[icode].operand[1].mode;
27121 mode2 = insn_data[icode].operand[2].mode;
27123 op0 = force_reg (Pmode, op0);
27124 op0 = gen_rtx_MEM (mode1, op0);
27126 if (!insn_data[icode].operand[0].predicate (op0, mode0))
27127 op0 = copy_to_mode_reg (mode0, op0);
27128 if (!insn_data[icode].operand[1].predicate (op1, mode1))
27129 op1 = copy_to_mode_reg (mode1, op1);
27130 if (!insn_data[icode].operand[2].predicate (op2, mode2))
27131 op2 = copy_to_mode_reg (mode2, op2);
27132 pat = GEN_FCN (icode) (op0, op1, op2);
27138 case IX86_BUILTIN_LDMXCSR:
27139 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
27140 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27141 emit_move_insn (target, op0);
27142 emit_insn (gen_sse_ldmxcsr (target));
27145 case IX86_BUILTIN_STMXCSR:
27146 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
27147 emit_insn (gen_sse_stmxcsr (target));
27148 return copy_to_mode_reg (SImode, target);
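/* Editorial usage sketch: xmmintrin.h's _mm_getcsr/_mm_setcsr are thin
   wrappers over these two cases.  Setting the flush-to-zero bit (bit 15)
   of MXCSR, for example:

       __builtin_ia32_ldmxcsr (__builtin_ia32_stmxcsr () | 0x8000);

   Both go through a stack slot because the insns want a memory operand.  */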
27150 case IX86_BUILTIN_CLFLUSH:
27151 arg0 = CALL_EXPR_ARG (exp, 0);
27152 op0 = expand_normal (arg0);
27153 icode = CODE_FOR_sse2_clflush;
27154 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27155 op0 = copy_to_mode_reg (Pmode, op0);
27157 emit_insn (gen_sse2_clflush (op0));
27160 case IX86_BUILTIN_MONITOR:
27161 arg0 = CALL_EXPR_ARG (exp, 0);
27162 arg1 = CALL_EXPR_ARG (exp, 1);
27163 arg2 = CALL_EXPR_ARG (exp, 2);
27164 op0 = expand_normal (arg0);
27165 op1 = expand_normal (arg1);
27166 op2 = expand_normal (arg2);
27168 op0 = copy_to_mode_reg (Pmode, op0);
27170 op1 = copy_to_mode_reg (SImode, op1);
27172 op2 = copy_to_mode_reg (SImode, op2);
27173 emit_insn (ix86_gen_monitor (op0, op1, op2));
27176 case IX86_BUILTIN_MWAIT:
27177 arg0 = CALL_EXPR_ARG (exp, 0);
27178 arg1 = CALL_EXPR_ARG (exp, 1);
27179 op0 = expand_normal (arg0);
27180 op1 = expand_normal (arg1);
27182 op0 = copy_to_mode_reg (SImode, op0);
27184 op1 = copy_to_mode_reg (SImode, op1);
27185 emit_insn (gen_sse3_mwait (op0, op1));
27188 case IX86_BUILTIN_VEC_INIT_V2SI:
27189 case IX86_BUILTIN_VEC_INIT_V4HI:
27190 case IX86_BUILTIN_VEC_INIT_V8QI:
27191 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
27193 case IX86_BUILTIN_VEC_EXT_V2DF:
27194 case IX86_BUILTIN_VEC_EXT_V2DI:
27195 case IX86_BUILTIN_VEC_EXT_V4SF:
27196 case IX86_BUILTIN_VEC_EXT_V4SI:
27197 case IX86_BUILTIN_VEC_EXT_V8HI:
27198 case IX86_BUILTIN_VEC_EXT_V2SI:
27199 case IX86_BUILTIN_VEC_EXT_V4HI:
27200 case IX86_BUILTIN_VEC_EXT_V16QI:
27201 return ix86_expand_vec_ext_builtin (exp, target);
27203 case IX86_BUILTIN_VEC_SET_V2DI:
27204 case IX86_BUILTIN_VEC_SET_V4SF:
27205 case IX86_BUILTIN_VEC_SET_V4SI:
27206 case IX86_BUILTIN_VEC_SET_V8HI:
27207 case IX86_BUILTIN_VEC_SET_V4HI:
27208 case IX86_BUILTIN_VEC_SET_V16QI:
27209 return ix86_expand_vec_set_builtin (exp);
27211 case IX86_BUILTIN_VEC_PERM_V2DF:
27212 case IX86_BUILTIN_VEC_PERM_V4SF:
27213 case IX86_BUILTIN_VEC_PERM_V2DI:
27214 case IX86_BUILTIN_VEC_PERM_V4SI:
27215 case IX86_BUILTIN_VEC_PERM_V8HI:
27216 case IX86_BUILTIN_VEC_PERM_V16QI:
27217 case IX86_BUILTIN_VEC_PERM_V2DI_U:
27218 case IX86_BUILTIN_VEC_PERM_V4SI_U:
27219 case IX86_BUILTIN_VEC_PERM_V8HI_U:
27220 case IX86_BUILTIN_VEC_PERM_V16QI_U:
27221 case IX86_BUILTIN_VEC_PERM_V4DF:
27222 case IX86_BUILTIN_VEC_PERM_V8SF:
27223 return ix86_expand_vec_perm_builtin (exp);
27225 case IX86_BUILTIN_INFQ:
27226 case IX86_BUILTIN_HUGE_VALQ:
27228 REAL_VALUE_TYPE inf;
27232 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
27234 tmp = validize_mem (force_const_mem (mode, tmp));
27237 target = gen_reg_rtx (mode);
27239 emit_move_insn (target, tmp);
27243 case IX86_BUILTIN_LLWPCB:
27244 arg0 = CALL_EXPR_ARG (exp, 0);
27245 op0 = expand_normal (arg0);
27246 icode = CODE_FOR_lwp_llwpcb;
27247 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
27248 op0 = copy_to_mode_reg (Pmode, op0);
27249 emit_insn (gen_lwp_llwpcb (op0));
27252 case IX86_BUILTIN_SLWPCB:
27253 icode = CODE_FOR_lwp_slwpcb;
27255 || !insn_data[icode].operand[0].predicate (target, Pmode))
27256 target = gen_reg_rtx (Pmode);
27257 emit_insn (gen_lwp_slwpcb (target));
27260 case IX86_BUILTIN_BEXTRI32:
27261 case IX86_BUILTIN_BEXTRI64:
27262 arg0 = CALL_EXPR_ARG (exp, 0);
27263 arg1 = CALL_EXPR_ARG (exp, 1);
27264 op0 = expand_normal (arg0);
27265 op1 = expand_normal (arg1);
27266 icode = (fcode == IX86_BUILTIN_BEXTRI32
27267 ? CODE_FOR_tbm_bextri_si
27268 : CODE_FOR_tbm_bextri_di);
27269 if (!CONST_INT_P (op1))
27271 error ("last argument must be an immediate");
27276 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
27277 unsigned char lsb_index = INTVAL (op1) & 0xFF;
27278 op1 = GEN_INT (length);
27279 op2 = GEN_INT (lsb_index);
27280 pat = GEN_FCN (icode) (target, op0, op1, op2);
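/* Editorial encoding sketch: as decoded above, the BEXTRI control word
   packs the starting bit in bits [7:0] and the field length in bits
   [15:8].  Assuming -mtbm and the __builtin_ia32_bextri_u32 spelling
   used by tbmintrin.h:

       __builtin_ia32_bextri_u32 (x, (8 << 8) | 4)

   extracts the 8-bit field of x that starts at bit 4.  */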
27286 case IX86_BUILTIN_RDRAND16_STEP:
27287 icode = CODE_FOR_rdrandhi_1;
27291 case IX86_BUILTIN_RDRAND32_STEP:
27292 icode = CODE_FOR_rdrandsi_1;
27296 case IX86_BUILTIN_RDRAND64_STEP:
27297 icode = CODE_FOR_rdranddi_1;
27301 op0 = gen_reg_rtx (mode0);
27302 emit_insn (GEN_FCN (icode) (op0));
27304 op1 = gen_reg_rtx (SImode);
27305 emit_move_insn (op1, CONST1_RTX (SImode));
27307 /* Emit SImode conditional move. */
27308 if (mode0 == HImode)
27310 op2 = gen_reg_rtx (SImode);
27311 emit_insn (gen_zero_extendhisi2 (op2, op0));
27313 else if (mode0 == SImode)
27316 op2 = gen_rtx_SUBREG (SImode, op0, 0);
27318 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
27320 emit_insn (gen_rtx_SET (VOIDmode, op1,
27321 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
27322 emit_move_insn (target, op1);
27324 arg0 = CALL_EXPR_ARG (exp, 0);
27325 op1 = expand_normal (arg0);
27326 if (!address_operand (op1, VOIDmode))
27327 op1 = copy_addr_to_reg (op1);
27328 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
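/* Usage sketch: the conditional move emitted above models the carry
   flag returned by RDRAND (1 = valid random value).  From C the same
   step is reachable via the builtin below; a retry loop is the usual
   idiom since RDRAND may transiently fail.  Assumes -mrdrnd.  */
#if 0
static int
get_random_u32_sketch (unsigned int *out)
{
  int tries;
  for (tries = 0; tries < 10; tries++)
    if (__builtin_ia32_rdrand32_step (out))
      return 1;   /* Carry was set: *out holds a random value.  */
  return 0;
}
#endif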
27335 for (i = 0, d = bdesc_special_args;
27336 i < ARRAY_SIZE (bdesc_special_args);
27338 if (d->code == fcode)
27339 return ix86_expand_special_args_builtin (d, exp, target);
27341 for (i = 0, d = bdesc_args;
27342 i < ARRAY_SIZE (bdesc_args);
27344 if (d->code == fcode)
27347 case IX86_BUILTIN_FABSQ:
27348 case IX86_BUILTIN_COPYSIGNQ:
27350 /* Emit a normal call if SSE2 isn't available. */
27351 return expand_call (exp, target, ignore);
27353 return ix86_expand_args_builtin (d, exp, target);
27356 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27357 if (d->code == fcode)
27358 return ix86_expand_sse_comi (d, exp, target);
27360 for (i = 0, d = bdesc_pcmpestr;
27361 i < ARRAY_SIZE (bdesc_pcmpestr);
27363 if (d->code == fcode)
27364 return ix86_expand_sse_pcmpestr (d, exp, target);
27366 for (i = 0, d = bdesc_pcmpistr;
27367 i < ARRAY_SIZE (bdesc_pcmpistr);
27369 if (d->code == fcode)
27370 return ix86_expand_sse_pcmpistr (d, exp, target);
27372 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27373 if (d->code == fcode)
27374 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27375 (enum ix86_builtin_func_type)
27376 d->flag, d->comparison);
27378 gcc_unreachable ();
27381 /* Returns a function decl for a vectorized version of the builtin function
27382 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27383 if it is not available. */
27386 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27389 enum machine_mode in_mode, out_mode;
27391 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27393 if (TREE_CODE (type_out) != VECTOR_TYPE
27394 || TREE_CODE (type_in) != VECTOR_TYPE
27395 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27398 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27399 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27400 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27401 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27405 case BUILT_IN_SQRT:
27406 if (out_mode == DFmode && in_mode == DFmode)
27408 if (out_n == 2 && in_n == 2)
27409 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27410 else if (out_n == 4 && in_n == 4)
27411 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27415 case BUILT_IN_SQRTF:
27416 if (out_mode == SFmode && in_mode == SFmode)
27418 if (out_n == 4 && in_n == 4)
27419 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27420 else if (out_n == 8 && in_n == 8)
27421 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27425 case BUILT_IN_LRINT:
27426 if (out_mode == SImode && out_n == 4
27427 && in_mode == DFmode && in_n == 2)
27428 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27431 case BUILT_IN_LRINTF:
27432 if (out_mode == SImode && in_mode == SFmode)
27434 if (out_n == 4 && in_n == 4)
27435 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27436 else if (out_n == 8 && in_n == 8)
27437 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27441 case BUILT_IN_COPYSIGN:
27442 if (out_mode == DFmode && in_mode == DFmode)
27444 if (out_n == 2 && in_n == 2)
27445 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27446 else if (out_n == 4 && in_n == 4)
27447 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27451 case BUILT_IN_COPYSIGNF:
27452 if (out_mode == SFmode && in_mode == SFmode)
27454 if (out_n == 4 && in_n == 4)
27455 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27456 else if (out_n == 8 && in_n == 8)
27457 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27462 if (out_mode == DFmode && in_mode == DFmode)
27464 if (out_n == 2 && in_n == 2)
27465 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27466 if (out_n == 4 && in_n == 4)
27467 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27471 case BUILT_IN_FMAF:
27472 if (out_mode == SFmode && in_mode == SFmode)
27474 if (out_n == 4 && in_n == 4)
27475 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27476 if (out_n == 8 && in_n == 8)
27477 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
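/* Illustration (sketch): loops such as the one below are what this
   hook serves; with vectorization enabled (and -ffast-math for the
   sqrt case) the scalar BUILT_IN_SQRT calls are replaced by the
   SQRTPD/SQRTPD256 builtins returned above.  */
#if 0
static void
vec_sqrt_sketch (double *restrict dst, const double *restrict src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] = __builtin_sqrt (src[i]);
}
#endif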
27485 /* Dispatch to a handler for a vectorization library. */
27486 if (ix86_veclib_handler)
27487 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27493 /* Handler for an SVML-style interface to
27494 a library with vectorized intrinsics. */
27497 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27500 tree fntype, new_fndecl, args;
27503 enum machine_mode el_mode, in_mode;
27506 /* The SVML is suitable for unsafe math only. */
27507 if (!flag_unsafe_math_optimizations)
27510 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27511 n = TYPE_VECTOR_SUBPARTS (type_out);
27512 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27513 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27514 if (el_mode != in_mode
27522 case BUILT_IN_LOG10:
27524 case BUILT_IN_TANH:
27526 case BUILT_IN_ATAN:
27527 case BUILT_IN_ATAN2:
27528 case BUILT_IN_ATANH:
27529 case BUILT_IN_CBRT:
27530 case BUILT_IN_SINH:
27532 case BUILT_IN_ASINH:
27533 case BUILT_IN_ASIN:
27534 case BUILT_IN_COSH:
27536 case BUILT_IN_ACOSH:
27537 case BUILT_IN_ACOS:
27538 if (el_mode != DFmode || n != 2)
27542 case BUILT_IN_EXPF:
27543 case BUILT_IN_LOGF:
27544 case BUILT_IN_LOG10F:
27545 case BUILT_IN_POWF:
27546 case BUILT_IN_TANHF:
27547 case BUILT_IN_TANF:
27548 case BUILT_IN_ATANF:
27549 case BUILT_IN_ATAN2F:
27550 case BUILT_IN_ATANHF:
27551 case BUILT_IN_CBRTF:
27552 case BUILT_IN_SINHF:
27553 case BUILT_IN_SINF:
27554 case BUILT_IN_ASINHF:
27555 case BUILT_IN_ASINF:
27556 case BUILT_IN_COSHF:
27557 case BUILT_IN_COSF:
27558 case BUILT_IN_ACOSHF:
27559 case BUILT_IN_ACOSF:
27560 if (el_mode != SFmode || n != 4)
27568 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27570 if (fn == BUILT_IN_LOGF)
27571 strcpy (name, "vmlsLn4");
27572 else if (fn == BUILT_IN_LOG)
27573 strcpy (name, "vmldLn2");
27576 sprintf (name, "vmls%s", bname+10);
27577 name[strlen (name)-1] = '4';
27580 sprintf (name, "vmld%s2", bname+10);
27582 /* Convert to uppercase. */
27586 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27587 args = TREE_CHAIN (args))
27591 fntype = build_function_type_list (type_out, type_in, NULL);
27593 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27595 /* Build a function declaration for the vectorized function. */
27596 new_fndecl = build_decl (BUILTINS_LOCATION,
27597 FUNCTION_DECL, get_identifier (name), fntype);
27598 TREE_PUBLIC (new_fndecl) = 1;
27599 DECL_EXTERNAL (new_fndecl) = 1;
27600 DECL_IS_NOVOPS (new_fndecl) = 1;
27601 TREE_READONLY (new_fndecl) = 1;
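/* Standalone sketch of the SVML name mangling above, illustrative
   only: "sin" becomes "vmldSin2" (2 x DFmode) or "vmlsSin4"
   (4 x SFmode), with log special-cased to Ln.  The helper name and
   its interface are assumptions, not part of this file.  */
#if 0
#include <ctype.h>
#include <stdio.h>
#include <string.h>
static void
svml_name_sketch (const char *base, int is_double, char name[32])
{
  if (!strcmp (base, "log"))
    strcpy (name, is_double ? "vmldLn2" : "vmlsLn4");
  else
    sprintf (name, is_double ? "vmld%s2" : "vmls%s4", base);
  name[4] = (char) toupper ((unsigned char) name[4]); /* Convert to uppercase.  */
}
#endif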
27606 /* Handler for an ACML-style interface to
27607 a library with vectorized intrinsics. */
27610 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27612 char name[20] = "__vr.._";
27613 tree fntype, new_fndecl, args;
27616 enum machine_mode el_mode, in_mode;
27619 /* ACML is 64-bit only and suitable for unsafe math only, as
27620 it does not correctly support the parts of IEEE semantics that
27621 require full precision, such as denormals. */
27623 || !flag_unsafe_math_optimizations)
27626 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27627 n = TYPE_VECTOR_SUBPARTS (type_out);
27628 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27629 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27630 if (el_mode != in_mode
27640 case BUILT_IN_LOG2:
27641 case BUILT_IN_LOG10:
27644 if (el_mode != DFmode
27649 case BUILT_IN_SINF:
27650 case BUILT_IN_COSF:
27651 case BUILT_IN_EXPF:
27652 case BUILT_IN_POWF:
27653 case BUILT_IN_LOGF:
27654 case BUILT_IN_LOG2F:
27655 case BUILT_IN_LOG10F:
27658 if (el_mode != SFmode
27667 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27668 sprintf (name + 7, "%s", bname+10);
27671 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27672 args = TREE_CHAIN (args))
27676 fntype = build_function_type_list (type_out, type_in, NULL);
27678 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27680 /* Build a function declaration for the vectorized function. */
27681 new_fndecl = build_decl (BUILTINS_LOCATION,
27682 FUNCTION_DECL, get_identifier (name), fntype);
27683 TREE_PUBLIC (new_fndecl) = 1;
27684 DECL_EXTERNAL (new_fndecl) = 1;
27685 DECL_IS_NOVOPS (new_fndecl) = 1;
27686 TREE_READONLY (new_fndecl) = 1;
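/* Standalone sketch of the ACML name mangling above: the "__vr.._"
   template is filled in to give e.g. __vrd2_sin for 2 x DFmode and
   __vrs4_sinf for 4 x SFmode.  Helper name and interface are
   assumptions for illustration.  */
#if 0
#include <stdio.h>
static void
acml_name_sketch (const char *base, int is_double, char name[20])
{
  sprintf (name, "__vr%c%c_%s",
	   is_double ? 'd' : 's', is_double ? '2' : '4', base);
}
#endif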
27692 /* Returns a decl of a function that implements conversion of an integer vector
27693 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27694 are the types involved when converting according to CODE.
27695 Return NULL_TREE if it is not available. */
27698 ix86_vectorize_builtin_conversion (unsigned int code,
27699 tree dest_type, tree src_type)
27707 switch (TYPE_MODE (src_type))
27710 switch (TYPE_MODE (dest_type))
27713 return (TYPE_UNSIGNED (src_type)
27714 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27715 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27717 return (TYPE_UNSIGNED (src_type)
27719 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27725 switch (TYPE_MODE (dest_type))
27728 return (TYPE_UNSIGNED (src_type)
27730 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27739 case FIX_TRUNC_EXPR:
27740 switch (TYPE_MODE (dest_type))
27743 switch (TYPE_MODE (src_type))
27746 return (TYPE_UNSIGNED (dest_type)
27748 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27750 return (TYPE_UNSIGNED (dest_type)
27752 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27759 switch (TYPE_MODE (src_type))
27762 return (TYPE_UNSIGNED (dest_type)
27764 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
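/* Illustration (sketch): conversion loops like the one below are what
   this hook maps onto CVTDQ2PS and friends once the loop is
   vectorized.  */
#if 0
static void
int_to_float_sketch (float *restrict dst, const int *restrict src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] = (float) src[i];   /* FLOAT_EXPR -> cvtdq2ps when vectorized.  */
}
#endif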
27781 /* Returns the decl of a target-specific builtin that implements the
27782 reciprocal of the function, or NULL_TREE if not available. */
27785 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27786 bool sqrt ATTRIBUTE_UNUSED)
27788 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27789 && flag_finite_math_only && !flag_trapping_math
27790 && flag_unsafe_math_optimizations))
27794 /* Machine dependent builtins. */
27797 /* Vectorized version of sqrt to rsqrt conversion. */
27798 case IX86_BUILTIN_SQRTPS_NR:
27799 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27801 case IX86_BUILTIN_SQRTPS_NR256:
27802 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27808 /* Normal builtins. */
27811 /* Sqrt to rsqrt conversion. */
27812 case BUILT_IN_SQRTF:
27813 return ix86_builtins[IX86_BUILTIN_RSQRTF];
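/* Sketch of the "NR" in the names above: the hardware RSQRTPS
   estimate (roughly 12 bits) is refined with one Newton-Raphson step,
   y1 = y0 * (1.5 - 0.5 * x * y0 * y0).  Scalar model, illustration
   only.  */
#if 0
static float
rsqrt_nr_sketch (float x, float y0 /* hardware estimate of 1/sqrt(x) */)
{
  return y0 * (1.5f - 0.5f * x * y0 * y0);
}
#endif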
27820 /* Helper for avx_vpermilps256_operand et al. This is also used by
27821 the expansion functions to turn the parallel back into a mask.
27822 The return value is 0 for no match and the imm8+1 for a match. */
27825 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27827 unsigned i, nelt = GET_MODE_NUNITS (mode);
27829 unsigned char ipar[8];
27831 if (XVECLEN (par, 0) != (int) nelt)
27834 /* Validate that all of the elements are constants, and not totally
27835 out of range. Copy the data into an integral array to make the
27836 subsequent checks easier. */
27837 for (i = 0; i < nelt; ++i)
27839 rtx er = XVECEXP (par, 0, i);
27840 unsigned HOST_WIDE_INT ei;
27842 if (!CONST_INT_P (er))
27853 /* In the 256-bit DFmode case, we can only move elements within
27854 a 128-bit lane. */
27855 for (i = 0; i < 2; ++i)
27859 mask |= ipar[i] << i;
27861 for (i = 2; i < 4; ++i)
27865 mask |= (ipar[i] - 2) << i;
27870 /* In the 256-bit SFmode case, we have full freedom of movement
27871 within the low 128-bit lane, but the high 128-bit lane must
27872 mirror the exact same pattern. */
27873 for (i = 0; i < 4; ++i)
27874 if (ipar[i] + 4 != ipar[i + 4])
27881 /* In the 128-bit case, we have full freedom in the placement of
27882 the elements from the source operand. */
27883 for (i = 0; i < nelt; ++i)
27884 mask |= ipar[i] << (i * (nelt / 2));
27888 gcc_unreachable ();
27891 /* Make sure success has a non-zero value by adding one. */
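/* Forward model (sketch) of the 128-bit SFmode mask recovered above:
   each of the four 2-bit element selectors lands at bit position 2*i
   of the imm8, i.e. the inverse of what this function decodes.  */
#if 0
static unsigned char
vpermilps_imm8_sketch (const unsigned char sel[4]) /* each in 0..3 */
{
  unsigned char mask = 0;
  int i;
  for (i = 0; i < 4; i++)
    mask |= (unsigned char) (sel[i] << (i * 2));
  return mask;
}
#endif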
27895 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27896 the expansion functions to turn the parallel back into a mask.
27897 The return value is 0 for no match and the imm8+1 for a match. */
27900 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27902 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27904 unsigned char ipar[8];
27906 if (XVECLEN (par, 0) != (int) nelt)
27909 /* Validate that all of the elements are constants, and not totally
27910 out of range. Copy the data into an integral array to make the
27911 subsequent checks easier. */
27912 for (i = 0; i < nelt; ++i)
27914 rtx er = XVECEXP (par, 0, i);
27915 unsigned HOST_WIDE_INT ei;
27917 if (!CONST_INT_P (er))
27920 if (ei >= 2 * nelt)
27925 /* Validate that each half of the permute selects consecutive elements. */
27926 for (i = 0; i < nelt2 - 1; ++i)
27927 if (ipar[i] + 1 != ipar[i + 1])
27929 for (i = nelt2; i < nelt - 1; ++i)
27930 if (ipar[i] + 1 != ipar[i + 1])
27933 /* Reconstruct the mask. */
27934 for (i = 0; i < 2; ++i)
27936 unsigned e = ipar[i * nelt2];
27940 mask |= e << (i * 4);
27943 /* Make sure success has a non-zero value by adding one. */
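/* Forward model (sketch) of the VPERM2F128 imm8 rebuilt above: the
   low nibble selects the 128-bit source lane for the result's low
   half, the high nibble the lane for the high half.  */
#if 0
static unsigned char
vperm2f128_imm8_sketch (unsigned lo_lane, unsigned hi_lane) /* each 0..3 */
{
  return (unsigned char) ((lo_lane & 3) | ((hi_lane & 3) << 4));
}
#endif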
27948 /* Store OPERAND to memory after reload is completed. This means
27949 that we can't easily use assign_stack_local. */
27951 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27955 gcc_assert (reload_completed);
27956 if (ix86_using_red_zone ())
27958 result = gen_rtx_MEM (mode,
27959 gen_rtx_PLUS (Pmode,
27961 GEN_INT (-RED_ZONE_SIZE)));
27962 emit_move_insn (result, operand);
27964 else if (TARGET_64BIT)
27970 operand = gen_lowpart (DImode, operand);
27974 gen_rtx_SET (VOIDmode,
27975 gen_rtx_MEM (DImode,
27976 gen_rtx_PRE_DEC (DImode,
27977 stack_pointer_rtx)),
27981 gcc_unreachable ();
27983 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27992 split_double_mode (mode, &operand, 1, operands, operands + 1);
27994 gen_rtx_SET (VOIDmode,
27995 gen_rtx_MEM (SImode,
27996 gen_rtx_PRE_DEC (Pmode,
27997 stack_pointer_rtx)),
28000 gen_rtx_SET (VOIDmode,
28001 gen_rtx_MEM (SImode,
28002 gen_rtx_PRE_DEC (Pmode,
28003 stack_pointer_rtx)),
28008 /* Store HImodes as SImodes. */
28009 operand = gen_lowpart (SImode, operand);
28013 gen_rtx_SET (VOIDmode,
28014 gen_rtx_MEM (GET_MODE (operand),
28015 gen_rtx_PRE_DEC (SImode,
28016 stack_pointer_rtx)),
28020 gcc_unreachable ();
28022 result = gen_rtx_MEM (mode, stack_pointer_rtx);
28027 /* Free the stack slot used by ix86_force_to_memory. */
28029 ix86_free_from_memory (enum machine_mode mode)
28031 if (!ix86_using_red_zone ())
28035 if (mode == DImode || TARGET_64BIT)
28039 /* Use LEA to deallocate stack space. In peephole2 it will be converted
28040 to a pop or add instruction if registers are available. */
28041 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
28042 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
28047 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
28048 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
28050 static const reg_class_t *
28051 i386_ira_cover_classes (void)
28053 static const reg_class_t sse_fpmath_classes[] = {
28054 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
28056 static const reg_class_t no_sse_fpmath_classes[] = {
28057 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
28060 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
28063 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
28065 Put float CONST_DOUBLE in the constant pool instead of fp regs.
28066 QImode must go into class Q_REGS.
28067 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
28068 movdf to do mem-to-mem moves through integer regs. */
28071 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
28073 enum machine_mode mode = GET_MODE (x);
28075 /* We're only allowed to return a subclass of CLASS. Many of the
28076 following checks fail for NO_REGS, so eliminate that early. */
28077 if (regclass == NO_REGS)
28080 /* All classes can load zeros. */
28081 if (x == CONST0_RTX (mode))
28084 /* Force constants into memory if we are loading a (nonzero) constant into
28085 an MMX or SSE register. This is because there are no MMX/SSE instructions
28086 to load from a constant. */
28088 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
28091 /* Prefer SSE regs only if we can use them for math. */
28092 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
28093 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
28095 /* Floating-point constants need more complex checks. */
28096 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
28098 /* General regs can load everything. */
28099 if (reg_class_subset_p (regclass, GENERAL_REGS))
28102 /* Floats can load 0 and 1 plus some others. Note that we eliminated
28103 zero above. We only want to wind up preferring 80387 registers if
28104 we plan on doing computation with them. */
28106 && standard_80387_constant_p (x))
28108 /* Limit class to non-sse. */
28109 if (regclass == FLOAT_SSE_REGS)
28111 if (regclass == FP_TOP_SSE_REGS)
28113 if (regclass == FP_SECOND_SSE_REGS)
28114 return FP_SECOND_REG;
28115 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
28122 /* Generally when we see PLUS here, it's the function invariant
28123 (plus soft-fp const_int), which can only be computed into general
28124 regs. */
28125 if (GET_CODE (x) == PLUS)
28126 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
28128 /* QImode constants are easy to load, but non-constant QImode data
28129 must go into Q_REGS. */
28130 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
28132 if (reg_class_subset_p (regclass, Q_REGS))
28134 if (reg_class_subset_p (Q_REGS, regclass))
28142 /* Discourage putting floating-point values in SSE registers unless
28143 SSE math is being used, and likewise for the 387 registers. */
28145 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
28147 enum machine_mode mode = GET_MODE (x);
28149 /* Restrict the output reload class to the register bank that we are doing
28150 math on. If we would rather not return a subset of CLASS, reject this
28151 alternative by returning NO_REGS: if reload cannot do this, it will
28152 still use its own choice. */
28153 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
28154 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
28156 if (X87_FLOAT_MODE_P (mode))
28158 if (regclass == FP_TOP_SSE_REGS)
28160 else if (regclass == FP_SECOND_SSE_REGS)
28161 return FP_SECOND_REG;
28163 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
28170 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
28171 enum machine_mode mode,
28172 secondary_reload_info *sri ATTRIBUTE_UNUSED)
28174 /* QImode spills from non-QI registers require an
28175 intermediate register on 32-bit targets. */
28176 if (!in_p && mode == QImode && !TARGET_64BIT
28177 && (rclass == GENERAL_REGS
28178 || rclass == LEGACY_REGS
28179 || rclass == INDEX_REGS))
28188 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
28189 regno = true_regnum (x);
28191 /* Return Q_REGS if the operand is in memory. */
28199 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
28202 ix86_class_likely_spilled_p (reg_class_t rclass)
28213 case SSE_FIRST_REG:
28215 case FP_SECOND_REG:
28225 /* If we are copying between general and FP registers, we need a memory
28226 location. The same is true for SSE and MMX registers.
28228 To optimize register_move_cost performance, allow inline variant.
28230 The macro can't work reliably when one of the CLASSES is a class containing
28231 registers from multiple units (SSE, MMX, integer). We avoid this by never
28232 combining those units in a single alternative in the machine description.
28233 Ensure that this constraint holds to avoid unexpected surprises.
28235 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
28236 enforce these sanity checks. */
28239 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28240 enum machine_mode mode, int strict)
28242 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
28243 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
28244 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
28245 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
28246 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
28247 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
28249 gcc_assert (!strict);
28253 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
28256 /* ??? This is a lie. We do have moves between mmx/general, and for
28257 mmx/sse2. But by saying we need secondary memory we discourage the
28258 register allocator from using the mmx registers unless needed. */
28259 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
28262 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28264 /* SSE1 doesn't have any direct moves from other classes. */
28268 /* If the target says that inter-unit moves are more expensive
28269 than moving through memory, then don't generate them. */
28270 if (!TARGET_INTER_UNIT_MOVES)
28273 /* Between SSE and general, we have moves no larger than word size. */
28274 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
28282 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
28283 enum machine_mode mode, int strict)
28285 return inline_secondary_memory_needed (class1, class2, mode, strict);
28288 /* Return true if the registers in CLASS cannot represent the change from
28289 modes FROM to TO. */
28292 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
28293 enum reg_class regclass)
28298 /* x87 registers can't do subreg at all, as all values are reformatted
28299 to extended precision. */
28300 if (MAYBE_FLOAT_CLASS_P (regclass))
28303 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
28305 /* Vector registers do not support QI or HImode loads. If we don't
28306 disallow a change to these modes, reload will assume it's ok to
28307 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
28308 the vec_dupv4hi pattern. */
28309 if (GET_MODE_SIZE (from) < 4)
28312 /* Vector registers do not support subreg with nonzero offsets, which
28313 are otherwise valid for integer registers. Since we can't see
28314 whether we have a nonzero offset from here, prohibit all
28315 nonparadoxical subregs changing size. */
28316 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
28323 /* Return the cost of moving data of mode M between a
28324 register and memory. A value of 2 is the default; this cost is
28325 relative to those in `REGISTER_MOVE_COST'.
28327 This function is used extensively by register_move_cost, which is used
28328 to build tables at startup. Make it inline in this case.
28329 When IN is 2, return the maximum of the in and out move costs.
28331 If moving between registers and memory is more expensive than
28332 between two registers, you should define this macro to express the
28333 relative cost.
28335 Also model the increased cost of moving QImode values in
28336 non-Q_REGS classes. */
28339 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28343 if (FLOAT_CLASS_P (regclass))
28361 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28362 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28364 if (SSE_CLASS_P (regclass))
28367 switch (GET_MODE_SIZE (mode))
28382 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28383 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28385 if (MMX_CLASS_P (regclass))
28388 switch (GET_MODE_SIZE (mode))
28400 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28401 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28403 switch (GET_MODE_SIZE (mode))
28406 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28409 return ix86_cost->int_store[0];
28410 if (TARGET_PARTIAL_REG_DEPENDENCY
28411 && optimize_function_for_speed_p (cfun))
28412 cost = ix86_cost->movzbl_load;
28414 cost = ix86_cost->int_load[0];
28416 return MAX (cost, ix86_cost->int_store[0]);
28422 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28424 return ix86_cost->movzbl_load;
28426 return ix86_cost->int_store[0] + 4;
28431 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28432 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28434 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
28435 if (mode == TFmode)
28438 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28440 cost = ix86_cost->int_load[2];
28442 cost = ix86_cost->int_store[2];
28443 return (cost * (((int) GET_MODE_SIZE (mode)
28444 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
28449 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28452 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28456 /* Return the cost of moving data from a register in class CLASS1 to
28457 one in class CLASS2.
28459 It is not required that the cost always equal 2 when FROM is the same as TO;
28460 on some machines it is expensive to move between registers if they are not
28461 general registers. */
28464 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28465 reg_class_t class2_i)
28467 enum reg_class class1 = (enum reg_class) class1_i;
28468 enum reg_class class2 = (enum reg_class) class2_i;
28470 /* In case we require secondary memory, compute the cost of the store
28471 followed by the load. In order to avoid bad register allocation
28472 choices, we need this to be *at least* as high as the symmetric
28473 MEMORY_MOVE_COST. */
28474 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28478 cost += inline_memory_move_cost (mode, class1, 2);
28479 cost += inline_memory_move_cost (mode, class2, 2);
28481 /* When copying from a general-purpose register we may emit multiple
28482 stores followed by a single load, causing a memory-size-mismatch
28483 stall. Count this as an arbitrarily high cost of 20. */
28484 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28487 /* In the case of FP/MMX moves, the registers actually overlap, and we
28488 have to switch modes in order to treat them differently. */
28489 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28490 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28496 /* Moves between SSE/MMX and integer unit are expensive. */
28497 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28498 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28500 /* ??? By keeping the returned value relatively high, we limit the
28501 number of moves between integer and MMX/SSE registers for all
28502 targets. Additionally, a high value prevents problems with
28503 x86_modes_tieable_p (), where integer modes in MMX/SSE registers
28504 are not tieable because of missing QImode and HImode moves to,
28505 from or between MMX/SSE registers. */
28506 return MAX (8, ix86_cost->mmxsse_to_integer);
28508 if (MAYBE_FLOAT_CLASS_P (class1))
28509 return ix86_cost->fp_move;
28510 if (MAYBE_SSE_CLASS_P (class1))
28511 return ix86_cost->sse_move;
28512 if (MAYBE_MMX_CLASS_P (class1))
28513 return ix86_cost->mmx_move;
28517 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28520 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28522 /* Flags, and only flags, can hold CCmode values. */
28523 if (CC_REGNO_P (regno))
28524 return GET_MODE_CLASS (mode) == MODE_CC;
28525 if (GET_MODE_CLASS (mode) == MODE_CC
28526 || GET_MODE_CLASS (mode) == MODE_RANDOM
28527 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28529 if (FP_REGNO_P (regno))
28530 return VALID_FP_MODE_P (mode);
28531 if (SSE_REGNO_P (regno))
28533 /* We implement the move patterns for all vector modes into and
28534 out of SSE registers, even when no operation instructions
28535 are available. OImode move is available only when AVX is
28536 enabled. */
28537 return ((TARGET_AVX && mode == OImode)
28538 || VALID_AVX256_REG_MODE (mode)
28539 || VALID_SSE_REG_MODE (mode)
28540 || VALID_SSE2_REG_MODE (mode)
28541 || VALID_MMX_REG_MODE (mode)
28542 || VALID_MMX_REG_MODE_3DNOW (mode));
28544 if (MMX_REGNO_P (regno))
28546 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28547 so if the register is available at all, then we can move data of
28548 the given mode into or out of it. */
28549 return (VALID_MMX_REG_MODE (mode)
28550 || VALID_MMX_REG_MODE_3DNOW (mode));
28553 if (mode == QImode)
28555 /* Take care with QImode values - they can live in non-QI regs,
28556 but then they cause partial register stalls. */
28557 if (regno <= BX_REG || TARGET_64BIT)
28559 if (!TARGET_PARTIAL_REG_STALL)
28561 return reload_in_progress || reload_completed;
28563 /* We handle both integer and floats in the general purpose registers. */
28564 else if (VALID_INT_MODE_P (mode))
28566 else if (VALID_FP_MODE_P (mode))
28568 else if (VALID_DFP_MODE_P (mode))
28570 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28571 on to use that value in smaller contexts, this can easily force a
28572 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28573 supporting DImode, allow it. */
28574 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28580 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28581 tieable integer mode. */
28584 ix86_tieable_integer_mode_p (enum machine_mode mode)
28593 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28596 return TARGET_64BIT;
28603 /* Return true if MODE1 is accessible in a register that can hold MODE2
28604 without copying. That is, all register classes that can hold MODE2
28605 can also hold MODE1. */
28608 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28610 if (mode1 == mode2)
28613 if (ix86_tieable_integer_mode_p (mode1)
28614 && ix86_tieable_integer_mode_p (mode2))
28617 /* MODE2 being XFmode implies fp stack or general regs, which means we
28618 can tie any smaller floating point modes to it. Note that we do not
28619 tie this with TFmode. */
28620 if (mode2 == XFmode)
28621 return mode1 == SFmode || mode1 == DFmode;
28623 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28624 that we can tie it with SFmode. */
28625 if (mode2 == DFmode)
28626 return mode1 == SFmode;
28628 /* If MODE2 is only appropriate for an SSE register, then tie with
28629 any other mode acceptable to SSE registers. */
28630 if (GET_MODE_SIZE (mode2) == 16
28631 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28632 return (GET_MODE_SIZE (mode1) == 16
28633 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28635 /* If MODE2 is appropriate for an MMX register, then tie
28636 with any other mode acceptable to MMX registers. */
28637 if (GET_MODE_SIZE (mode2) == 8
28638 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28639 return (GET_MODE_SIZE (mode1) == 8
28640 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
28645 /* Compute a (partial) cost for rtx X. Return true if the complete
28646 cost has been computed, and false if subexpressions should be
28647 scanned. In either case, *TOTAL contains the cost result. */
28650 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28652 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28653 enum machine_mode mode = GET_MODE (x);
28654 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28662 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28664 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28666 else if (flag_pic && SYMBOLIC_CONST (x)
28668 || (GET_CODE (x) != LABEL_REF
28669 && (GET_CODE (x) != SYMBOL_REF
28670 || !SYMBOL_REF_LOCAL_P (x)))))
28677 if (mode == VOIDmode)
28680 switch (standard_80387_constant_p (x))
28685 default: /* Other constants */
28690 /* Start with (MEM (SYMBOL_REF)), since that's where
28691 it'll probably end up. Add a penalty for size. */
28692 *total = (COSTS_N_INSNS (1)
28693 + (flag_pic != 0 && !TARGET_64BIT)
28694 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28700 /* The zero extension is often completely free on x86_64, so make
28701 it as cheap as possible. */
28702 if (TARGET_64BIT && mode == DImode
28703 && GET_MODE (XEXP (x, 0)) == SImode)
28705 else if (TARGET_ZERO_EXTEND_WITH_AND)
28706 *total = cost->add;
28708 *total = cost->movzx;
28712 *total = cost->movsx;
28716 if (CONST_INT_P (XEXP (x, 1))
28717 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28719 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28722 *total = cost->add;
28725 if ((value == 2 || value == 3)
28726 && cost->lea <= cost->shift_const)
28728 *total = cost->lea;
28738 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28740 if (CONST_INT_P (XEXP (x, 1)))
28742 if (INTVAL (XEXP (x, 1)) > 32)
28743 *total = cost->shift_const + COSTS_N_INSNS (2);
28745 *total = cost->shift_const * 2;
28749 if (GET_CODE (XEXP (x, 1)) == AND)
28750 *total = cost->shift_var * 2;
28752 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28757 if (CONST_INT_P (XEXP (x, 1)))
28758 *total = cost->shift_const;
28760 *total = cost->shift_var;
28768 gcc_assert (FLOAT_MODE_P (mode));
28769 gcc_assert (TARGET_FMA || TARGET_FMA4);
28771 /* ??? SSE scalar/vector cost should be used here. */
28772 /* ??? Bald assumption that fma has the same cost as fmul. */
28773 *total = cost->fmul;
28774 *total += rtx_cost (XEXP (x, 1), FMA, speed);
28776 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
28778 if (GET_CODE (sub) == NEG)
28780 *total += rtx_cost (sub, FMA, speed);
28783 if (GET_CODE (sub) == NEG)
28785 *total += rtx_cost (sub, FMA, speed);
28790 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28792 /* ??? SSE scalar cost should be used here. */
28793 *total = cost->fmul;
28796 else if (X87_FLOAT_MODE_P (mode))
28798 *total = cost->fmul;
28801 else if (FLOAT_MODE_P (mode))
28803 /* ??? SSE vector cost should be used here. */
28804 *total = cost->fmul;
28809 rtx op0 = XEXP (x, 0);
28810 rtx op1 = XEXP (x, 1);
28812 if (CONST_INT_P (XEXP (x, 1)))
28814 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28815 for (nbits = 0; value != 0; value &= value - 1)
28819 /* This is arbitrary. */
28822 /* Compute costs correctly for widening multiplication. */
28823 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28824 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28825 == GET_MODE_SIZE (mode))
28827 int is_mulwiden = 0;
28828 enum machine_mode inner_mode = GET_MODE (op0);
28830 if (GET_CODE (op0) == GET_CODE (op1))
28831 is_mulwiden = 1, op1 = XEXP (op1, 0);
28832 else if (CONST_INT_P (op1))
28834 if (GET_CODE (op0) == SIGN_EXTEND)
28835 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28838 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28842 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28845 *total = (cost->mult_init[MODE_INDEX (mode)]
28846 + nbits * cost->mult_bit
28847 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
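/* The nbits loop above is Kernighan's population count: each
   iteration clears the lowest set bit, so the loop runs once per set
   bit of the multiplier.  Standalone sketch for illustration:  */
#if 0
static int
popcount_sketch (unsigned long long v)
{
  int n;
  for (n = 0; v != 0; v &= v - 1)
    n++;
  return n;
}
#endif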
28856 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28857 /* ??? SSE cost should be used here. */
28858 *total = cost->fdiv;
28859 else if (X87_FLOAT_MODE_P (mode))
28860 *total = cost->fdiv;
28861 else if (FLOAT_MODE_P (mode))
28862 /* ??? SSE vector cost should be used here. */
28863 *total = cost->fdiv;
28865 *total = cost->divide[MODE_INDEX (mode)];
28869 if (GET_MODE_CLASS (mode) == MODE_INT
28870 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28872 if (GET_CODE (XEXP (x, 0)) == PLUS
28873 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28874 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28875 && CONSTANT_P (XEXP (x, 1)))
28877 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28878 if (val == 2 || val == 4 || val == 8)
28880 *total = cost->lea;
28881 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28882 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28883 outer_code, speed);
28884 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28888 else if (GET_CODE (XEXP (x, 0)) == MULT
28889 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28891 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28892 if (val == 2 || val == 4 || val == 8)
28894 *total = cost->lea;
28895 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28896 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28900 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28902 *total = cost->lea;
28903 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28904 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28905 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28912 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28914 /* ??? SSE cost should be used here. */
28915 *total = cost->fadd;
28918 else if (X87_FLOAT_MODE_P (mode))
28920 *total = cost->fadd;
28923 else if (FLOAT_MODE_P (mode))
28925 /* ??? SSE vector cost should be used here. */
28926 *total = cost->fadd;
28934 if (!TARGET_64BIT && mode == DImode)
28936 *total = (cost->add * 2
28937 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28938 << (GET_MODE (XEXP (x, 0)) != DImode))
28939 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28940 << (GET_MODE (XEXP (x, 1)) != DImode)));
28946 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28948 /* ??? SSE cost should be used here. */
28949 *total = cost->fchs;
28952 else if (X87_FLOAT_MODE_P (mode))
28954 *total = cost->fchs;
28957 else if (FLOAT_MODE_P (mode))
28959 /* ??? SSE vector cost should be used here. */
28960 *total = cost->fchs;
28966 if (!TARGET_64BIT && mode == DImode)
28967 *total = cost->add * 2;
28969 *total = cost->add;
28973 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28974 && XEXP (XEXP (x, 0), 1) == const1_rtx
28975 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28976 && XEXP (x, 1) == const0_rtx)
28978 /* This kind of construct is implemented using test[bwl].
28979 Treat it as if we had an AND. */
28980 *total = (cost->add
28981 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28982 + rtx_cost (const1_rtx, outer_code, speed));
28988 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28993 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28994 /* ??? SSE cost should be used here. */
28995 *total = cost->fabs;
28996 else if (X87_FLOAT_MODE_P (mode))
28997 *total = cost->fabs;
28998 else if (FLOAT_MODE_P (mode))
28999 /* ??? SSE vector cost should be used here. */
29000 *total = cost->fabs;
29004 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
29005 /* ??? SSE cost should be used here. */
29006 *total = cost->fsqrt;
29007 else if (X87_FLOAT_MODE_P (mode))
29008 *total = cost->fsqrt;
29009 else if (FLOAT_MODE_P (mode))
29010 /* ??? SSE vector cost should be used here. */
29011 *total = cost->fsqrt;
29015 if (XINT (x, 1) == UNSPEC_TP)
29022 case VEC_DUPLICATE:
29023 /* ??? Assume all of these vector manipulation patterns are
29024 recognizable, in which case they all pretty much have the
29025 same cost. */
29026 *total = COSTS_N_INSNS (1);
29036 static int current_machopic_label_num;
29038 /* Given a symbol name and its associated stub, write out the
29039 definition of the stub. */
29042 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29044 unsigned int length;
29045 char *binder_name, *symbol_name, lazy_ptr_name[32];
29046 int label = ++current_machopic_label_num;
29048 /* For 64-bit we shouldn't get here. */
29049 gcc_assert (!TARGET_64BIT);
29051 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29052 symb = targetm.strip_name_encoding (symb);
29054 length = strlen (stub);
29055 binder_name = XALLOCAVEC (char, length + 32);
29056 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
29058 length = strlen (symb);
29059 symbol_name = XALLOCAVEC (char, length + 32);
29060 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29062 sprintf (lazy_ptr_name, "L%d$lz", label);
29064 if (MACHOPIC_ATT_STUB)
29065 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
29066 else if (MACHOPIC_PURE)
29068 if (TARGET_DEEP_BRANCH_PREDICTION)
29069 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
29071 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
29074 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
29076 fprintf (file, "%s:\n", stub);
29077 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29079 if (MACHOPIC_ATT_STUB)
29081 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
29083 else if (MACHOPIC_PURE)
29086 if (TARGET_DEEP_BRANCH_PREDICTION)
29088 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29089 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
29090 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
29091 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
29095 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
29096 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
29097 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
29099 fprintf (file, "\tjmp\t*%%ecx\n");
29102 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
29104 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
29105 it needs no stub-binding-helper. */
29106 if (MACHOPIC_ATT_STUB)
29109 fprintf (file, "%s:\n", binder_name);
29113 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
29114 fprintf (file, "\tpushl\t%%ecx\n");
29117 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
29119 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
29121 /* N.B. Keep the correspondence of these
29122 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
29123 old-pic/new-pic/non-pic stubs; altering this will break
29124 compatibility with existing dylibs. */
29128 if (TARGET_DEEP_BRANCH_PREDICTION)
29129 /* 25-byte PIC stub using "CALL get_pc_thunk". */
29130 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
29132 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
29133 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29136 /* 16-byte -mdynamic-no-pic stub. */
29137 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
29139 fprintf (file, "%s:\n", lazy_ptr_name);
29140 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29141 fprintf (file, ASM_LONG "%s\n", binder_name);
29143 #endif /* TARGET_MACHO */
29145 /* Order the registers for register allocator. */
29148 x86_order_regs_for_local_alloc (void)
29153 /* First allocate the local general purpose registers. */
29154 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29155 if (GENERAL_REGNO_P (i) && call_used_regs[i])
29156 reg_alloc_order [pos++] = i;
29158 /* Global general purpose registers. */
29159 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
29160 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
29161 reg_alloc_order [pos++] = i;
29163 /* x87 registers come first in case we are doing FP math
29164 using them. */
29165 if (!TARGET_SSE_MATH)
29166 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29167 reg_alloc_order [pos++] = i;
29169 /* SSE registers. */
29170 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
29171 reg_alloc_order [pos++] = i;
29172 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
29173 reg_alloc_order [pos++] = i;
29175 /* x87 registers. */
29176 if (TARGET_SSE_MATH)
29177 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
29178 reg_alloc_order [pos++] = i;
29180 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
29181 reg_alloc_order [pos++] = i;
29183 /* Initialize the rest of the array, as we do not allocate some
29184 registers at all. */
29185 while (pos < FIRST_PSEUDO_REGISTER)
29186 reg_alloc_order [pos++] = 0;
29189 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
29190 in struct attribute_spec.handler. */
29192 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
29194 int flags ATTRIBUTE_UNUSED,
29195 bool *no_add_attrs)
29197 if (TREE_CODE (*node) != FUNCTION_TYPE
29198 && TREE_CODE (*node) != METHOD_TYPE
29199 && TREE_CODE (*node) != FIELD_DECL
29200 && TREE_CODE (*node) != TYPE_DECL)
29202 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29204 *no_add_attrs = true;
29209 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
29211 *no_add_attrs = true;
29214 if (is_attribute_p ("callee_pop_aggregate_return", name))
29218 cst = TREE_VALUE (args);
29219 if (TREE_CODE (cst) != INTEGER_CST)
29221 warning (OPT_Wattributes,
29222 "%qE attribute requires an integer constant argument",
29224 *no_add_attrs = true;
29226 else if (compare_tree_int (cst, 0) != 0
29227 && compare_tree_int (cst, 1) != 0)
29229 warning (OPT_Wattributes,
29230 "argument to %qE attribute is neither zero, nor one",
29232 *no_add_attrs = true;
29241 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
29242 struct attribute_spec.handler. */
29244 ix86_handle_abi_attribute (tree *node, tree name,
29245 tree args ATTRIBUTE_UNUSED,
29246 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29248 if (TREE_CODE (*node) != FUNCTION_TYPE
29249 && TREE_CODE (*node) != METHOD_TYPE
29250 && TREE_CODE (*node) != FIELD_DECL
29251 && TREE_CODE (*node) != TYPE_DECL)
29253 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29255 *no_add_attrs = true;
29260 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
29262 *no_add_attrs = true;
29266 /* Can combine regparm with all attributes but fastcall. */
29267 if (is_attribute_p ("ms_abi", name))
29269 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
29271 error ("ms_abi and sysv_abi attributes are not compatible");
29276 else if (is_attribute_p ("sysv_abi", name))
29278 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
29280 error ("ms_abi and sysv_abi attributes are not compatible");
29289 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29290 struct attribute_spec.handler. */
29292 ix86_handle_struct_attribute (tree *node, tree name,
29293 tree args ATTRIBUTE_UNUSED,
29294 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29297 if (DECL_P (*node))
29299 if (TREE_CODE (*node) == TYPE_DECL)
29300 type = &TREE_TYPE (*node);
29305 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29306 || TREE_CODE (*type) == UNION_TYPE)))
29308 warning (OPT_Wattributes, "%qE attribute ignored",
29310 *no_add_attrs = true;
29313 else if ((is_attribute_p ("ms_struct", name)
29314 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29315 || ((is_attribute_p ("gcc_struct", name)
29316 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29318 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29320 *no_add_attrs = true;
29327 ix86_handle_fndecl_attribute (tree *node, tree name,
29328 tree args ATTRIBUTE_UNUSED,
29329 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29331 if (TREE_CODE (*node) != FUNCTION_DECL)
29333 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29335 *no_add_attrs = true;
29341 ix86_ms_bitfield_layout_p (const_tree record_type)
29343 return ((TARGET_MS_BITFIELD_LAYOUT
29344 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29345 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
29348 /* Returns an expression indicating where the this parameter is
29349 located on entry to the FUNCTION. */
29352 x86_this_parameter (tree function)
29354 tree type = TREE_TYPE (function);
29355 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
29360 const int *parm_regs;
29362 if (ix86_function_type_abi (type) == MS_ABI)
29363 parm_regs = x86_64_ms_abi_int_parameter_registers;
29365 parm_regs = x86_64_int_parameter_registers;
29366 return gen_rtx_REG (DImode, parm_regs[aggr]);
29369 nregs = ix86_function_regparm (type, function);
29371 if (nregs > 0 && !stdarg_p (type))
29375 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
29376 regno = aggr ? DX_REG : CX_REG;
29377 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
29381 return gen_rtx_MEM (SImode,
29382 plus_constant (stack_pointer_rtx, 4));
29391 return gen_rtx_MEM (SImode,
29392 plus_constant (stack_pointer_rtx, 4));
29395 return gen_rtx_REG (SImode, regno);
29398 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
29401 /* Determine whether x86_output_mi_thunk can succeed. */
29404 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29405 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29406 HOST_WIDE_INT vcall_offset, const_tree function)
29408 /* 64-bit can handle anything. */
29412 /* For 32-bit, everything's fine if we have one free register. */
29413 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29416 /* Need a free register for vcall_offset. */
29420 /* Need a free register for GOT references. */
29421 if (flag_pic && !targetm.binds_local_p (function))
29424 /* Otherwise ok. */
29428 /* Output the assembler code for a thunk function. THUNK_DECL is the
29429 declaration for the thunk function itself, FUNCTION is the decl for
29430 the target function. DELTA is an immediate constant offset to be
29431 added to THIS. If VCALL_OFFSET is nonzero, the word at
29432 *(*this + vcall_offset) should be added to THIS. */
29435 x86_output_mi_thunk (FILE *file,
29436 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29437 HOST_WIDE_INT vcall_offset, tree function)
29440 rtx this_param = x86_this_parameter (function);
29443 /* Make sure unwind info is emitted for the thunk if needed. */
29444 final_start_function (emit_barrier (), file, 1);
29446 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29447 pull it in now and let DELTA benefit. */
29448 if (REG_P (this_param))
29449 this_reg = this_param;
29450 else if (vcall_offset)
29452 /* Put the this parameter into %eax. */
29453 xops[0] = this_param;
29454 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29455 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29458 this_reg = NULL_RTX;
29460 /* Adjust the this parameter by a fixed constant. */
29463 xops[0] = GEN_INT (delta);
29464 xops[1] = this_reg ? this_reg : this_param;
29467 if (!x86_64_general_operand (xops[0], DImode))
29469 tmp = gen_rtx_REG (DImode, R10_REG);
29471 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29473 xops[1] = this_param;
29475 if (x86_maybe_negate_const_int (&xops[0], DImode))
29476 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29478 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29480 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29481 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29483 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29486 /* Adjust the this parameter by a value stored in the vtable. */
29490 tmp = gen_rtx_REG (DImode, R10_REG);
29493 int tmp_regno = CX_REG;
29494 if (lookup_attribute ("fastcall",
29495 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29496 || lookup_attribute ("thiscall",
29497 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29498 tmp_regno = AX_REG;
29499 tmp = gen_rtx_REG (SImode, tmp_regno);
29502 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29504 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29506 /* Adjust the this parameter. */
29507 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29508 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29510 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29511 xops[0] = GEN_INT (vcall_offset);
29513 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29514 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29516 xops[1] = this_reg;
29517 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29520 /* If necessary, drop THIS back to its stack slot. */
29521 if (this_reg && this_reg != this_param)
29523 xops[0] = this_reg;
29524 xops[1] = this_param;
29525 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29528 xops[0] = XEXP (DECL_RTL (function), 0);
29531 if (!flag_pic || targetm.binds_local_p (function)
29532 || DEFAULT_ABI == MS_ABI)
29533 output_asm_insn ("jmp\t%P0", xops);
29534 /* All thunks should be in the same object as their target,
29535 and thus binds_local_p should be true. */
29536 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29537 gcc_unreachable ();
29540 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29541 tmp = gen_rtx_CONST (Pmode, tmp);
29542 tmp = gen_rtx_MEM (QImode, tmp);
29544 output_asm_insn ("jmp\t%A0", xops);
29549 if (!flag_pic || targetm.binds_local_p (function))
29550 output_asm_insn ("jmp\t%P0", xops);
29555 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29556 if (TARGET_MACHO_BRANCH_ISLANDS)
29557 sym_ref = (gen_rtx_SYMBOL_REF
29559 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29560 tmp = gen_rtx_MEM (QImode, sym_ref);
29562 output_asm_insn ("jmp\t%0", xops);
29565 #endif /* TARGET_MACHO */
29567 tmp = gen_rtx_REG (SImode, CX_REG);
29568 output_set_got (tmp, NULL_RTX);
29571 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29572 output_asm_insn ("jmp\t{*}%1", xops);
29575 final_end_function ();
29579 x86_file_start (void)
29581 default_file_start ();
29583 darwin_file_start ();
29585 if (X86_FILE_START_VERSION_DIRECTIVE)
29586 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29587 if (X86_FILE_START_FLTUSED)
29588 fputs ("\t.global\t__fltused\n", asm_out_file);
29589 if (ix86_asm_dialect == ASM_INTEL)
29590 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29594 x86_field_alignment (tree field, int computed)
29596 enum machine_mode mode;
29597 tree type = TREE_TYPE (field);
29599 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29601 mode = TYPE_MODE (strip_array_types (type));
29602 if (mode == DFmode || mode == DCmode
29603 || GET_MODE_CLASS (mode) == MODE_INT
29604 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29605 return MIN (32, computed);
29609 /* Output assembler code to FILE to increment profiler label # LABELNO
29610 for profiling a function entry. */
29612 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29614 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29619 #ifndef NO_PROFILE_COUNTERS
29620 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29623 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29624 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29626 fprintf (file, "\tcall\t%s\n", mcount_name);
29630 #ifndef NO_PROFILE_COUNTERS
29631 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29634 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29638 #ifndef NO_PROFILE_COUNTERS
29639 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29642 fprintf (file, "\tcall\t%s\n", mcount_name);
29646 /* We don't have exact information about the insn sizes, but we may assume
29647 quite safely that we are informed about all 1 byte insns and memory
29648 address sizes. This is enough to eliminate unnecessary padding in
29649 99% of cases. */
29652 min_insn_size (rtx insn)
29656 if (!INSN_P (insn) || !active_insn_p (insn))
29659 /* Discard alignments we've emitted, and jump instructions. */
29660 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29661 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29663 if (JUMP_TABLE_DATA_P (insn))
29666 /* Important case - calls are always 5 bytes.
29667 It is common to have many calls in a row. */
29669 && symbolic_reference_mentioned_p (PATTERN (insn))
29670 && !SIBLING_CALL_P (insn))
29672 len = get_attr_length (insn);
29676 /* For normal instructions we rely on get_attr_length being exact,
29677 with a few exceptions. */
29678 if (!JUMP_P (insn))
29680 enum attr_type type = get_attr_type (insn);
29685 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29686 || asm_noperands (PATTERN (insn)) >= 0)
29693 /* Otherwise trust get_attr_length. */
29697 l = get_attr_length_address (insn);
29698 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29707 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29709 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
29710 16-byte window. */
29713 ix86_avoid_jump_mispredicts (void)
29715 rtx insn, start = get_insns ();
29716 int nbytes = 0, njumps = 0;
29719 /* Look for all minimal intervals of instructions containing 4 jumps.
29720 The intervals are bounded by START and INSN. NBYTES is the total
29721 size of instructions in the interval including INSN and not including
29722 START. When NBYTES is smaller than 16 bytes, it is possible
29723 that the ends of START and INSN land in the same 16-byte page.
29725 The smallest offset in the page at which INSN can start is the case
29726 where START ends at offset 0. The offset of INSN is then
29727 NBYTES - sizeof (INSN). We add a p2align to the 16-byte window
29728 with max-skip 15 - NBYTES + sizeof (INSN). */
29729 for (insn = start; insn; insn = NEXT_INSN (insn))
29733 if (LABEL_P (insn))
29735 int align = label_to_alignment (insn);
29736 int max_skip = label_to_max_skip (insn);
29740 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29741 already in the current 16 byte page, because otherwise
29742 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29743 bytes to reach a 16-byte boundary. */
29745 || (align <= 3 && max_skip != (1 << align) - 1))
29748 fprintf (dump_file, "Label %i with max_skip %i\n",
29749 INSN_UID (insn), max_skip);
29752 while (nbytes + max_skip >= 16)
29754 start = NEXT_INSN (start);
29755 if ((JUMP_P (start)
29756 && GET_CODE (PATTERN (start)) != ADDR_VEC
29757 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29759 njumps--, isjump = 1;
29762 nbytes -= min_insn_size (start);
29768 min_size = min_insn_size (insn);
29769 nbytes += min_size;
29771 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29772 INSN_UID (insn), min_size);
29774 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29775 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29783 start = NEXT_INSN (start);
29784 if ((JUMP_P (start)
29785 && GET_CODE (PATTERN (start)) != ADDR_VEC
29786 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29788 njumps--, isjump = 1;
29791 nbytes -= min_insn_size (start);
29793 gcc_assert (njumps >= 0);
29795 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29796 INSN_UID (start), INSN_UID (insn), nbytes);
29798 if (njumps == 3 && isjump && nbytes < 16)
29800 int padsize = 15 - nbytes + min_insn_size (insn);
29803 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29804 INSN_UID (insn), padsize);
29805 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
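/* Illustrative model (not part of GCC) of the window scan above: SIZES
   and ISJUMP are hypothetical per-insn arrays standing in for
   min_insn_size and the JUMP_P test.  Returns 1 when four jumps can
   share one 16-byte page, the situation the pass pads away.  */
static int
four_jumps_in_16_bytes_p (const int *sizes, const char *isjump, int n)
{
  int i, j;

  for (i = 0; i < n; i++)
    {
      int nbytes = 0, njumps = 0;

      /* Grow a window starting at insn I while it still fits in a
         single 16-byte page.  */
      for (j = i; j < n; j++)
        {
          nbytes += sizes[j];
          if (nbytes > 16)
            break;              /* Window no longer fits in one page.  */
          if (isjump[j] && ++njumps >= 4)
            return 1;           /* Four jumps within 16 bytes.  */
        }
    }
  return 0;
}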
29811 /* AMD Athlon works faster
29812 when RET is not the destination of a conditional jump or directly preceded
29813 by another jump instruction. We avoid the penalty by inserting a NOP just
29814 before the RET instructions in such cases. */
29816 ix86_pad_returns (void)
29821 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29823 basic_block bb = e->src;
29824 rtx ret = BB_END (bb);
29826 bool replace = false;
29828 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29829 || optimize_bb_for_size_p (bb))
29831 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29832 if (active_insn_p (prev) || LABEL_P (prev))
29834 if (prev && LABEL_P (prev))
29839 FOR_EACH_EDGE (e, ei, bb->preds)
29840 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29841 && !(e->flags & EDGE_FALLTHRU))
29846 prev = prev_active_insn (ret);
29848 && ((JUMP_P (prev) && any_condjump_p (prev))
29851 /* Empty functions get branch mispredict even when
29852 the jump destination is not visible to us. */
29853 if (!prev && !optimize_function_for_size_p (cfun))
29858 emit_jump_insn_before (gen_return_internal_long (), ret);
29864 /* Count the minimum number of instructions in BB. Return 4 if the
29865 number of instructions >= 4. */
29868 ix86_count_insn_bb (basic_block bb)
29871 int insn_count = 0;
29873 /* Count number of instructions in this block. Return 4 if the number
29874 of instructions >= 4. */
29875 FOR_BB_INSNS (bb, insn)
29877 /* This only happens in exit blocks. */
29879 && GET_CODE (PATTERN (insn)) == RETURN)
29882 if (NONDEBUG_INSN_P (insn)
29883 && GET_CODE (PATTERN (insn)) != USE
29884 && GET_CODE (PATTERN (insn)) != CLOBBER)
29887 if (insn_count >= 4)
29896 /* Count the minimum number of instructions in a code path through BB.
29897 Return 4 if the number of instructions >= 4. */
29900 ix86_count_insn (basic_block bb)
29904 int min_prev_count;
29906 /* Only bother counting instructions along paths with no
29907 more than 2 basic blocks between entry and exit. Given
29908 that BB has an edge to exit, determine if a predecessor
29909 of BB has an edge from entry. If so, compute the number
29910 of instructions in the predecessor block. If there
29911 happen to be multiple such blocks, compute the minimum. */
29912 min_prev_count = 4;
29913 FOR_EACH_EDGE (e, ei, bb->preds)
29916 edge_iterator prev_ei;
29918 if (e->src == ENTRY_BLOCK_PTR)
29920 min_prev_count = 0;
29923 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29925 if (prev_e->src == ENTRY_BLOCK_PTR)
29927 int count = ix86_count_insn_bb (e->src);
29928 if (count < min_prev_count)
29929 min_prev_count = count;
29935 if (min_prev_count < 4)
29936 min_prev_count += ix86_count_insn_bb (bb);
29938 return min_prev_count;
29941 /* Pad short functions to 4 instructions. */
29944 ix86_pad_short_function (void)
29949 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29951 rtx ret = BB_END (e->src);
29952 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29954 int insn_count = ix86_count_insn (e->src);
29956 /* Pad short function. */
29957 if (insn_count < 4)
29961 /* Find epilogue. */
29964 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29965 insn = PREV_INSN (insn);
29970 /* Two NOPs count as one instruction. */
29971 insn_count = 2 * (4 - insn_count);
29972 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
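/* Worked example (illustrative): a function containing one real insn
   before its RET has insn_count == 1, so 2 * (4 - 1) == 6 one-byte
   NOPs are emitted.  Since two NOPs count as one instruction here,
   that adds 3 instructions, bringing the path to the required 4.  */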
29978 /* Implement machine specific optimizations. We implement padding of returns
29979 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
29983 /* We are freeing block_for_insn in the toplev to keep compatibility
29984 with old MDEP_REORGS that are not CFG based. Recompute it now. */
29985 compute_bb_for_insn ();
29987 if (optimize && optimize_function_for_speed_p (cfun))
29989 if (TARGET_PAD_SHORT_FUNCTION)
29990 ix86_pad_short_function ();
29991 else if (TARGET_PAD_RETURNS)
29992 ix86_pad_returns ();
29993 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29994 if (TARGET_FOUR_JUMP_LIMIT)
29995 ix86_avoid_jump_mispredicts ();
29999 /* Run the vzeroupper optimization if needed. */
30000 if (TARGET_VZEROUPPER)
30001 move_or_delete_vzeroupper ();
30004 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
30007 x86_extended_QIreg_mentioned_p (rtx insn)
30010 extract_insn_cached (insn);
30011 for (i = 0; i < recog_data.n_operands; i++)
30012 if (REG_P (recog_data.operand[i])
30013 && REGNO (recog_data.operand[i]) > BX_REG)
30018 /* Return nonzero when P points to a register encoded via a REX prefix.
30019 Called via for_each_rtx. */
30021 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
30023 unsigned int regno;
30026 regno = REGNO (*p);
30027 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
30030 /* Return true when INSN mentions a register that must be encoded using a REX
30033 x86_extended_reg_mentioned_p (rtx insn)
30035 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
30036 extended_reg_mentioned_1, NULL);
30039 /* If profitable, negate (without causing overflow) integer constant
30040 of mode MODE at location LOC. Return true in this case. */
30042 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
30046 if (!CONST_INT_P (*loc))
30052 /* DImode x86_64 constants must fit in 32 bits. */
30053 gcc_assert (x86_64_immediate_operand (*loc, mode));
30064 gcc_unreachable ();
30067 /* Avoid overflows. */
30068 if (mode_signbit_p (mode, *loc))
30071 val = INTVAL (*loc);
30073 /* Make things pretty by emitting `subl $4,%eax' rather than `addl $-4,%eax'.
30074 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
30075 if ((val < 0 && val != -128)
30078 *loc = GEN_INT (-val);
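/* Illustrative example of the transformation: "addl $-4, %eax" becomes
   "subl $4, %eax" (same length, easier to read), while -128 is left
   alone because it fits in a sign-extended 8-bit immediate and +128
   would not.  */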
30085 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
30086 optabs would emit if we didn't have TFmode patterns. */
30089 x86_emit_floatuns (rtx operands[2])
30091 rtx neglab, donelab, i0, i1, f0, in, out;
30092 enum machine_mode mode, inmode;
30094 inmode = GET_MODE (operands[1]);
30095 gcc_assert (inmode == SImode || inmode == DImode);
30098 in = force_reg (inmode, operands[1]);
30099 mode = GET_MODE (out);
30100 neglab = gen_label_rtx ();
30101 donelab = gen_label_rtx ();
30102 f0 = gen_reg_rtx (mode);
30104 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
30106 expand_float (out, in, 0);
30108 emit_jump_insn (gen_jump (donelab));
30111 emit_label (neglab);
30113 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
30115 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
30117 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
30119 expand_float (f0, i0, 0);
30121 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
30123 emit_label (donelab);
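/* Illustrative C equivalent (not part of GCC) of the DImode expansion
   above: convert unsigned 64-bit U to floating point using only the
   signed conversion.  The low bit is folded into the halved value
   ("round to odd") so the final doubling still rounds correctly.  */
static double
floatuns_sketch (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;  /* Fits in the signed range.  */

  /* Halve with the low bit preserved, convert, then double.  */
  return 2.0 * (double) (long long) ((u >> 1) | (u & 1));
}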
30126 /* AVX does not support 32-byte integer vector operations,
30127 thus the longest vector we are faced with is V16QImode. */
30128 #define MAX_VECT_LEN 16
30130 struct expand_vec_perm_d
30132 rtx target, op0, op1;
30133 unsigned char perm[MAX_VECT_LEN];
30134 enum machine_mode vmode;
30135 unsigned char nelt;
30139 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
30140 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
30142 /* Get a vector mode of the same size as the original but with elements
30143 twice as wide. This is only guaranteed to apply to integral vectors. */
30145 static inline enum machine_mode
30146 get_mode_wider_vector (enum machine_mode o)
30148 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
30149 enum machine_mode n = GET_MODE_WIDER_MODE (o);
30150 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
30151 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
30155 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30156 with all elements equal to VAR. Return true if successful. */
30159 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
30160 rtx target, rtx val)
30183 /* First attempt to recognize VAL as-is. */
30184 dup = gen_rtx_VEC_DUPLICATE (mode, val);
30185 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
30186 if (recog_memoized (insn) < 0)
30189 /* If that fails, force VAL into a register. */
30192 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
30193 seq = get_insns ();
30196 emit_insn_before (seq, insn);
30198 ok = recog_memoized (insn) >= 0;
30207 if (TARGET_SSE || TARGET_3DNOW_A)
30211 val = gen_lowpart (SImode, val);
30212 x = gen_rtx_TRUNCATE (HImode, val);
30213 x = gen_rtx_VEC_DUPLICATE (mode, x);
30214 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30227 struct expand_vec_perm_d dperm;
30231 memset (&dperm, 0, sizeof (dperm));
30232 dperm.target = target;
30233 dperm.vmode = mode;
30234 dperm.nelt = GET_MODE_NUNITS (mode);
30235 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
30237 /* Extend to SImode using a paradoxical SUBREG. */
30238 tmp1 = gen_reg_rtx (SImode);
30239 emit_move_insn (tmp1, gen_lowpart (SImode, val));
30241 /* Insert the SImode value as low element of a V4SImode vector. */
30242 tmp2 = gen_lowpart (V4SImode, dperm.op0);
30243 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
30245 ok = (expand_vec_perm_1 (&dperm)
30246 || expand_vec_perm_broadcast_1 (&dperm));
30258 /* Replicate the value once into the next wider mode and recurse. */
30260 enum machine_mode smode, wsmode, wvmode;
30263 smode = GET_MODE_INNER (mode);
30264 wvmode = get_mode_wider_vector (mode);
30265 wsmode = GET_MODE_INNER (wvmode);
30267 val = convert_modes (wsmode, smode, val, true);
30268 x = expand_simple_binop (wsmode, ASHIFT, val,
30269 GEN_INT (GET_MODE_BITSIZE (smode)),
30270 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30271 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
30273 x = gen_lowpart (wvmode, target);
30274 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
30282 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
30283 rtx x = gen_reg_rtx (hvmode);
30285 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
30288 x = gen_rtx_VEC_CONCAT (mode, x, x);
30289 emit_insn (gen_rtx_SET (VOIDmode, target, x));
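/* Illustrative example of the widening recursion above (hypothetical
   values): to broadcast the QImode value 0xAB, first build the HImode
   value 0xABAB as val | (val << 8), then splat that wider value; each
   step halves the number of elements left to replicate.  */
static unsigned short
widen_splat_sketch (unsigned char val)
{
  return (unsigned short) ((val << 8) | val);
}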
30298 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30299 whose ONE_VAR element is VAR, and other elements are zero. Return true
30303 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
30304 rtx target, rtx var, int one_var)
30306 enum machine_mode vsimode;
30309 bool use_vector_set = false;
30314 /* For SSE4.1, we normally use vector set. But if the second
30315 element is zero and inter-unit moves are OK, we use movq
30317 use_vector_set = (TARGET_64BIT
30319 && !(TARGET_INTER_UNIT_MOVES
30325 use_vector_set = TARGET_SSE4_1;
30328 use_vector_set = TARGET_SSE2;
30331 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
30338 use_vector_set = TARGET_AVX;
30341 /* Use ix86_expand_vector_set in 64bit mode only. */
30342 use_vector_set = TARGET_AVX && TARGET_64BIT;
30348 if (use_vector_set)
30350 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
30351 var = force_reg (GET_MODE_INNER (mode), var);
30352 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30368 var = force_reg (GET_MODE_INNER (mode), var);
30369 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
30370 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30375 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
30376 new_target = gen_reg_rtx (mode);
30378 new_target = target;
30379 var = force_reg (GET_MODE_INNER (mode), var);
30380 x = gen_rtx_VEC_DUPLICATE (mode, var);
30381 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
30382 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
30385 /* We need to shuffle the value to the correct position, so
30386 create a new pseudo to store the intermediate result. */
30388 /* With SSE2, we can use the integer shuffle insns. */
30389 if (mode != V4SFmode && TARGET_SSE2)
30391 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
30393 GEN_INT (one_var == 1 ? 0 : 1),
30394 GEN_INT (one_var == 2 ? 0 : 1),
30395 GEN_INT (one_var == 3 ? 0 : 1)));
30396 if (target != new_target)
30397 emit_move_insn (target, new_target);
30401 /* Otherwise convert the intermediate result to V4SFmode and
30402 use the SSE1 shuffle instructions. */
30403 if (mode != V4SFmode)
30405 tmp = gen_reg_rtx (V4SFmode);
30406 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30411 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30413 GEN_INT (one_var == 1 ? 0 : 1),
30414 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30415 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30417 if (mode != V4SFmode)
30418 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30419 else if (tmp != target)
30420 emit_move_insn (target, tmp);
30422 else if (target != new_target)
30423 emit_move_insn (target, new_target);
30428 vsimode = V4SImode;
30434 vsimode = V2SImode;
30440 /* Zero extend the variable element to SImode and recurse. */
30441 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30443 x = gen_reg_rtx (vsimode);
30444 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30446 gcc_unreachable ();
30448 emit_move_insn (target, gen_lowpart (mode, x));
30456 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30457 consisting of the values in VALS. It is known that all elements
30458 except ONE_VAR are constants. Return true if successful. */
30461 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30462 rtx target, rtx vals, int one_var)
30464 rtx var = XVECEXP (vals, 0, one_var);
30465 enum machine_mode wmode;
30468 const_vec = copy_rtx (vals);
30469 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30470 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30478 /* For the two element vectors, it's just as easy to use
30479 the general case. */
30483 /* Use ix86_expand_vector_set in 64bit mode only. */
30506 /* There's no way to set one QImode entry easily. Combine
30507 the variable value with its adjacent constant value, and
30508 promote to an HImode set. */
30509 x = XVECEXP (vals, 0, one_var ^ 1);
30512 var = convert_modes (HImode, QImode, var, true);
30513 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30514 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30515 x = GEN_INT (INTVAL (x) & 0xff);
30519 var = convert_modes (HImode, QImode, var, true);
30520 x = gen_int_mode (INTVAL (x) << 8, HImode);
30522 if (x != const0_rtx)
30523 var = expand_simple_binop (HImode, IOR, var, x, var,
30524 1, OPTAB_LIB_WIDEN);
30526 x = gen_reg_rtx (wmode);
30527 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30528 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30530 emit_move_insn (target, gen_lowpart (mode, x));
30537 emit_move_insn (target, const_vec);
30538 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30542 /* A subroutine of ix86_expand_vector_init_general. Use vector
30543 concatenate to handle the most general case: all values variable,
30544 and none identical. */
30547 ix86_expand_vector_init_concat (enum machine_mode mode,
30548 rtx target, rtx *ops, int n)
30550 enum machine_mode cmode, hmode = VOIDmode;
30551 rtx first[8], second[4];
30591 gcc_unreachable ();
30594 if (!register_operand (ops[1], cmode))
30595 ops[1] = force_reg (cmode, ops[1]);
30596 if (!register_operand (ops[0], cmode))
30597 ops[0] = force_reg (cmode, ops[0]);
30598 emit_insn (gen_rtx_SET (VOIDmode, target,
30599 gen_rtx_VEC_CONCAT (mode, ops[0],
30619 gcc_unreachable ();
30635 gcc_unreachable ();
30640 /* FIXME: We process inputs backward to help RA. PR 36222. */
30643 for (; i > 0; i -= 2, j--)
30645 first[j] = gen_reg_rtx (cmode);
30646 v = gen_rtvec (2, ops[i - 1], ops[i]);
30647 ix86_expand_vector_init (false, first[j],
30648 gen_rtx_PARALLEL (cmode, v));
30654 gcc_assert (hmode != VOIDmode);
30655 for (i = j = 0; i < n; i += 2, j++)
30657 second[j] = gen_reg_rtx (hmode);
30658 ix86_expand_vector_init_concat (hmode, second [j],
30662 ix86_expand_vector_init_concat (mode, target, second, n);
30665 ix86_expand_vector_init_concat (mode, target, first, n);
30669 gcc_unreachable ();
30673 /* A subroutine of ix86_expand_vector_init_general. Use vector
30674 interleave to handle the most general case: all values variable,
30675 and none identical. */
30678 ix86_expand_vector_init_interleave (enum machine_mode mode,
30679 rtx target, rtx *ops, int n)
30681 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30684 rtx (*gen_load_even) (rtx, rtx, rtx);
30685 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30686 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30691 gen_load_even = gen_vec_setv8hi;
30692 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30693 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30694 inner_mode = HImode;
30695 first_imode = V4SImode;
30696 second_imode = V2DImode;
30697 third_imode = VOIDmode;
30700 gen_load_even = gen_vec_setv16qi;
30701 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30702 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30703 inner_mode = QImode;
30704 first_imode = V8HImode;
30705 second_imode = V4SImode;
30706 third_imode = V2DImode;
30709 gcc_unreachable ();
30712 for (i = 0; i < n; i++)
30714 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30715 op0 = gen_reg_rtx (SImode);
30716 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30718 /* Insert the SImode value as low element of V4SImode vector. */
30719 op1 = gen_reg_rtx (V4SImode);
30720 op0 = gen_rtx_VEC_MERGE (V4SImode,
30721 gen_rtx_VEC_DUPLICATE (V4SImode,
30723 CONST0_RTX (V4SImode),
30725 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30727 /* Cast the V4SImode vector back to a vector in the original mode. */
30728 op0 = gen_reg_rtx (mode);
30729 emit_move_insn (op0, gen_lowpart (mode, op1));
30731 /* Load even elements into the second position. */
30732 emit_insn (gen_load_even (op0,
30733 force_reg (inner_mode,
30737 /* Cast vector to FIRST_IMODE vector. */
30738 ops[i] = gen_reg_rtx (first_imode);
30739 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30742 /* Interleave low FIRST_IMODE vectors. */
30743 for (i = j = 0; i < n; i += 2, j++)
30745 op0 = gen_reg_rtx (first_imode);
30746 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30748 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30749 ops[j] = gen_reg_rtx (second_imode);
30750 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30753 /* Interleave low SECOND_IMODE vectors. */
30754 switch (second_imode)
30757 for (i = j = 0; i < n / 2; i += 2, j++)
30759 op0 = gen_reg_rtx (second_imode);
30760 emit_insn (gen_interleave_second_low (op0, ops[i],
30763 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
30765 ops[j] = gen_reg_rtx (third_imode);
30766 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30768 second_imode = V2DImode;
30769 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30773 op0 = gen_reg_rtx (second_imode);
30774 emit_insn (gen_interleave_second_low (op0, ops[0],
30777 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
30779 emit_insn (gen_rtx_SET (VOIDmode, target,
30780 gen_lowpart (mode, op0)));
30784 gcc_unreachable ();
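/* Illustrative data flow for the V8HImode case above (hypothetical
   element names e0..e7): each scalar pair is packed into the low SI
   lane of one vector, then the interleave-low steps zip the lanes:

     {e0 e1 . .} {e2 e3 . .} {e4 e5 . .} {e6 e7 . .}
       punpckldq  -> {e0 e1 e2 e3 . . . .} {e4 e5 e6 e7 . . . .}
       punpcklqdq -> {e0 e1 e2 e3 e4 e5 e6 e7}  */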
30788 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30789 all values variable, and none identical. */
30792 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30793 rtx target, rtx vals)
30795 rtx ops[32], op0, op1;
30796 enum machine_mode half_mode = VOIDmode;
30803 if (!mmx_ok && !TARGET_SSE)
30815 n = GET_MODE_NUNITS (mode);
30816 for (i = 0; i < n; i++)
30817 ops[i] = XVECEXP (vals, 0, i);
30818 ix86_expand_vector_init_concat (mode, target, ops, n);
30822 half_mode = V16QImode;
30826 half_mode = V8HImode;
30830 n = GET_MODE_NUNITS (mode);
30831 for (i = 0; i < n; i++)
30832 ops[i] = XVECEXP (vals, 0, i);
30833 op0 = gen_reg_rtx (half_mode);
30834 op1 = gen_reg_rtx (half_mode);
30835 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30837 ix86_expand_vector_init_interleave (half_mode, op1,
30838 &ops [n >> 1], n >> 2);
30839 emit_insn (gen_rtx_SET (VOIDmode, target,
30840 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30844 if (!TARGET_SSE4_1)
30852 /* Don't use ix86_expand_vector_init_interleave if we can't
30853 move from GPR to SSE register directly. */
30854 if (!TARGET_INTER_UNIT_MOVES)
30857 n = GET_MODE_NUNITS (mode);
30858 for (i = 0; i < n; i++)
30859 ops[i] = XVECEXP (vals, 0, i);
30860 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30868 gcc_unreachable ();
30872 int i, j, n_elts, n_words, n_elt_per_word;
30873 enum machine_mode inner_mode;
30874 rtx words[4], shift;
30876 inner_mode = GET_MODE_INNER (mode);
30877 n_elts = GET_MODE_NUNITS (mode);
30878 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30879 n_elt_per_word = n_elts / n_words;
30880 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30882 for (i = 0; i < n_words; ++i)
30884 rtx word = NULL_RTX;
30886 for (j = 0; j < n_elt_per_word; ++j)
30888 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30889 elt = convert_modes (word_mode, inner_mode, elt, true);
30895 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30896 word, 1, OPTAB_LIB_WIDEN);
30897 word = expand_simple_binop (word_mode, IOR, word, elt,
30898 word, 1, OPTAB_LIB_WIDEN);
30906 emit_move_insn (target, gen_lowpart (mode, words[0]));
30907 else if (n_words == 2)
30909 rtx tmp = gen_reg_rtx (mode);
30910 emit_clobber (tmp);
30911 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30912 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30913 emit_move_insn (target, tmp);
30915 else if (n_words == 4)
30917 rtx tmp = gen_reg_rtx (V4SImode);
30918 gcc_assert (word_mode == SImode);
30919 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30920 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30921 emit_move_insn (target, gen_lowpart (mode, tmp));
30924 gcc_unreachable ();
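/* Illustrative C model (not part of GCC) of the word-packing loop
   above, for two HImode elements per SImode word: the last element of
   the word is loaded first and earlier ones are shifted in below it,
   matching little-endian lane order.  */
static unsigned int
pack_word_sketch (unsigned short e0, unsigned short e1)
{
  unsigned int word = e1;        /* Highest element first.  */
  word = (word << 16) | e0;      /* Shift and merge the lower one.  */
  return word;
}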
30928 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30929 instructions unless MMX_OK is true. */
30932 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30934 enum machine_mode mode = GET_MODE (target);
30935 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30936 int n_elts = GET_MODE_NUNITS (mode);
30937 int n_var = 0, one_var = -1;
30938 bool all_same = true, all_const_zero = true;
30942 for (i = 0; i < n_elts; ++i)
30944 x = XVECEXP (vals, 0, i);
30945 if (!(CONST_INT_P (x)
30946 || GET_CODE (x) == CONST_DOUBLE
30947 || GET_CODE (x) == CONST_FIXED))
30948 n_var++, one_var = i;
30949 else if (x != CONST0_RTX (inner_mode))
30950 all_const_zero = false;
30951 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30955 /* Constants are best loaded from the constant pool. */
30958 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30962 /* If all values are identical, broadcast the value. */
30964 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30965 XVECEXP (vals, 0, 0)))
30968 /* Values where only one field is non-constant are best loaded from
30969 the pool and overwritten via move later. */
30973 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30974 XVECEXP (vals, 0, one_var),
30978 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30982 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30986 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30988 enum machine_mode mode = GET_MODE (target);
30989 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30990 enum machine_mode half_mode;
30991 bool use_vec_merge = false;
30993 static rtx (*gen_extract[6][2]) (rtx, rtx)
30995 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30996 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30997 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30998 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30999 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
31000 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
31002 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
31004 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
31005 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
31006 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
31007 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
31008 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
31009 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
31019 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
31020 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
31022 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
31024 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
31025 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31031 use_vec_merge = TARGET_SSE4_1;
31039 /* For the two element vectors, we implement a VEC_CONCAT with
31040 the extraction of the other element. */
31042 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
31043 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
31046 op0 = val, op1 = tmp;
31048 op0 = tmp, op1 = val;
31050 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
31051 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31056 use_vec_merge = TARGET_SSE4_1;
31063 use_vec_merge = true;
31067 /* tmp = target = A B C D */
31068 tmp = copy_to_reg (target);
31069 /* target = A A B B */
31070 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
31071 /* target = X A B B */
31072 ix86_expand_vector_set (false, target, val, 0);
31073 /* target = A X C D */
31074 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31075 const1_rtx, const0_rtx,
31076 GEN_INT (2+4), GEN_INT (3+4)));
31080 /* tmp = target = A B C D */
31081 tmp = copy_to_reg (target);
31082 /* tmp = X B C D */
31083 ix86_expand_vector_set (false, tmp, val, 0);
31084 /* target = A B X D */
31085 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31086 const0_rtx, const1_rtx,
31087 GEN_INT (0+4), GEN_INT (3+4)));
31091 /* tmp = target = A B C D */
31092 tmp = copy_to_reg (target);
31093 /* tmp = X B C D */
31094 ix86_expand_vector_set (false, tmp, val, 0);
31095 /* target = A B C X */
31096 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
31097 const0_rtx, const1_rtx,
31098 GEN_INT (2+4), GEN_INT (0+4)));
31102 gcc_unreachable ();
31107 use_vec_merge = TARGET_SSE4_1;
31111 /* Element 0 handled by vec_merge below. */
31114 use_vec_merge = true;
31120 /* With SSE2, use integer shuffles to swap element 0 and ELT,
31121 store into element 0, then shuffle them back. */
31125 order[0] = GEN_INT (elt);
31126 order[1] = const1_rtx;
31127 order[2] = const2_rtx;
31128 order[3] = GEN_INT (3);
31129 order[elt] = const0_rtx;
31131 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31132 order[1], order[2], order[3]));
31134 ix86_expand_vector_set (false, target, val, 0);
31136 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
31137 order[1], order[2], order[3]));
31141 /* For SSE1, we have to reuse the V4SF code. */
31142 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
31143 gen_lowpart (SFmode, val), elt);
31148 use_vec_merge = TARGET_SSE2;
31151 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31155 use_vec_merge = TARGET_SSE4_1;
31162 half_mode = V16QImode;
31168 half_mode = V8HImode;
31174 half_mode = V4SImode;
31180 half_mode = V2DImode;
31186 half_mode = V4SFmode;
31192 half_mode = V2DFmode;
31198 /* Compute offset. */
31202 gcc_assert (i <= 1);
31204 /* Extract the half. */
31205 tmp = gen_reg_rtx (half_mode);
31206 emit_insn (gen_extract[j][i] (tmp, target));
31208 /* Put val in tmp at elt. */
31209 ix86_expand_vector_set (false, tmp, val, elt);
31212 emit_insn (gen_insert[j][i] (target, target, tmp));
31221 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
31222 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
31223 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31227 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31229 emit_move_insn (mem, target);
31231 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31232 emit_move_insn (tmp, val);
31234 emit_move_insn (target, mem);
31239 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
31241 enum machine_mode mode = GET_MODE (vec);
31242 enum machine_mode inner_mode = GET_MODE_INNER (mode);
31243 bool use_vec_extr = false;
31256 use_vec_extr = true;
31260 use_vec_extr = TARGET_SSE4_1;
31272 tmp = gen_reg_rtx (mode);
31273 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
31274 GEN_INT (elt), GEN_INT (elt),
31275 GEN_INT (elt+4), GEN_INT (elt+4)));
31279 tmp = gen_reg_rtx (mode);
31280 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
31284 gcc_unreachable ();
31287 use_vec_extr = true;
31292 use_vec_extr = TARGET_SSE4_1;
31306 tmp = gen_reg_rtx (mode);
31307 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
31308 GEN_INT (elt), GEN_INT (elt),
31309 GEN_INT (elt), GEN_INT (elt)));
31313 tmp = gen_reg_rtx (mode);
31314 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
31318 gcc_unreachable ();
31321 use_vec_extr = true;
31326 /* For SSE1, we have to reuse the V4SF code. */
31327 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
31328 gen_lowpart (V4SFmode, vec), elt);
31334 use_vec_extr = TARGET_SSE2;
31337 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
31341 use_vec_extr = TARGET_SSE4_1;
31345 /* ??? Could extract the appropriate HImode element and shift. */
31352 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
31353 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
31355 /* Let the rtl optimizers know about the zero extension performed. */
31356 if (inner_mode == QImode || inner_mode == HImode)
31358 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
31359 target = gen_lowpart (SImode, target);
31362 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
31366 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
31368 emit_move_insn (mem, vec);
31370 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
31371 emit_move_insn (target, tmp);
31375 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
31376 pattern to reduce; DEST is the destination; IN is the input vector. */
31379 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
31381 rtx tmp1, tmp2, tmp3;
31383 tmp1 = gen_reg_rtx (V4SFmode);
31384 tmp2 = gen_reg_rtx (V4SFmode);
31385 tmp3 = gen_reg_rtx (V4SFmode);
31387 emit_insn (gen_sse_movhlps (tmp1, in, in));
31388 emit_insn (fn (tmp2, tmp1, in));
31390 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
31391 const1_rtx, const1_rtx,
31392 GEN_INT (1+4), GEN_INT (1+4)));
31393 emit_insn (fn (dest, tmp2, tmp3));
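/* Illustrative lane view of the reduction above, with IN = {a b c d}
   and FN = addition (hypothetical values):

     tmp1 = movhlps (in, in)           = {c d c d}
     tmp2 = tmp1 + in                  = {a+c b+d . .}
     tmp3 = shufps (tmp2, tmp2, elt 1) = {b+d b+d b+d b+d}
     dest = tmp2 + tmp3                = {a+b+c+d . . .}  */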
31396 /* Target hook for scalar_mode_supported_p. */
31398 ix86_scalar_mode_supported_p (enum machine_mode mode)
31400 if (DECIMAL_FLOAT_MODE_P (mode))
31401 return default_decimal_float_supported_p ();
31402 else if (mode == TFmode)
31405 return default_scalar_mode_supported_p (mode);
31408 /* Implements target hook vector_mode_supported_p. */
31410 ix86_vector_mode_supported_p (enum machine_mode mode)
31412 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31414 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31416 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31418 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31420 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31425 /* Target hook for c_mode_for_suffix. */
31426 static enum machine_mode
31427 ix86_c_mode_for_suffix (char suffix)
31437 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31439 We do this in the new i386 backend to maintain source compatibility
31440 with the old cc0-based compiler. */
31443 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31444 tree inputs ATTRIBUTE_UNUSED,
31447 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31449 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31454 /* Implements the target hook targetm.asm.encode_section_info. This
31455 is not used by NetWare. */
31457 static void ATTRIBUTE_UNUSED
31458 ix86_encode_section_info (tree decl, rtx rtl, int first)
31460 default_encode_section_info (decl, rtl, first);
31462 if (TREE_CODE (decl) == VAR_DECL
31463 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31464 && ix86_in_large_data_p (decl))
31465 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31468 /* Worker function for REVERSE_CONDITION. */
31471 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31473 return (mode != CCFPmode && mode != CCFPUmode
31474 ? reverse_condition (code)
31475 : reverse_condition_maybe_unordered (code));
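/* For example, reversing GT yields LE for integer modes, but for the
   floating point CCFPmode/CCFPUmode it must yield UNLE: !(a > b) also
   holds when A and B are unordered (one of them is a NaN).  */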
31478 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31482 output_387_reg_move (rtx insn, rtx *operands)
31484 if (REG_P (operands[0]))
31486 if (REG_P (operands[1])
31487 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31489 if (REGNO (operands[0]) == FIRST_STACK_REG)
31490 return output_387_ffreep (operands, 0);
31491 return "fstp\t%y0";
31493 if (STACK_TOP_P (operands[0]))
31494 return "fld%Z1\t%y1";
31497 else if (MEM_P (operands[0]))
31499 gcc_assert (REG_P (operands[1]));
31500 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31501 return "fstp%Z0\t%y0";
31504 /* There is no non-popping store to memory for XFmode.
31505 So if we need one, follow the store with a load. */
31506 if (GET_MODE (operands[0]) == XFmode)
31507 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31509 return "fst%Z0\t%y0";
31516 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
31517 the FP status register is set. */
31520 ix86_emit_fp_unordered_jump (rtx label)
31522 rtx reg = gen_reg_rtx (HImode);
31525 emit_insn (gen_x86_fnstsw_1 (reg));
31527 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31529 emit_insn (gen_x86_sahf_1 (reg));
31531 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31532 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31536 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31538 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31539 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31542 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31543 gen_rtx_LABEL_REF (VOIDmode, label),
31545 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31547 emit_jump_insn (temp);
31548 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31551 /* Output code to perform a log1p XFmode calculation. */
31553 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31555 rtx label1 = gen_label_rtx ();
31556 rtx label2 = gen_label_rtx ();
31558 rtx tmp = gen_reg_rtx (XFmode);
31559 rtx tmp2 = gen_reg_rtx (XFmode);
31562 emit_insn (gen_absxf2 (tmp, op1));
31563 test = gen_rtx_GE (VOIDmode, tmp,
31564 CONST_DOUBLE_FROM_REAL_VALUE (
31565 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31567 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31569 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31570 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31571 emit_jump (label2);
31573 emit_label (label1);
31574 emit_move_insn (tmp, CONST1_RTX (XFmode));
31575 emit_insn (gen_addxf3 (tmp, op1, tmp));
31576 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31577 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31579 emit_label (label2);
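/* The threshold 0.2928... above is 1 - sqrt(2)/2: the x87 fyl2xp1
   instruction is only specified for |x| below that bound, so larger
   arguments take the label1 path and use fyl2x on 1 + x instead.  */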
31582 /* Output code to perform a Newton-Raphson approximation of a single precision
31583 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31585 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31587 rtx x0, x1, e0, e1, two;
31589 x0 = gen_reg_rtx (mode);
31590 e0 = gen_reg_rtx (mode);
31591 e1 = gen_reg_rtx (mode);
31592 x1 = gen_reg_rtx (mode);
31594 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
31596 if (VECTOR_MODE_P (mode))
31597 two = ix86_build_const_vector (mode, true, two);
31599 two = force_reg (mode, two);
31601 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
31603 /* x0 = rcp(b) estimate */
31604 emit_insn (gen_rtx_SET (VOIDmode, x0,
31605 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31608 emit_insn (gen_rtx_SET (VOIDmode, e0,
31609 gen_rtx_MULT (mode, x0, a)));
31611 emit_insn (gen_rtx_SET (VOIDmode, e1,
31612 gen_rtx_MULT (mode, x0, b)));
31614 emit_insn (gen_rtx_SET (VOIDmode, x1,
31615 gen_rtx_MINUS (mode, two, e1)));
31616 /* res = e0 * x1 */
31617 emit_insn (gen_rtx_SET (VOIDmode, res,
31618 gen_rtx_MULT (mode, e0, x1)));
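/* Illustrative scalar model (not part of GCC) of the sequence above:
   one Newton-Raphson step applied to the hardware reciprocal estimate.
   RCP_ESTIMATE is a hypothetical stand-in for rcpss/rcpps, which is
   only accurate to roughly 12 bits.  */
static float
swdiv_sketch (float a, float b, float rcp_estimate)
{
  float x0 = rcp_estimate;   /* x0 ~= 1/b */
  float e0 = x0 * a;         /* e0 ~= a/b */
  float e1 = x0 * b;         /* e1 ~= 1 */
  float x1 = 2.0f - e1;      /* correction factor */
  return e0 * x1;            /* a/b ~= a * x0 * (2 - b * x0) */
}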
31621 /* Output code to perform a Newton-Raphson approximation of a
31622 single precision floating point [reciprocal] square root. */
31624 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31627 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31630 x0 = gen_reg_rtx (mode);
31631 e0 = gen_reg_rtx (mode);
31632 e1 = gen_reg_rtx (mode);
31633 e2 = gen_reg_rtx (mode);
31634 e3 = gen_reg_rtx (mode);
31636 real_from_integer (&r, VOIDmode, -3, -1, 0);
31637 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31639 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31640 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31642 if (VECTOR_MODE_P (mode))
31644 mthree = ix86_build_const_vector (mode, true, mthree);
31645 mhalf = ix86_build_const_vector (mode, true, mhalf);
31648 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31649 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31651 /* x0 = rsqrt(a) estimate */
31652 emit_insn (gen_rtx_SET (VOIDmode, x0,
31653 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31656 /* If a == 0.0, filter out the infinite estimate to prevent NaN for sqrt(0.0). */
31661 zero = gen_reg_rtx (mode);
31662 mask = gen_reg_rtx (mode);
31664 zero = force_reg (mode, CONST0_RTX(mode));
31665 emit_insn (gen_rtx_SET (VOIDmode, mask,
31666 gen_rtx_NE (mode, zero, a)));
31668 emit_insn (gen_rtx_SET (VOIDmode, x0,
31669 gen_rtx_AND (mode, x0, mask)));
31673 emit_insn (gen_rtx_SET (VOIDmode, e0,
31674 gen_rtx_MULT (mode, x0, a)));
31676 emit_insn (gen_rtx_SET (VOIDmode, e1,
31677 gen_rtx_MULT (mode, e0, x0)));
31680 mthree = force_reg (mode, mthree);
31681 emit_insn (gen_rtx_SET (VOIDmode, e2,
31682 gen_rtx_PLUS (mode, e1, mthree)));
31684 mhalf = force_reg (mode, mhalf);
31686 /* e3 = -.5 * x0 */
31687 emit_insn (gen_rtx_SET (VOIDmode, e3,
31688 gen_rtx_MULT (mode, x0, mhalf)));
31690 /* e3 = -.5 * e0 */
31691 emit_insn (gen_rtx_SET (VOIDmode, e3,
31692 gen_rtx_MULT (mode, e0, mhalf)));
31693 /* ret = e2 * e3 */
31694 emit_insn (gen_rtx_SET (VOIDmode, res,
31695 gen_rtx_MULT (mode, e2, e3)));
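/* Worked form of the sequence above (illustrative), with x0 the
   rsqrtss/rsqrtps estimate of 1/sqrt(a):

     e0 = a * x0
     e1 = e0 * x0          (~= 1)
     e2 = e1 - 3           (mthree is added, being negative)
     e3 = -0.5 * x0        (recip case)  or  -0.5 * e0  (sqrt case)
     res = e2 * e3

   i.e. one Newton-Raphson step x0 * (3 - a*x0*x0) / 2, multiplied
   through by A when the square root itself is wanted.  */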
31698 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31700 static void ATTRIBUTE_UNUSED
31701 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31704 /* With Binutils 2.15, the "@unwind" marker must be specified on
31705 every occurrence of the ".eh_frame" section, not just the first one. */
31708 && strcmp (name, ".eh_frame") == 0)
31710 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31711 flags & SECTION_WRITE ? "aw" : "a");
31714 default_elf_asm_named_section (name, flags, decl);
31717 /* Return the mangling of TYPE if it is an extended fundamental type. */
31719 static const char *
31720 ix86_mangle_type (const_tree type)
31722 type = TYPE_MAIN_VARIANT (type);
31724 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31725 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31728 switch (TYPE_MODE (type))
31731 /* __float128 is "g". */
31734 /* "long double" or __float80 is "e". */
31741 /* For 32-bit code we can save PIC register setup by using
31742 the __stack_chk_fail_local hidden function instead of calling
31743 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
31744 register, so it is better to call __stack_chk_fail directly. */
31747 ix86_stack_protect_fail (void)
31749 return TARGET_64BIT
31750 ? default_external_stack_protect_fail ()
31751 : default_hidden_stack_protect_fail ();
31754 /* Select a format to encode pointers in exception handling data. CODE
31755 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31756 true if the symbol may be affected by dynamic relocations.
31758 ??? All x86 object file formats are capable of representing this.
31759 After all, the relocation needed is the same as for the call insn.
31760 Whether or not a particular assembler allows us to enter such, I
31761 guess we'll have to see. */
31763 asm_preferred_eh_data_format (int code, int global)
31767 int type = DW_EH_PE_sdata8;
31769 || ix86_cmodel == CM_SMALL_PIC
31770 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31771 type = DW_EH_PE_sdata4;
31772 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31774 if (ix86_cmodel == CM_SMALL
31775 || (ix86_cmodel == CM_MEDIUM && code))
31776 return DW_EH_PE_udata4;
31777 return DW_EH_PE_absptr;
31780 /* Expand copysign from SIGN to the positive value ABS_VALUE
31781 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign-bit. */
31784 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31786 enum machine_mode mode = GET_MODE (sign);
31787 rtx sgn = gen_reg_rtx (mode);
31788 if (mask == NULL_RTX)
31790 enum machine_mode vmode;
31792 if (mode == SFmode)
31794 else if (mode == DFmode)
31799 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31800 if (!VECTOR_MODE_P (mode))
31802 /* We need to generate a scalar mode mask in this case. */
31803 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31804 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31805 mask = gen_reg_rtx (mode);
31806 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31810 mask = gen_rtx_NOT (mode, mask);
31811 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31812 gen_rtx_AND (mode, mask, sign)));
31813 emit_insn (gen_rtx_SET (VOIDmode, result,
31814 gen_rtx_IOR (mode, abs_value, sgn)));
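/* Bitwise view of the expansion above (illustrative): with SIGNBIT
   the IEEE sign-bit mask of the mode,

     result = abs_value | (sign & SIGNBIT)

   A caller-supplied MASK is the inverted fabs mask ~SIGNBIT, which is
   why it is complemented before the AND.  */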
31817 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31818 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
31821 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31823 enum machine_mode vmode, mode = GET_MODE (op0);
31826 xa = gen_reg_rtx (mode);
31827 if (mode == SFmode)
31829 else if (mode == DFmode)
31833 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31834 if (!VECTOR_MODE_P (mode))
31836 /* We need to generate a scalar mode mask in this case. */
31837 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31838 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31839 mask = gen_reg_rtx (mode);
31840 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31842 emit_insn (gen_rtx_SET (VOIDmode, xa,
31843 gen_rtx_AND (mode, op0, mask)));
31851 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31852 swapping the operands if SWAP_OPERANDS is true. The expanded
31853 code is a forward jump to a newly created label in case the
31854 comparison is true. The generated label rtx is returned. */
31856 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31857 bool swap_operands)
31868 label = gen_label_rtx ();
31869 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31870 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31871 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31872 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31873 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31874 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31875 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31876 JUMP_LABEL (tmp) = label;
31881 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31882 using comparison code CODE. Operands are swapped for the comparison if
31883 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31885 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31886 bool swap_operands)
31888 enum machine_mode mode = GET_MODE (op0);
31889 rtx mask = gen_reg_rtx (mode);
31898 if (mode == DFmode)
31899 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
31900 gen_rtx_fmt_ee (code, mode, op0, op1)));
31902 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
31903 gen_rtx_fmt_ee (code, mode, op0, op1)));
31908 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31909 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31911 ix86_gen_TWO52 (enum machine_mode mode)
31913 REAL_VALUE_TYPE TWO52r;
31916 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31917 TWO52 = const_double_from_real_value (TWO52r, mode);
31918 TWO52 = force_reg (mode, TWO52);
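/* Why the 2**52 trick used by the expanders below works (illustrative,
   DFmode case): for 0 <= x < 2**52, the sum x + 2**52 lies in
   [2**52, 2**53), where one ulp of a double is exactly 1.0, so the
   addition itself rounds X to an integer; subtracting 2**52 then
   recovers that integer exactly:

     double rint_sketch (double x) { return (x + 0x1p52) - 0x1p52; }

   (assumes the default round-to-nearest mode).  */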
31923 /* Expand SSE sequence for computing lround from OP1, storing into OP0. */
31926 ix86_expand_lround (rtx op0, rtx op1)
31928 /* C code for the stuff we're doing below:
31929 tmp = op1 + copysign (nextafter (0.5, 0.0), op1); return (long) tmp; */
31932 enum machine_mode mode = GET_MODE (op1);
31933 const struct real_format *fmt;
31934 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31937 /* load nextafter (0.5, 0.0) */
31938 fmt = REAL_MODE_FORMAT (mode);
31939 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31940 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31942 /* adj = copysign (0.5, op1) */
31943 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
31944 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31946 /* adj = op1 + adj */
31947 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31949 /* op0 = (imode)adj */
31950 expand_fix (op0, adj, 0);
31953 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1, storing into OPERAND0. */
31956 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31958 /* C code for the stuff we're doing below (for do_floor):
31960 xi -= (double)xi > op1 ? 1 : 0;
31963 enum machine_mode fmode = GET_MODE (op1);
31964 enum machine_mode imode = GET_MODE (op0);
31965 rtx ireg, freg, label, tmp;
31967 /* reg = (long)op1 */
31968 ireg = gen_reg_rtx (imode);
31969 expand_fix (ireg, op1, 0);
31971 /* freg = (double)reg */
31972 freg = gen_reg_rtx (fmode);
31973 expand_float (freg, ireg, 0);
31975 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31976 label = ix86_expand_sse_compare_and_jump (UNLE,
31977 freg, op1, !do_floor);
31978 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31979 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31980 emit_move_insn (ireg, tmp);
31982 emit_label (label);
31983 LABEL_NUSES (label) = 1;
31985 emit_move_insn (op0, ireg);
31988 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31989 result in OPERAND0. */
31991 ix86_expand_rint (rtx operand0, rtx operand1)
31993 /* C code for the stuff we're doing below:
31994 xa = fabs (operand1);
31995 if (!isless (xa, 2**52))
31997 xa = xa + 2**52 - 2**52;
31998 return copysign (xa, operand1);
32000 enum machine_mode mode = GET_MODE (operand0);
32001 rtx res, xa, label, TWO52, mask;
32003 res = gen_reg_rtx (mode);
32004 emit_move_insn (res, operand1);
32006 /* xa = abs (operand1) */
32007 xa = ix86_expand_sse_fabs (res, &mask);
32009 /* if (!isless (xa, TWO52)) goto label; */
32010 TWO52 = ix86_gen_TWO52 (mode);
32011 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32013 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32014 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32016 ix86_sse_copysign_to_positive (res, xa, res, mask);
32018 emit_label (label);
32019 LABEL_NUSES (label) = 1;
32021 emit_move_insn (operand0, res);
32024 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
32027 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
32029 /* C code for the stuff we expand below.
32030 double xa = fabs (x), x2;
32031 if (!isless (xa, TWO52))
32033 xa = xa + TWO52 - TWO52;
32034 x2 = copysign (xa, x);
32043 enum machine_mode mode = GET_MODE (operand0);
32044 rtx xa, TWO52, tmp, label, one, res, mask;
32046 TWO52 = ix86_gen_TWO52 (mode);
32048 /* Temporary for holding the result, initialized to the input
32049 operand to ease control flow. */
32050 res = gen_reg_rtx (mode);
32051 emit_move_insn (res, operand1);
32053 /* xa = abs (operand1) */
32054 xa = ix86_expand_sse_fabs (res, &mask);
32056 /* if (!isless (xa, TWO52)) goto label; */
32057 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32059 /* xa = xa + TWO52 - TWO52; */
32060 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32061 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
32063 /* xa = copysign (xa, operand1) */
32064 ix86_sse_copysign_to_positive (xa, xa, res, mask);
32066 /* generate 1.0 or -1.0 */
32067 one = force_reg (mode,
32068 const_double_from_real_value (do_floor
32069 ? dconst1 : dconstm1, mode));
32071 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32072 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32073 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32074 gen_rtx_AND (mode, one, tmp)));
32075 /* We always need to subtract here to preserve signed zero. */
32076 tmp = expand_simple_binop (mode, MINUS,
32077 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32078 emit_move_insn (res, tmp);
32080 emit_label (label);
32081 LABEL_NUSES (label) = 1;
32083 emit_move_insn (operand0, res);
32086 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
32089 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
32091 /* C code for the stuff we expand below.
32092 double xa = fabs (x), x2;
32093 if (!isless (xa, TWO52))
32095 x2 = (double)(long)x;
32102 if (HONOR_SIGNED_ZEROS (mode))
32103 return copysign (x2, x);
32106 enum machine_mode mode = GET_MODE (operand0);
32107 rtx xa, xi, TWO52, tmp, label, one, res, mask;
32109 TWO52 = ix86_gen_TWO52 (mode);
32111 /* Temporary for holding the result, initialized to the input
32112 operand to ease control flow. */
32113 res = gen_reg_rtx (mode);
32114 emit_move_insn (res, operand1);
32116 /* xa = abs (operand1) */
32117 xa = ix86_expand_sse_fabs (res, &mask);
32119 /* if (!isless (xa, TWO52)) goto label; */
32120 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32122 /* xa = (double)(long)x */
32123 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32124 expand_fix (xi, res, 0);
32125 expand_float (xa, xi, 0);
32128 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32130 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
32131 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
32132 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32133 gen_rtx_AND (mode, one, tmp)));
32134 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
32135 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32136 emit_move_insn (res, tmp);
32138 if (HONOR_SIGNED_ZEROS (mode))
32139 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32141 emit_label (label);
32142 LABEL_NUSES (label) = 1;
32144 emit_move_insn (operand0, res);
32147 /* Expand SSE sequence for computing round from OPERAND1 storing
32148 into OPERAND0. This sequence works without relying on DImode truncation
32149 via cvttsd2siq, which is only available on 64-bit targets. */
32151 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
32153 /* C code for the stuff we expand below.
32154 double xa = fabs (x), xa2, x2;
32155 if (!isless (xa, TWO52))
32157 Using the absolute value and copying back sign makes
32158 -0.0 -> -0.0 correct.
32159 xa2 = xa + TWO52 - TWO52;
32164 else if (dxa > 0.5)
32166 x2 = copysign (xa2, x);
32169 enum machine_mode mode = GET_MODE (operand0);
32170 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
32172 TWO52 = ix86_gen_TWO52 (mode);
32174 /* Temporary for holding the result, initialized to the input
32175 operand to ease control flow. */
32176 res = gen_reg_rtx (mode);
32177 emit_move_insn (res, operand1);
32179 /* xa = abs (operand1) */
32180 xa = ix86_expand_sse_fabs (res, &mask);
32182 /* if (!isless (xa, TWO52)) goto label; */
32183 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32185 /* xa2 = xa + TWO52 - TWO52; */
32186 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32187 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
32189 /* dxa = xa2 - xa; */
32190 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
32192 /* generate 0.5, 1.0 and -0.5 */
32193 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
32194 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
32195 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
32199 tmp = gen_reg_rtx (mode);
32200 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
32201 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
32202 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32203 gen_rtx_AND (mode, one, tmp)));
32204 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32205 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
32206 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
32207 emit_insn (gen_rtx_SET (VOIDmode, tmp,
32208 gen_rtx_AND (mode, one, tmp)));
32209 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
32211 /* res = copysign (xa2, operand1) */
32212 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
32214 emit_label (label);
32215 LABEL_NUSES (label) = 1;
32217 emit_move_insn (operand0, res);
32220 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
32223 ix86_expand_trunc (rtx operand0, rtx operand1)
32225 /* C code for SSE variant we expand below.
32226 double xa = fabs (x), x2;
32227 if (!isless (xa, TWO52))
32229 x2 = (double)(long)x;
32230 if (HONOR_SIGNED_ZEROS (mode))
32231 return copysign (x2, x);
32234 enum machine_mode mode = GET_MODE (operand0);
32235 rtx xa, xi, TWO52, label, res, mask;
32237 TWO52 = ix86_gen_TWO52 (mode);
32239 /* Temporary for holding the result, initialized to the input
32240 operand to ease control flow. */
32241 res = gen_reg_rtx (mode);
32242 emit_move_insn (res, operand1);
32244 /* xa = abs (operand1) */
32245 xa = ix86_expand_sse_fabs (res, &mask);
32247 /* if (!isless (xa, TWO52)) goto label; */
32248 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32250 /* x = (double)(long)x */
32251 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32252 expand_fix (xi, res, 0);
32253 expand_float (res, xi, 0);
32255 if (HONOR_SIGNED_ZEROS (mode))
32256 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
32258 emit_label (label);
32259 LABEL_NUSES (label) = 1;
32261 emit_move_insn (operand0, res);
32264 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
32267 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
32269 enum machine_mode mode = GET_MODE (operand0);
32270 rtx xa, mask, TWO52, label, one, res, smask, tmp;
32272 /* C code for SSE variant we expand below.
32273 double xa = fabs (x), x2;
32274 if (!isless (xa, TWO52))
32276 xa2 = xa + TWO52 - TWO52;
32280 x2 = copysign (xa2, x);
32284 TWO52 = ix86_gen_TWO52 (mode);
32286 /* Temporary for holding the result, initialized to the input
32287 operand to ease control flow. */
32288 res = gen_reg_rtx (mode);
32289 emit_move_insn (res, operand1);
32291 /* xa = abs (operand1) */
32292 xa = ix86_expand_sse_fabs (res, &smask);
32294 /* if (!isless (xa, TWO52)) goto label; */
32295 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32297 /* res = xa + TWO52 - TWO52; */
32298 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
32299 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
32300 emit_move_insn (res, tmp);
32303 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
32305 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
32306 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
32307 emit_insn (gen_rtx_SET (VOIDmode, mask,
32308 gen_rtx_AND (mode, mask, one)));
32309 tmp = expand_simple_binop (mode, MINUS,
32310 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
32311 emit_move_insn (res, tmp);
32313 /* res = copysign (res, operand1) */
32314 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
32316 emit_label (label);
32317 LABEL_NUSES (label) = 1;
32319 emit_move_insn (operand0, res);
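/* For illustration, a scalar trace of the sequence above
   (a sketch, not emitted code), for x = -1.7:

     xa  = fabs (x)             => 1.7
     res = xa + TWO52 - TWO52   => 2.0 (round to nearest)
     res > xa                   => true: res -= 1.0 => 1.0
     res = copysign (1.0, x)    => -1.0

   i.e. the compensation step turns round-to-nearest into
   truncation toward zero. */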
32322 /* Expand SSE sequence for computing round from OPERAND1 storing
32325 ix86_expand_round (rtx operand0, rtx operand1)
32327 /* C code for the stuff we're doing below:
32328 double xa = fabs (x);
32329 if (!isless (xa, TWO52))
32331 xa = (double)(long)(xa + nextafter (0.5, 0.0));
32332 return copysign (xa, x);
32334 enum machine_mode mode = GET_MODE (operand0);
32335 rtx res, TWO52, xa, label, xi, half, mask;
32336 const struct real_format *fmt;
32337 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
32339 /* Temporary for holding the result, initialized to the input
32340 operand to ease control flow. */
32341 res = gen_reg_rtx (mode);
32342 emit_move_insn (res, operand1);
32344 TWO52 = ix86_gen_TWO52 (mode);
32345 xa = ix86_expand_sse_fabs (res, &mask);
32346 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
32348 /* load nextafter (0.5, 0.0) */
32349 fmt = REAL_MODE_FORMAT (mode);
32350 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
32351 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
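/* For double precision (p == 53) this computes
   pred_half = 0.5 - 2**-54, i.e. nextafter (0.5, 0.0), the largest
   representable value strictly below 0.5. Adding it instead of 0.5
   keeps inputs just under a halfway point, e.g. the double
   0.49999999999999994, from being rounded up to 1. */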
32353 /* xa = xa + 0.5 */
32354 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
32355 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
32357 /* xa = (double)(int64_t)xa */
32358 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
32359 expand_fix (xi, xa, 0);
32360 expand_float (xa, xi, 0);
32362 /* res = copysign (xa, operand1) */
32363 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
32365 emit_label (label);
32366 LABEL_NUSES (label) = 1;
32368 emit_move_insn (operand0, res);
32372 /* Table of valid machine attributes. */
32373 static const struct attribute_spec ix86_attribute_table[] =
32375 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
32376 /* Stdcall attribute says callee is responsible for popping arguments
32377 if they are not variable. */
32378 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32379 /* Fastcall attribute says callee is responsible for popping arguments
32380 if they are not variable. */
32381 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32382 /* Thiscall attribute says callee is responsible for popping arguments
32383 if they are not variable. */
32384 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32385 /* Cdecl attribute says the callee is a normal C declaration */
32386 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32387 /* Regparm attribute specifies how many integer arguments are to be
32388 passed in registers. */
32389 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
32390 /* Sseregparm attribute says we are using x86_64 calling conventions
32391 for FP arguments. */
32392 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
32393 /* force_align_arg_pointer says this function realigns the stack at entry. */
32394 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
32395 false, true, true, ix86_handle_cconv_attribute },
32396 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
32397 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
32398 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
32399 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
32401 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32402 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
32403 #ifdef SUBTARGET_ATTRIBUTE_TABLE
32404 SUBTARGET_ATTRIBUTE_TABLE,
32406 /* ms_abi and sysv_abi calling convention function attributes. */
32407 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32408 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32409 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
32410 { "callee_pop_aggregate_return", 1, 1, false, true, true,
32411 ix86_handle_callee_pop_aggregate_return },
32413 { NULL, 0, 0, false, false, false, NULL }
32416 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32418 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32419 tree vectype ATTRIBUTE_UNUSED,
32420 int misalign ATTRIBUTE_UNUSED)
32422 switch (type_of_cost)
32425 return ix86_cost->scalar_stmt_cost;
32428 return ix86_cost->scalar_load_cost;
32431 return ix86_cost->scalar_store_cost;
32434 return ix86_cost->vec_stmt_cost;
32437 return ix86_cost->vec_align_load_cost;
32440 return ix86_cost->vec_store_cost;
32442 case vec_to_scalar:
32443 return ix86_cost->vec_to_scalar_cost;
32445 case scalar_to_vec:
32446 return ix86_cost->scalar_to_vec_cost;
32448 case unaligned_load:
32449 case unaligned_store:
32450 return ix86_cost->vec_unalign_load_cost;
32452 case cond_branch_taken:
32453 return ix86_cost->cond_taken_branch_cost;
32455 case cond_branch_not_taken:
32456 return ix86_cost->cond_not_taken_branch_cost;
32462 gcc_unreachable ();
32467 /* Implement targetm.vectorize.builtin_vec_perm. */
32470 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32472 tree itype = TREE_TYPE (vec_type);
32473 bool u = TYPE_UNSIGNED (itype);
32474 enum machine_mode vmode = TYPE_MODE (vec_type);
32475 enum ix86_builtins fcode;
32476 bool ok = TARGET_SSE2;
32482 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32485 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32487 itype = ix86_get_builtin_type (IX86_BT_DI);
32492 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32496 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32498 itype = ix86_get_builtin_type (IX86_BT_SI);
32502 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32505 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32508 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32511 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32521 *mask_type = itype;
32522 return ix86_builtins[(int) fcode];
32525 /* Return a vector mode with twice as many elements as VMODE. */
32526 /* ??? Consider moving this to a table generated by genmodes.c. */
32528 static enum machine_mode
32529 doublesize_vector_mode (enum machine_mode vmode)
32533 case V2SFmode: return V4SFmode;
32534 case V1DImode: return V2DImode;
32535 case V2SImode: return V4SImode;
32536 case V4HImode: return V8HImode;
32537 case V8QImode: return V16QImode;
32539 case V2DFmode: return V4DFmode;
32540 case V4SFmode: return V8SFmode;
32541 case V2DImode: return V4DImode;
32542 case V4SImode: return V8SImode;
32543 case V8HImode: return V16HImode;
32544 case V16QImode: return V32QImode;
32546 case V4DFmode: return V8DFmode;
32547 case V8SFmode: return V16SFmode;
32548 case V4DImode: return V8DImode;
32549 case V8SImode: return V16SImode;
32550 case V16HImode: return V32HImode;
32551 case V32QImode: return V64QImode;
32554 gcc_unreachable ();
32558 /* Construct (set target (vec_select op0 (parallel perm))) and
32559 return true if that's a valid instruction in the active ISA. */
32562 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32564 rtx rperm[MAX_VECT_LEN], x;
32567 for (i = 0; i < nelt; ++i)
32568 rperm[i] = GEN_INT (perm[i]);
32570 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32571 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32572 x = gen_rtx_SET (VOIDmode, target, x);
32575 if (recog_memoized (x) < 0)
32583 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32586 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32587 const unsigned char *perm, unsigned nelt)
32589 enum machine_mode v2mode;
32592 v2mode = doublesize_vector_mode (GET_MODE (op0));
32593 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32594 return expand_vselect (target, x, perm, nelt);
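/* For intuition (an illustrative sketch): with V4SFmode and
   perm = { 1, 0, 3, 2 }, expand_vselect builds

     (set target (vec_select:V4SF op0 (parallel [1 0 3 2])))

   while expand_vselect_vconcat builds the two-operand form

     (set target (vec_select:V4SF (vec_concat:V8SF op0 op1)
                                  (parallel [...])))

   either of which succeeds only if recog matches an enabled
   pattern, e.g. shufps or unpck[lh]ps. */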
32597 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32598 in terms of blendp[sd] / pblendw / pblendvb. */
32601 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32603 enum machine_mode vmode = d->vmode;
32604 unsigned i, mask, nelt = d->nelt;
32605 rtx target, op0, op1, x;
32607 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32609 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32612 /* This is a blend, not a permute. Elements must stay in their
32613 respective lanes. */
32614 for (i = 0; i < nelt; ++i)
32616 unsigned e = d->perm[i];
32617 if (!(e == i || e == i + nelt))
32624 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32625 decision should be extracted elsewhere, so that we only try that
32626 sequence once all budget==3 options have been tried. */
32628 /* For bytes, see if bytes move in pairs so we can use pblendw with
32629 an immediate argument, rather than pblendvb with a vector argument. */
32630 if (vmode == V16QImode)
32632 bool pblendw_ok = true;
32633 for (i = 0; i < 16 && pblendw_ok; i += 2)
32634 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32638 rtx rperm[16], vperm;
32640 for (i = 0; i < nelt; ++i)
32641 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32643 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32644 vperm = force_reg (V16QImode, vperm);
32646 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32651 target = d->target;
32663 for (i = 0; i < nelt; ++i)
32664 mask |= (d->perm[i] >= nelt) << i;
32668 for (i = 0; i < 2; ++i)
32669 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32673 for (i = 0; i < 4; ++i)
32674 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32678 for (i = 0; i < 8; ++i)
32679 mask |= (d->perm[i * 2] >= 16) << i;
32683 target = gen_lowpart (vmode, target);
32684 op0 = gen_lowpart (vmode, op0);
32685 op1 = gen_lowpart (vmode, op1);
32689 gcc_unreachable ();
32692 /* This matches five different patterns with the different modes. */
32693 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32694 x = gen_rtx_SET (VOIDmode, target, x);
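/* Mask construction example (illustrative): for V8HImode with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, elements 1, 3, 5 and 7 come
   from op1, so mask = 0xaa and a pblendw $0xaa is emitted. The
   64-bit-element cases instead widen each permutation index into
   several mask bits, as the loops above show, before lowparting to
   a narrower blend. */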
32700 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32701 in terms of the variable form of vpermilps.
32703 Note that we will have already failed the immediate input vpermilps,
32704 which requires that the high and low part shuffle be identical; the
32705 variable form doesn't require that. */
32708 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32710 rtx rperm[8], vperm;
32713 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32716 /* We can only permute within the 128-bit lane. */
32717 for (i = 0; i < 8; ++i)
32719 unsigned e = d->perm[i];
32720 if (i < 4 ? e >= 4 : e < 4)
32727 for (i = 0; i < 8; ++i)
32729 unsigned e = d->perm[i];
32731 /* Within each 128-bit lane, the elements of op0 are numbered
32732 from 0 and the elements of op1 are numbered from 4. */
32738 rperm[i] = GEN_INT (e);
32741 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32742 vperm = force_reg (V8SImode, vperm);
32743 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
32748 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32749 in terms of pshufb or vpperm. */
32752 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32754 unsigned i, nelt, eltsz;
32755 rtx rperm[16], vperm, target, op0, op1;
32757 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32759 if (GET_MODE_SIZE (d->vmode) != 16)
32766 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32768 for (i = 0; i < nelt; ++i)
32770 unsigned j, e = d->perm[i];
32771 for (j = 0; j < eltsz; ++j)
32772 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32775 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32776 vperm = force_reg (V16QImode, vperm);
32778 target = gen_lowpart (V16QImode, d->target);
32779 op0 = gen_lowpart (V16QImode, d->op0);
32780 if (d->op0 == d->op1)
32781 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32784 op1 = gen_lowpart (V16QImode, d->op1);
32785 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
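/* Selector construction example (illustrative): for V4SImode
   (eltsz == 4) with perm = { 2, 3, 0, 1 }, the byte-level selector
   fed to pshufb/vpperm is

     { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }

   i.e. each element index e expands to the bytes
   e*eltsz .. e*eltsz + eltsz - 1. */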
32791 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32792 in a single instruction. */
32795 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32797 unsigned i, nelt = d->nelt;
32798 unsigned char perm2[MAX_VECT_LEN];
32800 /* Check plain VEC_SELECT first, because AVX has instructions that could
32801 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32802 input where SEL+CONCAT may not. */
32803 if (d->op0 == d->op1)
32805 int mask = nelt - 1;
32807 for (i = 0; i < nelt; i++)
32808 perm2[i] = d->perm[i] & mask;
32810 if (expand_vselect (d->target, d->op0, perm2, nelt))
32813 /* There are plenty of patterns in sse.md that are written for
32814 SEL+CONCAT and are not replicated for a single op. Perhaps
32815 that should be changed, to avoid the nastiness here. */
32817 /* Recognize interleave style patterns, which means incrementing
32818 every other permutation operand. */
32819 for (i = 0; i < nelt; i += 2)
32821 perm2[i] = d->perm[i] & mask;
32822 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32824 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32827 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32830 for (i = 0; i < nelt; i += 4)
32832 perm2[i + 0] = d->perm[i + 0] & mask;
32833 perm2[i + 1] = d->perm[i + 1] & mask;
32834 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32835 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32838 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32843 /* Finally, try the fully general two operand permute. */
32844 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32847 /* Recognize interleave style patterns with reversed operands. */
32848 if (d->op0 != d->op1)
32850 for (i = 0; i < nelt; ++i)
32852 unsigned e = d->perm[i];
32860 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32864 /* Try the SSE4.1 blend variable merge instructions. */
32865 if (expand_vec_perm_blend (d))
32868 /* Try one of the AVX vpermil variable permutations. */
32869 if (expand_vec_perm_vpermil (d))
32872 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32873 if (expand_vec_perm_pshufb (d))
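/* Example of the interleave recognition above (illustrative): with
   d->op0 == d->op1, nelt == 4 and d->perm = { 0, 0, 1, 1 }, perm2
   becomes { 0, 4, 1, 5 }, which expand_vselect_vconcat can match as
   a single interleave-low (unpcklps/punpckldq) of op0 with itself. */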
32879 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32880 in terms of a pair of pshuflw + pshufhw instructions. */
32883 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32885 unsigned char perm2[MAX_VECT_LEN];
32889 if (d->vmode != V8HImode || d->op0 != d->op1)
32892 /* The two permutations only operate in 64-bit lanes. */
32893 for (i = 0; i < 4; ++i)
32894 if (d->perm[i] >= 4)
32896 for (i = 4; i < 8; ++i)
32897 if (d->perm[i] < 4)
32903 /* Emit the pshuflw. */
32904 memcpy (perm2, d->perm, 4);
32905 for (i = 4; i < 8; ++i)
32907 ok = expand_vselect (d->target, d->op0, perm2, 8);
32910 /* Emit the pshufhw. */
32911 memcpy (perm2 + 4, d->perm + 4, 4);
32912 for (i = 0; i < 4; ++i)
32914 ok = expand_vselect (d->target, d->target, perm2, 8);
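/* Worked example (illustrative): for
   d->perm = { 2, 0, 3, 1, 6, 4, 7, 5 } the first expand_vselect
   emits pshuflw with { 2, 0, 3, 1, 4, 5, 6, 7 } (high half left in
   place) and the second pshufhw with { 0, 1, 2, 3, 6, 4, 7, 5 }
   (low half left in place). */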
32920 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32921 the permutation using the SSSE3 palignr instruction. This succeeds
32922 when all of the elements in PERM fit within one vector and we merely
32923 need to shift them down so that a single vector permutation has a
32924 chance to succeed. */
32927 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
32929 unsigned i, nelt = d->nelt;
32934 /* Even with AVX, palignr only operates on 128-bit vectors. */
32935 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32938 min = nelt, max = 0;
32939 for (i = 0; i < nelt; ++i)
32941 unsigned e = d->perm[i];
32947 if (min == 0 || max - min >= nelt)
32950 /* Given that we have SSSE3, we know we'll be able to implement the
32951 single operand permutation after the palignr with pshufb. */
32955 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32956 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32957 gen_lowpart (TImode, d->op1),
32958 gen_lowpart (TImode, d->op0), shift));
32960 d->op0 = d->op1 = d->target;
32963 for (i = 0; i < nelt; ++i)
32965 unsigned e = d->perm[i] - min;
32971 /* Test for the degenerate case where the alignment by itself
32972 produces the desired permutation. */
32976 ok = expand_vec_perm_1 (d);
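/* Illustration (a sketch): for V16QImode, if every d->perm[i] lies
   in [3, 18] then min == 3, and a palignr by 3 bytes on the op1:op0
   pair leaves bytes 3..18 in d->target. The adjusted permutation
   d->perm[i] - 3 is then single-operand; if it happens to be the
   identity 0, 1, ..., 15, the palignr alone already produced the
   result. */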
32982 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32983 a two vector permutation into a single vector permutation by using
32984 an interleave operation to merge the vectors. */
32987 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32989 struct expand_vec_perm_d dremap, dfinal;
32990 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32991 unsigned contents, h1, h2, h3, h4;
32992 unsigned char remap[2 * MAX_VECT_LEN];
32996 if (d->op0 == d->op1)
32999 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
33000 lanes. We can use similar techniques with the vperm2f128 instruction,
33001 but it requires slightly different logic. */
33002 if (GET_MODE_SIZE (d->vmode) != 16)
33005 /* Examine from whence the elements come. */
33007 for (i = 0; i < nelt; ++i)
33008 contents |= 1u << d->perm[i];
33010 /* Split the two input vectors into 4 halves. */
33011 h1 = (1u << nelt2) - 1;
33016 memset (remap, 0xff, sizeof (remap));
33019 /* If the elements are all from the low halves, use interleave low; similarly
33020 for interleave high. If the elements are from mismatched halves, we
33021 can use shufps for V4SF/V4SI or do a DImode shuffle. */
33022 if ((contents & (h1 | h3)) == contents)
33024 for (i = 0; i < nelt2; ++i)
33027 remap[i + nelt] = i * 2 + 1;
33028 dremap.perm[i * 2] = i;
33029 dremap.perm[i * 2 + 1] = i + nelt;
33032 else if ((contents & (h2 | h4)) == contents)
33034 for (i = 0; i < nelt2; ++i)
33036 remap[i + nelt2] = i * 2;
33037 remap[i + nelt + nelt2] = i * 2 + 1;
33038 dremap.perm[i * 2] = i + nelt2;
33039 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
33042 else if ((contents & (h1 | h4)) == contents)
33044 for (i = 0; i < nelt2; ++i)
33047 remap[i + nelt + nelt2] = i + nelt2;
33048 dremap.perm[i] = i;
33049 dremap.perm[i + nelt2] = i + nelt + nelt2;
33053 dremap.vmode = V2DImode;
33055 dremap.perm[0] = 0;
33056 dremap.perm[1] = 3;
33059 else if ((contents & (h2 | h3)) == contents)
33061 for (i = 0; i < nelt2; ++i)
33063 remap[i + nelt2] = i;
33064 remap[i + nelt] = i + nelt2;
33065 dremap.perm[i] = i + nelt2;
33066 dremap.perm[i + nelt2] = i + nelt;
33070 dremap.vmode = V2DImode;
33072 dremap.perm[0] = 1;
33073 dremap.perm[1] = 2;
33079 /* Use the remapping array set up above to move the elements from their
33080 swizzled locations into their final destinations. */
33082 for (i = 0; i < nelt; ++i)
33084 unsigned e = remap[d->perm[i]];
33085 gcc_assert (e < nelt);
33086 dfinal.perm[i] = e;
33088 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
33089 dfinal.op1 = dfinal.op0;
33090 dremap.target = dfinal.op0;
33092 /* Test if the final remap can be done with a single insn. For V4SFmode or
33093 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
33095 ok = expand_vec_perm_1 (&dfinal);
33096 seq = get_insns ();
33102 if (dremap.vmode != dfinal.vmode)
33104 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
33105 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
33106 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
33109 ok = expand_vec_perm_1 (&dremap);
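/* Worked example (illustrative): for V4SFmode with
   d->perm = { 1, 5, 0, 4 }, only elements { 0, 1, 4, 5 } are used,
   i.e. both low halves; dremap becomes the interleave-low
   { 0, 4, 1, 5 } computed into a temporary, and remap[] rewrites
   the original indices to dfinal.perm = { 2, 3, 0, 1 }, a
   single-operand shuffle expand_vec_perm_1 handles directly. */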
33116 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
33117 permutation with two pshufb insns and an ior. We should have already
33118 failed all two instruction sequences. */
33121 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
33123 rtx rperm[2][16], vperm, l, h, op, m128;
33124 unsigned int i, nelt, eltsz;
33126 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
33128 gcc_assert (d->op0 != d->op1);
33131 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
33133 /* Generate two permutation masks. If the required element is within
33134 the given vector it is shuffled into the proper lane. If the required
33135 element is in the other vector, force a zero into the lane by setting
33136 bit 7 in the permutation mask. */
33137 m128 = GEN_INT (-128);
33138 for (i = 0; i < nelt; ++i)
33140 unsigned j, e = d->perm[i];
33141 unsigned which = (e >= nelt);
33145 for (j = 0; j < eltsz; ++j)
33147 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
33148 rperm[1-which][i*eltsz + j] = m128;
33152 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
33153 vperm = force_reg (V16QImode, vperm);
33155 l = gen_reg_rtx (V16QImode);
33156 op = gen_lowpart (V16QImode, d->op0);
33157 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
33159 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
33160 vperm = force_reg (V16QImode, vperm);
33162 h = gen_reg_rtx (V16QImode);
33163 op = gen_lowpart (V16QImode, d->op1);
33164 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
33166 op = gen_lowpart (V16QImode, d->target);
33167 emit_insn (gen_iorv16qi3 (op, l, h));
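/* Illustration (a sketch): each pshufb mask keeps the bytes its
   operand contributes and forces 0x80 (hence zero) into the lanes
   the other operand owns, so a lane taken from op1 is zero in L and
   holds the selected byte in H; the final por of L and H then
   assembles the complete result. */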
33172 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
33173 and extract-odd permutations. */
33176 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
33183 t1 = gen_reg_rtx (V4DFmode);
33184 t2 = gen_reg_rtx (V4DFmode);
33186 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
33187 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
33188 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
33190 /* Now an unpck[lh]pd will produce the result required. */
33192 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
33194 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
33200 int mask = odd ? 0xdd : 0x88;
33202 t1 = gen_reg_rtx (V8SFmode);
33203 t2 = gen_reg_rtx (V8SFmode);
33204 t3 = gen_reg_rtx (V8SFmode);
33206 /* Shuffle within the 128-bit lanes to produce:
33207 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
33208 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
33211 /* Shuffle the lanes around to produce:
33212 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
33213 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
33216 /* Shuffle within the 128-bit lanes to produce:
33217 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
33218 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
33220 /* Shuffle within the 128-bit lanes to produce:
33221 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
33222 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
33224 /* Shuffle the lanes around to produce:
33225 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
33226 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
33235 /* These are always directly implementable by expand_vec_perm_1. */
33236 gcc_unreachable ();
33240 return expand_vec_perm_pshufb2 (d);
33243 /* We need 2*log2(N)-1 operations to achieve odd/even
33244 with interleave. */
33245 t1 = gen_reg_rtx (V8HImode);
33246 t2 = gen_reg_rtx (V8HImode);
33247 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
33248 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
33249 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
33250 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
33252 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
33254 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
33261 return expand_vec_perm_pshufb2 (d);
33264 t1 = gen_reg_rtx (V16QImode);
33265 t2 = gen_reg_rtx (V16QImode);
33266 t3 = gen_reg_rtx (V16QImode);
33267 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
33268 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
33269 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
33270 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
33271 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
33272 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
33274 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
33276 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
33282 gcc_unreachable ();
33288 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33289 extract-even and extract-odd permutations. */
33292 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
33294 unsigned i, odd, nelt = d->nelt;
33297 if (odd != 0 && odd != 1)
33300 for (i = 1; i < nelt; ++i)
33301 if (d->perm[i] != 2 * i + odd)
33304 return expand_vec_perm_even_odd_1 (d, odd);
33307 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
33308 permutations. We assume that expand_vec_perm_1 has already failed. */
33311 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
33313 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
33314 enum machine_mode vmode = d->vmode;
33315 unsigned char perm2[4];
33323 /* These are special-cased in sse.md so that we can optionally
33324 use the vbroadcast instruction. They expand to two insns
33325 if the input happens to be in a register. */
33326 gcc_unreachable ();
33332 /* These are always implementable using standard shuffle patterns. */
33333 gcc_unreachable ();
33337 /* These can be implemented via interleave. We save one insn by
33338 stopping once we have promoted to V4SImode and then use pshufd. */
33341 optab otab = vec_interleave_low_optab;
33345 otab = vec_interleave_high_optab;
33350 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
33351 vmode = get_mode_wider_vector (vmode);
33352 op0 = gen_lowpart (vmode, op0);
33354 while (vmode != V4SImode);
33356 memset (perm2, elt, 4);
33357 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
33362 gcc_unreachable ();
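/* Illustration (a sketch): broadcasting element 3 of a V16QImode
   vector interleaves the vector with itself (low half, since
   3 < nelt2), reaching V8HImode, then again to V4SImode; by then
   all four copies of the byte fill one SImode lane, and the final
   pshufd with { elt, elt, elt, elt } replicates that lane. */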
33366 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
33367 broadcast permutations. */
33370 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
33372 unsigned i, elt, nelt = d->nelt;
33374 if (d->op0 != d->op1)
33378 for (i = 1; i < nelt; ++i)
33379 if (d->perm[i] != elt)
33382 return expand_vec_perm_broadcast_1 (d);
33385 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
33386 With all of the interface bits taken care of, perform the expansion
33387 in D and return true on success. */
33390 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
33392 /* Try a single instruction expansion. */
33393 if (expand_vec_perm_1 (d))
33396 /* Try sequences of two instructions. */
33398 if (expand_vec_perm_pshuflw_pshufhw (d))
33401 if (expand_vec_perm_palignr (d))
33404 if (expand_vec_perm_interleave2 (d))
33407 if (expand_vec_perm_broadcast (d))
33410 /* Try sequences of three instructions. */
33412 if (expand_vec_perm_pshufb2 (d))
33415 /* ??? Look for narrow permutations whose element orderings would
33416 allow the promotion to a wider mode. */
33418 /* ??? Look for sequences of interleave or a wider permute that place
33419 the data into the correct lanes for a half-vector shuffle like
33420 pshuf[lh]w or vpermilps. */
33422 /* ??? Look for sequences of interleave that produce the desired results.
33423 The combinatorics of punpck[lh] get pretty ugly... */
33425 if (expand_vec_perm_even_odd (d))
33431 /* Extract the values from the vector CST into the permutation array in D.
33432 Return 0 on error, 1 if all values from the permutation come from the
33433 first vector, 2 if all values from the second vector, and 3 otherwise. */
33436 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33438 tree list = TREE_VECTOR_CST_ELTS (cst);
33439 unsigned i, nelt = d->nelt;
33442 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33444 unsigned HOST_WIDE_INT e;
33446 if (!host_integerp (TREE_VALUE (list), 1))
33448 e = tree_low_cst (TREE_VALUE (list), 1);
33452 ret |= (e < nelt ? 1 : 2);
33455 gcc_assert (list == NULL);
33457 /* For all elements from second vector, fold the elements to first. */
33459 for (i = 0; i < nelt; ++i)
33460 d->perm[i] -= nelt;
33466 ix86_expand_vec_perm_builtin (tree exp)
33468 struct expand_vec_perm_d d;
33469 tree arg0, arg1, arg2;
33471 arg0 = CALL_EXPR_ARG (exp, 0);
33472 arg1 = CALL_EXPR_ARG (exp, 1);
33473 arg2 = CALL_EXPR_ARG (exp, 2);
33475 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33476 d.nelt = GET_MODE_NUNITS (d.vmode);
33477 d.testing_p = false;
33478 gcc_assert (VECTOR_MODE_P (d.vmode));
33480 if (TREE_CODE (arg2) != VECTOR_CST)
33482 error_at (EXPR_LOCATION (exp),
33483 "vector permutation requires vector constant");
33487 switch (extract_vec_perm_cst (&d, arg2))
33493 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33497 if (!operand_equal_p (arg0, arg1, 0))
33499 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33500 d.op0 = force_reg (d.vmode, d.op0);
33501 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33502 d.op1 = force_reg (d.vmode, d.op1);
33506 /* The elements of PERM do not suggest that only the first operand
33507 is used, but both operands are identical. Allow easier matching
33508 of the permutation by folding the permutation into the single
33511 unsigned i, nelt = d.nelt;
33512 for (i = 0; i < nelt; ++i)
33513 if (d.perm[i] >= nelt)
33519 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33520 d.op0 = force_reg (d.vmode, d.op0);
33525 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33526 d.op0 = force_reg (d.vmode, d.op0);
33531 d.target = gen_reg_rtx (d.vmode);
33532 if (ix86_expand_vec_perm_builtin_1 (&d))
33535 /* For compiler-generated permutations, we should never get here, because
33536 the compiler should also be checking the ok hook. But since this is a
33537 builtin the user has access to, don't abort. */
33541 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33544 sorry ("vector permutation (%d %d %d %d)",
33545 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33548 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33549 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33550 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33553 sorry ("vector permutation "
33554 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33555 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33556 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33557 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33558 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33561 gcc_unreachable ();
33564 return CONST0_RTX (d.vmode);
33567 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33570 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33572 struct expand_vec_perm_d d;
33576 d.vmode = TYPE_MODE (vec_type);
33577 d.nelt = GET_MODE_NUNITS (d.vmode);
33578 d.testing_p = true;
33580 /* Given sufficient ISA support we can just return true here
33581 for selected vector modes. */
33582 if (GET_MODE_SIZE (d.vmode) == 16)
33584 /* All implementable with a single vpperm insn. */
33587 /* All implementable with 2 pshufb + 1 ior. */
33590 /* All implementable with shufpd or unpck[lh]pd. */
33595 vec_mask = extract_vec_perm_cst (&d, mask);
33597 /* This hook cannot be called in response to something that the
33598 user does (unlike the builtin expander), so we should never see
33599 an error generated from the extract. */
33600 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33601 one_vec = (vec_mask != 3);
33603 /* Implementable with shufps or pshufd. */
33604 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33607 /* Otherwise we have to go through the motions and see if we can
33608 figure out how to generate the requested permutation. */
33609 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33610 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33612 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33615 ret = ix86_expand_vec_perm_builtin_1 (&d);
33622 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33624 struct expand_vec_perm_d d;
33630 d.vmode = GET_MODE (targ);
33631 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33632 d.testing_p = false;
33634 for (i = 0; i < nelt; ++i)
33635 d.perm[i] = i * 2 + odd;
33637 /* We'll either be able to implement the permutation directly... */
33638 if (expand_vec_perm_1 (&d))
33641 /* ... or we use the special-case patterns. */
33642 expand_vec_perm_even_odd_1 (&d, odd);
33645 /* This function returns the calling-ABI-specific va_list type node,
33646 i.e. the va_list type specific to FNDECL. */
33649 ix86_fn_abi_va_list (tree fndecl)
33652 return va_list_type_node;
33653 gcc_assert (fndecl != NULL_TREE);
33655 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33656 return ms_va_list_type_node;
33658 return sysv_va_list_type_node;
33661 /* Returns the canonical va_list type specified by TYPE. If there
33662 is no valid TYPE provided, it returns NULL_TREE. */
33665 ix86_canonical_va_list_type (tree type)
33669 /* Resolve references and pointers to va_list type. */
33670 if (TREE_CODE (type) == MEM_REF)
33671 type = TREE_TYPE (type);
33672 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33673 type = TREE_TYPE (type);
33674 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33675 type = TREE_TYPE (type);
33679 wtype = va_list_type_node;
33680 gcc_assert (wtype != NULL_TREE);
33682 if (TREE_CODE (wtype) == ARRAY_TYPE)
33684 /* If va_list is an array type, the argument may have decayed
33685 to a pointer type, e.g. by being passed to another function.
33686 In that case, unwrap both types so that we can compare the
33687 underlying records. */
33688 if (TREE_CODE (htype) == ARRAY_TYPE
33689 || POINTER_TYPE_P (htype))
33691 wtype = TREE_TYPE (wtype);
33692 htype = TREE_TYPE (htype);
33695 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33696 return va_list_type_node;
33697 wtype = sysv_va_list_type_node;
33698 gcc_assert (wtype != NULL_TREE);
33700 if (TREE_CODE (wtype) == ARRAY_TYPE)
33702 /* If va_list is an array type, the argument may have decayed
33703 to a pointer type, e.g. by being passed to another function.
33704 In that case, unwrap both types so that we can compare the
33705 underlying records. */
33706 if (TREE_CODE (htype) == ARRAY_TYPE
33707 || POINTER_TYPE_P (htype))
33709 wtype = TREE_TYPE (wtype);
33710 htype = TREE_TYPE (htype);
33713 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33714 return sysv_va_list_type_node;
33715 wtype = ms_va_list_type_node;
33716 gcc_assert (wtype != NULL_TREE);
33718 if (TREE_CODE (wtype) == ARRAY_TYPE)
33720 /* If va_list is an array type, the argument may have decayed
33721 to a pointer type, e.g. by being passed to another function.
33722 In that case, unwrap both types so that we can compare the
33723 underlying records. */
33724 if (TREE_CODE (htype) == ARRAY_TYPE
33725 || POINTER_TYPE_P (htype))
33727 wtype = TREE_TYPE (wtype);
33728 htype = TREE_TYPE (htype);
33731 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33732 return ms_va_list_type_node;
33735 return std_canonical_va_list_type (type);
33738 /* Iterate through the target-specific builtin types for va_list.
33739 IDX denotes the iterator, *PTREE is set to the result type of
33740 the va_list builtin, and *PNAME to its internal type.
33741 Returns zero if there is no element for this index, otherwise
33742 IDX should be increased upon the next call.
33743 Note, do not iterate a base builtin's name like __builtin_va_list.
33744 Used from c_common_nodes_and_builtins. */
33747 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33757 *ptree = ms_va_list_type_node;
33758 *pname = "__builtin_ms_va_list";
33762 *ptree = sysv_va_list_type_node;
33763 *pname = "__builtin_sysv_va_list";
33771 #undef TARGET_SCHED_DISPATCH
33772 #define TARGET_SCHED_DISPATCH has_dispatch
33773 #undef TARGET_SCHED_DISPATCH_DO
33774 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33776 /* The size of the dispatch window is the total number of bytes of
33777 object code allowed in a window. */
33778 #define DISPATCH_WINDOW_SIZE 16
33780 /* Number of dispatch windows considered for scheduling. */
33781 #define MAX_DISPATCH_WINDOWS 3
33783 /* Maximum number of instructions in a window. */
33786 /* Maximum number of immediate operands in a window. */
33789 /* Maximum number of immediate bits allowed in a window. */
33790 #define MAX_IMM_SIZE 128
33792 /* Maximum number of 32 bit immediates allowed in a window. */
33793 #define MAX_IMM_32 4
33795 /* Maximum number of 64 bit immediates allowed in a window. */
33796 #define MAX_IMM_64 2
33798 /* Maximum total of loads or prefetches allowed in a window. */
33801 /* Maximum total of stores allowed in a window. */
33802 #define MAX_STORE 1
33808 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33809 enum dispatch_group {
33824 /* Number of allowable groups in a dispatch window. It is an array
33825 indexed by dispatch_group enum. 100 is used as a big number,
33826 because the number of operations of these kinds does not have any
33827 effect in a dispatch window, but we need them for other reasons in
33829 static unsigned int num_allowable_groups[disp_last] = {
33830 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33833 char group_name[disp_last + 1][16] = {
33834 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33835 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33836 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33839 /* Instruction path. */
33842 path_single, /* Single micro op. */
33843 path_double, /* Double micro op. */
33844 path_multi, /* Instructions with more than 2 micro ops. */
33848 /* sched_insn_info defines a window to the instructions scheduled in
33849 the basic block. It contains a pointer to the insn_info table and
33850 the instruction scheduled.
33852 Windows are allocated for each basic block and are linked
33854 typedef struct sched_insn_info_s {
33856 enum dispatch_group group;
33857 enum insn_path path;
33862 /* Linked list of dispatch windows. This is a two way list of
33863 dispatch windows of a basic block. It contains information about
33864 the number of uops in the window and the total number of
33865 instructions and of bytes in the object code for this dispatch
33867 typedef struct dispatch_windows_s {
33868 int num_insn; /* Number of insn in the window. */
33869 int num_uops; /* Number of uops in the window. */
33870 int window_size; /* Number of bytes in the window. */
33871 int window_num; /* Window number, either 0 or 1. */
33872 int num_imm; /* Number of immediates in the window. */
33873 int num_imm_32; /* Number of 32 bit immediates in the window. */
33874 int num_imm_64; /* Number of 64 bit immediates in the window. */
33875 int imm_size; /* Total size in bytes of immediates in the window. */
33876 int num_loads; /* Total memory loads in the window. */
33877 int num_stores; /* Total memory stores in the window. */
33878 int violation; /* Violation exists in window. */
33879 sched_insn_info *window; /* Pointer to the window. */
33880 struct dispatch_windows_s *next;
33881 struct dispatch_windows_s *prev;
33882 } dispatch_windows;
33884 /* Immediate values used in an insn. */
33885 typedef struct imm_info_s
33892 static dispatch_windows *dispatch_window_list;
33893 static dispatch_windows *dispatch_window_list1;
33895 /* Get dispatch group of insn. */
33897 static enum dispatch_group
33898 get_mem_group (rtx insn)
33900 enum attr_memory memory;
33902 if (INSN_CODE (insn) < 0)
33903 return disp_no_group;
33904 memory = get_attr_memory (insn);
33905 if (memory == MEMORY_STORE)
33908 if (memory == MEMORY_LOAD)
33911 if (memory == MEMORY_BOTH)
33912 return disp_load_store;
33914 return disp_no_group;
33917 /* Return true if insn is a compare instruction. */
33922 enum attr_type type;
33924 type = get_attr_type (insn);
33925 return (type == TYPE_TEST
33926 || type == TYPE_ICMP
33927 || type == TYPE_FCMP
33928 || GET_CODE (PATTERN (insn)) == COMPARE);
33931 /* Return true if a dispatch violation was encountered. */
33934 dispatch_violation (void)
33936 if (dispatch_window_list->next)
33937 return dispatch_window_list->next->violation;
33938 return dispatch_window_list->violation;
33941 /* Return true if insn is a branch instruction. */
33944 is_branch (rtx insn)
33946 return (CALL_P (insn) || JUMP_P (insn));
33949 /* Return true if insn is a prefetch instruction. */
33952 is_prefetch (rtx insn)
33954 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33957 /* This function initializes a dispatch window and the list container holding a
33958 pointer to the window. */
33961 init_window (int window_num)
33964 dispatch_windows *new_list;
33966 if (window_num == 0)
33967 new_list = dispatch_window_list;
33969 new_list = dispatch_window_list1;
33971 new_list->num_insn = 0;
33972 new_list->num_uops = 0;
33973 new_list->window_size = 0;
33974 new_list->next = NULL;
33975 new_list->prev = NULL;
33976 new_list->window_num = window_num;
33977 new_list->num_imm = 0;
33978 new_list->num_imm_32 = 0;
33979 new_list->num_imm_64 = 0;
33980 new_list->imm_size = 0;
33981 new_list->num_loads = 0;
33982 new_list->num_stores = 0;
33983 new_list->violation = false;
33985 for (i = 0; i < MAX_INSN; i++)
33987 new_list->window[i].insn = NULL;
33988 new_list->window[i].group = disp_no_group;
33989 new_list->window[i].path = no_path;
33990 new_list->window[i].byte_len = 0;
33991 new_list->window[i].imm_bytes = 0;
33996 /* This function allocates and initializes a dispatch window and the
33997 list container holding a pointer to the window. */
33999 static dispatch_windows *
34000 allocate_window (void)
34002 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
34003 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
34008 /* This routine initializes the dispatch scheduling information. It
34009 initiates building dispatch scheduler tables and constructs the
34010 first dispatch window. */
34013 init_dispatch_sched (void)
34015 /* Allocate a dispatch list and a window. */
34016 dispatch_window_list = allocate_window ();
34017 dispatch_window_list1 = allocate_window ();
34022 /* This function returns true if a branch is detected. End of a basic block
34023 does not have to be a branch, but here we assume only branches end a
34027 is_end_basic_block (enum dispatch_group group)
34029 return group == disp_branch;
34032 /* This function is called when the end of a window processing is reached. */
34035 process_end_window (void)
34037 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
34038 if (dispatch_window_list->next)
34040 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
34041 gcc_assert (dispatch_window_list->window_size
34042 + dispatch_window_list1->window_size <= 48);
34048 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
34049 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
34050 for 48 bytes of instructions. Note that these windows are not dispatch
34051 windows whose sizes are DISPATCH_WINDOW_SIZE. */
34053 static dispatch_windows *
34054 allocate_next_window (int window_num)
34056 if (window_num == 0)
34058 if (dispatch_window_list->next)
34061 return dispatch_window_list;
34064 dispatch_window_list->next = dispatch_window_list1;
34065 dispatch_window_list1->prev = dispatch_window_list;
34067 return dispatch_window_list1;
34070 /* Increment the number of immediate operands of an instruction. */
34073 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
34078 switch (GET_CODE (*in_rtx))
34083 (imm_values->imm)++;
34084 if (x86_64_immediate_operand (*in_rtx, SImode))
34085 (imm_values->imm32)++;
34087 (imm_values->imm64)++;
34091 (imm_values->imm)++;
34092 (imm_values->imm64)++;
34096 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
34098 (imm_values->imm)++;
34099 (imm_values->imm32)++;
34110 /* Compute number of immediate operands of an instruction. */
34113 find_constant (rtx in_rtx, imm_info *imm_values)
34115 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
34116 (rtx_function) find_constant_1, (void *) imm_values);
34119 /* Return total size of immediate operands of an instruction along with number
34120 of corresponding immediate operands. It initializes its parameters to zero
34121 before calling FIND_CONSTANT.
34122 INSN is the input instruction. IMM is the total of immediates.
34123 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
34127 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
34129 imm_info imm_values = {0, 0, 0};
34131 find_constant (insn, &imm_values);
34132 *imm = imm_values.imm;
34133 *imm32 = imm_values.imm32;
34134 *imm64 = imm_values.imm64;
34135 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
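/* Illustration (a sketch): for an insn such as "addq $1000, %rax"
   this would count imm == 1, imm32 == 1, imm64 == 0 and return 4,
   while a movabsq with a constant not fitting in 32 bits counts
   imm == imm64 == 1 and returns 8. */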
34138 /* This function indicates if an operand of an instruction is an
34142 has_immediate (rtx insn)
34144 int num_imm_operand;
34145 int num_imm32_operand;
34146 int num_imm64_operand;
34149 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34150 &num_imm64_operand);
34154 /* Return single or double path for instructions. */
34156 static enum insn_path
34157 get_insn_path (rtx insn)
34159 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
34161 if ((int)path == 0)
34162 return path_single;
34164 if ((int)path == 1)
34165 return path_double;
34170 /* Return insn dispatch group. */
34172 static enum dispatch_group
34173 get_insn_group (rtx insn)
34175 enum dispatch_group group = get_mem_group (insn);
34179 if (is_branch (insn))
34180 return disp_branch;
34185 if (has_immediate (insn))
34188 if (is_prefetch (insn))
34189 return disp_prefetch;
34191 return disp_no_group;
34194 /* Count number of GROUP restricted instructions in a dispatch
34195 window WINDOW_LIST. */
34198 count_num_restricted (rtx insn, dispatch_windows *window_list)
34200 enum dispatch_group group = get_insn_group (insn);
34202 int num_imm_operand;
34203 int num_imm32_operand;
34204 int num_imm64_operand;
34206 if (group == disp_no_group)
34209 if (group == disp_imm)
34211 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34212 &num_imm64_operand);
34213 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
34214 || num_imm_operand + window_list->num_imm > MAX_IMM
34215 || (num_imm32_operand > 0
34216 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
34217 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
34218 || (num_imm64_operand > 0
34219 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
34220 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
34221 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
34222 && num_imm64_operand > 0
34223 && ((window_list->num_imm_64 > 0
34224 && window_list->num_insn >= 2)
34225 || window_list->num_insn >= 3)))
34231 if ((group == disp_load_store
34232 && (window_list->num_loads >= MAX_LOAD
34233 || window_list->num_stores >= MAX_STORE))
34234 || ((group == disp_load
34235 || group == disp_prefetch)
34236 && window_list->num_loads >= MAX_LOAD)
34237 || (group == disp_store
34238 && window_list->num_stores >= MAX_STORE))
34244 /* This function returns true if insn satisfies dispatch rules on the
34245 last window scheduled. */
34248 fits_dispatch_window (rtx insn)
34250 dispatch_windows *window_list = dispatch_window_list;
34251 dispatch_windows *window_list_next = dispatch_window_list->next;
34252 unsigned int num_restrict;
34253 enum dispatch_group group = get_insn_group (insn);
34254 enum insn_path path = get_insn_path (insn);
34257 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
34258 instructions should be given the lowest priority in the
34259 scheduling process in the Haifa scheduler to make sure they will be
34260 scheduled in the same dispatch window as the reference to them. */
34261 if (group == disp_jcc || group == disp_cmp)
34264 /* Check nonrestricted. */
34265 if (group == disp_no_group || group == disp_branch)
34268 /* Get last dispatch window. */
34269 if (window_list_next)
34270 window_list = window_list_next;
34272 if (window_list->window_num == 1)
34274 sum = window_list->prev->window_size + window_list->window_size;
34277 || (min_insn_size (insn) + sum) >= 48)
34278 /* Window 1 is full. Go for next window. */
34282 num_restrict = count_num_restricted (insn, window_list);
34284 if (num_restrict > num_allowable_groups[group])
34287 /* See if it fits in the first window. */
34288 if (window_list->window_num == 0)
34290 /* The first window should have only single- and double-path
34292 if (path == path_double
34293 && (window_list->num_uops + 2) > MAX_INSN)
34295 else if (path != path_single)
34301 /* Add an instruction INSN with NUM_UOPS micro-operations to the
34302 dispatch window WINDOW_LIST. */
34305 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
34307 int byte_len = min_insn_size (insn);
34308 int num_insn = window_list->num_insn;
34310 sched_insn_info *window = window_list->window;
34311 enum dispatch_group group = get_insn_group (insn);
34312 enum insn_path path = get_insn_path (insn);
34313 int num_imm_operand;
34314 int num_imm32_operand;
34315 int num_imm64_operand;
34317 if (!window_list->violation && group != disp_cmp
34318 && !fits_dispatch_window (insn))
34319 window_list->violation = true;
34321 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34322 &num_imm64_operand);
34324 /* Initialize window with new instruction. */
34325 window[num_insn].insn = insn;
34326 window[num_insn].byte_len = byte_len;
34327 window[num_insn].group = group;
34328 window[num_insn].path = path;
34329 window[num_insn].imm_bytes = imm_size;
34331 window_list->window_size += byte_len;
34332 window_list->num_insn = num_insn + 1;
34333 window_list->num_uops = window_list->num_uops + num_uops;
34334 window_list->imm_size += imm_size;
34335 window_list->num_imm += num_imm_operand;
34336 window_list->num_imm_32 += num_imm32_operand;
34337 window_list->num_imm_64 += num_imm64_operand;
34339 if (group == disp_store)
34340 window_list->num_stores += 1;
34341 else if (group == disp_load
34342 || group == disp_prefetch)
34343 window_list->num_loads += 1;
34344 else if (group == disp_load_store)
34346 window_list->num_stores += 1;
34347 window_list->num_loads += 1;
34351 /* Adds a scheduled instruction, INSN, to the current dispatch window.
34352 If the total bytes of instructions or the number of instructions in
34353 the window exceeds the allowable limit, it allocates a new window. */
34356 add_to_dispatch_window (rtx insn)
34359 dispatch_windows *window_list;
34360 dispatch_windows *next_list;
34361 dispatch_windows *window0_list;
34362 enum insn_path path;
34363 enum dispatch_group insn_group;
34371 if (INSN_CODE (insn) < 0)
34374 byte_len = min_insn_size (insn);
34375 window_list = dispatch_window_list;
34376 next_list = window_list->next;
34377 path = get_insn_path (insn);
34378 insn_group = get_insn_group (insn);
34380 /* Get the last dispatch window. */
34382 window_list = dispatch_window_list->next;
34384 if (path == path_single)
34386 else if (path == path_double)
34389 insn_num_uops = (int) path;
34391 /* If current window is full, get a new window.
34392 Window number zero is full, if MAX_INSN uops are scheduled in it.
34393 Window number one is full, if window zero's bytes plus window
34394 one's bytes is 32, or if the bytes of the new instruction added
34395 to the total make it greater than 48, or if it already has MAX_INSN
34396 instructions in it. */
34397 num_insn = window_list->num_insn;
34398 num_uops = window_list->num_uops;
34399 window_num = window_list->window_num;
34400 insn_fits = fits_dispatch_window (insn);
34402 if (num_insn >= MAX_INSN
34403 || num_uops + insn_num_uops > MAX_INSN
34406 window_num = ~window_num & 1;
34407 window_list = allocate_next_window (window_num);
34410 if (window_num == 0)
34412 add_insn_window (insn, window_list, insn_num_uops);
34413 if (window_list->num_insn >= MAX_INSN
34414 && insn_group == disp_branch)
34416 process_end_window ();
34420 else if (window_num == 1)
34422 window0_list = window_list->prev;
34423 sum = window0_list->window_size + window_list->window_size;
34425 || (byte_len + sum) >= 48)
34427 process_end_window ();
34428 window_list = dispatch_window_list;
34431 add_insn_window (insn, window_list, insn_num_uops);
34434 gcc_unreachable ();
34436 if (is_end_basic_block (insn_group))
34438 /* End of basic block is reached; do end-basic-block processing. */
34439 process_end_window ();
34444 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34446 DEBUG_FUNCTION static void
34447 debug_dispatch_window_file (FILE *file, int window_num)
34449 dispatch_windows *list;
34452 if (window_num == 0)
34453 list = dispatch_window_list;
34455 list = dispatch_window_list1;
34457 fprintf (file, "Window #%d:\n", list->window_num);
34458 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34459 list->num_insn, list->num_uops, list->window_size);
34460 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34461 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34463 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34465 fprintf (file, " insn info:\n");
34467 for (i = 0; i < MAX_INSN; i++)
34469 if (!list->window[i].insn)
34471 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34472 i, group_name[list->window[i].group],
34473 i, (void *)list->window[i].insn,
34474 i, list->window[i].path,
34475 i, list->window[i].byte_len,
34476 i, list->window[i].imm_bytes);
34480 /* Print to stdout a dispatch window. */
34482 DEBUG_FUNCTION void
34483 debug_dispatch_window (int window_num)
34485 debug_dispatch_window_file (stdout, window_num);
34488 /* Print INSN dispatch information to FILE. */
34490 DEBUG_FUNCTION static void
34491 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34494 enum insn_path path;
34495 enum dispatch_group group;
34497 int num_imm_operand;
34498 int num_imm32_operand;
34499 int num_imm64_operand;
34501 if (INSN_CODE (insn) < 0)
34504 byte_len = min_insn_size (insn);
34505 path = get_insn_path (insn);
34506 group = get_insn_group (insn);
34507 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34508 &num_imm64_operand);
34510 fprintf (file, " insn info:\n");
34511 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34512 group_name[group], path, byte_len);
34513 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34514 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34517 /* Print to STDERR the status of the ready list with respect to
34518 dispatch windows. */
34520 DEBUG_FUNCTION void
34521 debug_ready_dispatch (void)
34524 int no_ready = number_in_ready ();
34526 fprintf (stdout, "Number of ready: %d\n", no_ready);
34528 for (i = 0; i < no_ready; i++)
34529 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34532 /* This routine is the driver of the dispatch scheduler. */
34535 do_dispatch (rtx insn, int mode)
34537 if (mode == DISPATCH_INIT)
34538 init_dispatch_sched ();
34539 else if (mode == ADD_TO_DISPATCH_WINDOW)
34540 add_to_dispatch_window (insn);
34543 /* Return TRUE if Dispatch Scheduling is supported. */
34546 has_dispatch (rtx insn, int action)
34548 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34554 case IS_DISPATCH_ON:
34559 return is_cmp (insn);
34561 case DISPATCH_VIOLATION:
34562 return dispatch_violation ();
34564 case FITS_DISPATCH_WINDOW:
34565 return fits_dispatch_window (insn);
34571 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34572 place emms and femms instructions. */
34574 static enum machine_mode
34575 ix86_preferred_simd_mode (enum machine_mode mode)
34577 /* Disable double precision vectorizer if needed. */
34578 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
34581 if (!TARGET_AVX && !TARGET_SSE)
34587 return TARGET_AVX ? V8SFmode : V4SFmode;
34589 return TARGET_AVX ? V4DFmode : V2DFmode;
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return TARGET_AVX ? 32 | 16 : 0;
}

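/* The return value is a bitmask of candidate vector sizes in bytes:
   32 | 16 asks the vectorizer to try 256-bit vectors first and fall
   back to 128-bit ones, while 0 means "use only the preferred SIMD
   mode".  */
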
/* Initialize the GCC target structure.  */
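/* Each hook below is first #undef'd, since target-def.h supplies a
   default definition for every TARGET_* macro, and then redefined to
   point at the i386 implementation; TARGET_INITIALIZER at the end of
   the file collects all of these into one initializer.  */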
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
#undef TARGET_OPTION_INIT_STRUCT
#define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_SUPPORTS_SPLIT_STACK
#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

struct gcc_target targetm = TARGET_INITIALIZER;

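/* TARGET_INITIALIZER (from target-def.h) expands to an aggregate
   initializer for struct gcc_target built from the TARGET_* macros
   defined above; targetm is the single global instance through which
   the middle end makes all target-specific queries.  */
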
#include "gt-i386.h"