/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
typedef struct block_info_def
{
  /* TRUE if the upper 128bits of any AVX registers are live at exit.  */
  bool upper_128bits_set;
  /* TRUE if block has been processed.  */
  bool done;
} *block_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
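
/* Usage sketch: the pass below keeps its per-basic-block state in the
   generic AUX field, following the usual aux-field protocol, e.g.

     alloc_aux_for_blocks (sizeof (struct block_info_def));
     BLOCK_INFO (bb)->done = true;
     if (BLOCK_INFO (bb)->upper_128bits_set)
       ...;
     free_aux_for_blocks ();

   which is the sequence move_or_delete_vzeroupper below performs.  */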
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes a 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
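
/* Sketch of the encoding assumed by the pass below: one of the enum
   values above is stored as operand 0 of the vzeroupper UNSPEC_VOLATILE
   pattern, so it can be recovered from an insn with

     avx256 = INTVAL (XVECEXP (PATTERN (insn), 0, 0));

   The code that emits the pattern is outside this excerpt.  */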
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      bool *upper_128bits_set = (bool *) data;
      *upper_128bits_set = true;
    }
}
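
/* Usage sketch: this function has the note_stores callback signature,
   so scanning one insn pattern for 256bit AVX stores looks like

     bool used = false;
     note_stores (PATTERN (insn), check_avx256_stores, &used);

   after which USED tells whether the upper 128bits become live; this is
   exactly how move_or_delete_vzeroupper_2 below invokes it.  */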
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   UPPER_128BITS_SET is TRUE if the upper 128bits of any AVX registers
   are live at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
{
  rtx curr_insn, next_insn, prev_insn, insn;

  if (dump_file)
    fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
	     bb->index, upper_128bits_set);

  for (curr_insn = BB_HEAD (bb);
       curr_insn && curr_insn != NEXT_INSN (BB_END (bb));
       curr_insn = next_insn)
    {
      int avx256;

      next_insn = NEXT_INSN (curr_insn);

      if (!NONDEBUG_INSN_P (curr_insn))
	continue;

      /* Search for vzeroupper.  */
      insn = PATTERN (curr_insn);
      if (GET_CODE (insn) == UNSPEC_VOLATILE
	  && XINT (insn, 1) == UNSPECV_VZEROUPPER)
	{
	  /* Found vzeroupper.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, curr_insn);
	    }
	}
      else
	{
	  /* Check vzeroall intrinsic.  */
	  if (GET_CODE (insn) == PARALLEL
	      && GET_CODE (XVECEXP (insn, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (insn, 0, 0), 1) == UNSPECV_VZEROALL)
	    upper_128bits_set = false;
	  else if (!upper_128bits_set)
	    {
	      /* Check if upper 128bits of AVX registers are used.  */
	      note_stores (insn, check_avx256_stores,
			   &upper_128bits_set);
	    }
	  continue;
	}

      avx256 = INTVAL (XVECEXP (insn, 0, 0));

      if (!upper_128bits_set)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  upper_128bits_set = avx256 == callee_return_avx256;

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, curr_insn);
	    }
	  delete_insn (curr_insn);
	  continue;
	}
      else if (avx256 == callee_return_pass_avx256
	       || avx256 == callee_pass_avx256)
	{
	  /* Callee passes 256bit AVX register.  Check if callee
	     returns 256bit AVX register.  */
	  upper_128bits_set = avx256 == callee_return_pass_avx256;

	  /* Must remove vzeroupper since callee passes 256bit AVX
	     register.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete callee pass vzeroupper:\n");
	      print_rtl_single (dump_file, curr_insn);
	    }
	  delete_insn (curr_insn);
	  continue;
	}

      /* Find the jump after vzeroupper.  */
      prev_insn = curr_insn;
      if (avx256 == vzeroupper_intrinsic)
	{
	  /* For vzeroupper intrinsic, check if there is another
	     vzeroupper.  */
	  insn = NEXT_INSN (curr_insn);
	  while (insn)
	    {
	      if (NONJUMP_INSN_P (insn)
		  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_VZEROUPPER)
		{
		  if (dump_file)
		    {
		      fprintf (dump_file,
			       "Delete redundant vzeroupper intrinsic:\n");
		      print_rtl_single (dump_file, curr_insn);
		    }
		  delete_insn (curr_insn);
		  insn = NULL;
		  break;
		}

	      if (JUMP_P (insn) || CALL_P (insn))
		break;

	      prev_insn = insn;
	      insn = NEXT_INSN (insn);
	      if (insn == NEXT_INSN (BB_END (bb)))
		break;
	    }

	  /* Continue if redundant vzeroupper intrinsic is deleted.  */
	  if (!insn)
	    continue;
	}
      else
	{
	  /* Find the next jump/call.  */
	  insn = NEXT_INSN (curr_insn);
	  while (insn)
	    {
	      if (JUMP_P (insn) || CALL_P (insn))
		break;

	      prev_insn = insn;
	      insn = NEXT_INSN (insn);
	      if (insn == NEXT_INSN (BB_END (bb)))
		break;
	    }

	  if (!insn)
	    gcc_unreachable ();
	}

      /* Keep vzeroupper.  */
      upper_128bits_set = false;

      /* Also allow label as the next instruction.  */
      if (insn == NEXT_INSN (BB_END (bb)) && !LABEL_P (insn))
	gcc_unreachable ();

      /* Move vzeroupper before jump/call if needed.  */
      if (curr_insn != prev_insn)
	{
	  reorder_insns_nobb (curr_insn, curr_insn, prev_insn);

	  if (dump_file)
	    {
	      fprintf (dump_file, "Move vzeroupper after:\n");
	      print_rtl_single (dump_file, prev_insn);
	      fprintf (dump_file, "before:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}

      next_insn = NEXT_INSN (insn);
    }

  BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;

  if (dump_file)
    fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n",
	     bb->index, upper_128bits_set);
}
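
/* For illustration, moving a kept vzeroupper as done above is a plain
   reordering within the block, roughly:

     before:                 after:
       vzeroupper              insn_1
       insn_1                  ...
       ...                     insn_n
       insn_n                  vzeroupper
       jump/call               jump/call

   so that the vzeroupper takes effect immediately before the control
   transfer.  */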
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and its predecessor blocks recursively.  */

static void
move_or_delete_vzeroupper_1 (basic_block block)
{
  edge e;
  edge_iterator ei;
  bool upper_128bits_set;

  if (dump_file)
    fprintf (dump_file, " Process BB [%i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->done);

  if (BLOCK_INFO (block)->done)
    return;

  BLOCK_INFO (block)->done = true;

  upper_128bits_set = false;

  /* Process all predecessor edges of this block.  */
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      move_or_delete_vzeroupper_1 (e->src);
      if (BLOCK_INFO (e->src)->upper_128bits_set)
	upper_128bits_set = true;
    }

  /* Process this block.  */
  move_or_delete_vzeroupper_2 (block, upper_128bits_set);
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process successor blocks of all entry points.  */
  if (dump_file)
    fprintf (dump_file, "Process all entry points\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p);
      BLOCK_INFO (e->dest)->done = true;
    }

  /* Process predecessor blocks of all exit points.  */
  if (dump_file)
    fprintf (dump_file, "Process all exit points\n");

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    move_or_delete_vzeroupper_1 (e->src);

  free_aux_for_blocks ();
}
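
/* Traversal sketch on a diamond CFG, entry -> A -> {B, C} -> D -> exit:
   the driver above seeds A through the entry edge, then
   move_or_delete_vzeroupper_1 (D) recurses into B and C, each of which
   recurses into the already-done A, giving the processing order
   A, B, C, D; every block therefore sees the upper-128bit liveness of
   all of its predecessors before being processed itself.  */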
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
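
/* For example, MODE_INDEX (SImode) is 2, selecting the SI entry of the
   five-element {QI, HI, SI, DI, other} multiply and divide cost arrays
   in the processor_costs tables below; any other mode falls through to
   index 4, the "other" slot.  */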
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
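
/* Worked example, under the assumption stated above that
   COSTS_N_INSNS (N) is (N)*4: an add costs COSTS_N_BYTES (2) == 4 in
   the size table below, the same value as COSTS_N_INSNS (1) in the
   speed tables, so size and speed costs stay directly comparable.  */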
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
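
/* A reading aid for the stringop entries below, assuming the usual
   stringop_algs layout: an initializer such as

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   gives the algorithm for unknown block sizes, then {max, alg} pairs
   for known sizes: blocks up to 256 bytes use rep_prefix_4_byte and
   anything larger (-1 terminates the list) goes through a libcall.
   Each cost table has two such entries per operation (memcpy, then
   memset), the first used for 32-bit and the second for 64-bit code,
   with DUMMY_STRINGOP_ALGS filling the variant that is never used.  */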
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
					     MOVD reg64, xmmreg Double FSTORE 4
					     MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					     MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					     MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					     MOVD reg64, xmmreg Double FSTORE 4
					     MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					     MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					     MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/* HI */
   COSTS_N_INSNS (22),			/* SI */
   COSTS_N_INSNS (22),			/* DI */
   COSTS_N_INSNS (22)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  16,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1635 const struct processor_costs *ix86_cost = &pentium_cost;
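/* Illustrative sketch (not code from this file): passes read the active
   table through ix86_cost, so a SImode multiply estimate in
   COSTS_N_INSNS units could be formed as

     static int
     example_si_mult_cost (int bits_set)
     {
       /* mult_init[2] is the SImode entry of the multiply array;
          mult_bit is the per-set-bit cost from the same table.  */
       return ix86_cost->mult_init[2] + bits_set * ix86_cost->mult_bit;
     }

   assuming the mult_init/mult_bit field names of struct processor_costs
   from i386.h.  */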
1637 /* Processor feature/optimization bitmasks. */
1638 #define m_386 (1<<PROCESSOR_I386)
1639 #define m_486 (1<<PROCESSOR_I486)
1640 #define m_PENT (1<<PROCESSOR_PENTIUM)
1641 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1642 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1643 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1644 #define m_CORE2 (1<<PROCESSOR_CORE2)
1645 #define m_ATOM (1<<PROCESSOR_ATOM)
1647 #define m_GEODE (1<<PROCESSOR_GEODE)
1648 #define m_K6 (1<<PROCESSOR_K6)
1649 #define m_K6_GEODE (m_K6 | m_GEODE)
1650 #define m_K8 (1<<PROCESSOR_K8)
1651 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1652 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1653 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1654 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1655 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1657 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1658 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1660 /* Generic instruction choice should be a common subset of the supported
1661 CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1662 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
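/* Illustrative sketch: each entry of the tuning table below is an OR of
   the m_* bits above, so asking whether a tuning applies to one
   processor is a single mask test, e.g.

     bool on_core2 = (initial_ix86_tune_features[X86_TUNE_USE_SAHF]
                      & (1u << PROCESSOR_CORE2)) != 0;

   (the variable name is made up for illustration).  */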
1664 /* Feature tests against the various tunings. */
1665 unsigned char ix86_tune_features[X86_TUNE_LAST];
1667 /* Feature tests against the various tunings used to create ix86_tune_features
1668 based on the processor mask. */
1669 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1670 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1671 negatively, so enabling it for Generic64 seems like a good code-size
1672 tradeoff. We can't enable it for 32bit generic because it does not
1673 work well with PPro-based chips. */
1674 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1676 /* X86_TUNE_PUSH_MEMORY */
1677 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1678 | m_NOCONA | m_CORE2 | m_GENERIC,
1680 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1683 /* X86_TUNE_UNROLL_STRLEN */
1684 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1685 | m_CORE2 | m_GENERIC,
1687 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1688 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1690 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1691 on simulation results, but after the P4 shipped no performance benefit
1692 was observed from branch hints, and they increase code size. As a
1693 result, icc never generates branch hints. */
1696 /* X86_TUNE_DOUBLE_WITH_ADD */
1699 /* X86_TUNE_USE_SAHF */
1700 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1701 | m_NOCONA | m_CORE2 | m_GENERIC,
1703 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1704 partial dependencies. */
1705 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1706 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1708 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1709 register stalls on the Generic32 compilation setting as well. However,
1710 in the current implementation partial register stalls are not eliminated
1711 very well - they can be introduced via subregs synthesized by combine
1712 and can happen in caller/callee saving sequences. Because this option
1713 pays back little on PPro-based chips and conflicts with the partial reg
1714 dependencies used by Athlon/P4-based chips, it is better to leave it off
1715 for generic32 for now. */
1718 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1719 m_CORE2 | m_GENERIC,
1721 /* X86_TUNE_USE_HIMODE_FIOP */
1722 m_386 | m_486 | m_K6_GEODE,
1724 /* X86_TUNE_USE_SIMODE_FIOP */
1725 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1727 /* X86_TUNE_USE_MOV0 */
1730 /* X86_TUNE_USE_CLTD */
1731 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1733 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1736 /* X86_TUNE_SPLIT_LONG_MOVES */
1739 /* X86_TUNE_READ_MODIFY_WRITE */
1742 /* X86_TUNE_READ_MODIFY */
1745 /* X86_TUNE_PROMOTE_QIMODE */
1746 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1747 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1749 /* X86_TUNE_FAST_PREFIX */
1750 ~(m_PENT | m_486 | m_386),
1752 /* X86_TUNE_SINGLE_STRINGOP */
1753 m_386 | m_PENT4 | m_NOCONA,
1755 /* X86_TUNE_QIMODE_MATH */
1758 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1759 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1760 might be considered for Generic32 if our scheme for avoiding partial
1761 stalls were more effective. */
1764 /* X86_TUNE_PROMOTE_QI_REGS */
1767 /* X86_TUNE_PROMOTE_HI_REGS */
1770 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1771 over esp addition. */
1772 m_386 | m_486 | m_PENT | m_PPRO,
1774 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1775 over esp addition. */
1778 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1779 over esp subtraction. */
1780 m_386 | m_486 | m_PENT | m_K6_GEODE,
1782 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1783 over esp subtraction. */
1784 m_PENT | m_K6_GEODE,
1786 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1787 for DFmode copies */
1788 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1789 | m_GENERIC | m_GEODE),
1791 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1792 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1794 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1795 conflict here between PPro/Pentium4-based chips that treat 128bit
1796 SSE registers as single units and K8-based chips that divide SSE
1797 registers into two 64bit halves. This knob promotes all store
1798 destinations to be 128bit, allowing register renaming on 128bit SSE
1799 units, but it usually results in one extra microop on 64bit SSE units.
1800 Experimental results show that disabling this option on P4 brings over
1801 a 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
1802 regression that can be partly masked by careful scheduling of moves. */
1803 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1804 | m_AMDFAM10 | m_BDVER1,
1806 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1807 m_AMDFAM10 | m_BDVER1,
1809 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1812 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1815 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1816 are resolved on SSE register parts instead of whole registers, so we may
1817 maintain just lower part of scalar values in proper format leaving the
1818 upper part undefined. */
1821 /* X86_TUNE_SSE_TYPELESS_STORES */
1824 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1825 m_PPRO | m_PENT4 | m_NOCONA,
1827 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1828 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1830 /* X86_TUNE_PROLOGUE_USING_MOVE */
1831 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1833 /* X86_TUNE_EPILOGUE_USING_MOVE */
1834 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1836 /* X86_TUNE_SHIFT1 */
1839 /* X86_TUNE_USE_FFREEP */
1842 /* X86_TUNE_INTER_UNIT_MOVES */
1843 ~(m_AMD_MULTIPLE | m_GENERIC),
1845 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1846 ~(m_AMDFAM10 | m_BDVER1),
1848 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1849 than 4 branch instructions in the 16 byte window. */
1850 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1853 /* X86_TUNE_SCHEDULE */
1854 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1857 /* X86_TUNE_USE_BT */
1858 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1860 /* X86_TUNE_USE_INCDEC */
1861 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1863 /* X86_TUNE_PAD_RETURNS */
1864 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1866 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1869 /* X86_TUNE_EXT_80387_CONSTANTS */
1870 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1871 | m_CORE2 | m_GENERIC,
1873 /* X86_TUNE_SHORTEN_X87_SSE */
1876 /* X86_TUNE_AVOID_VECTOR_DECODE */
1879 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1880 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1883 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1884 vector path on AMD machines. */
1885 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1887 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1888 machines. */
1889 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1891 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1892 than a MOV. */
1895 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1896 but one byte longer. */
1899 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1900 operand that cannot be represented using a modRM byte. The XOR
1901 replacement is long decoded, so this split helps here as well. */
1904 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1905 from FP to FP. */
1906 m_AMDFAM10 | m_GENERIC,
1908 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1909 from integer to FP. */
1912 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1913 with a subsequent conditional jump instruction into a single
1914 compare-and-branch uop. */
1917 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1918 will impact LEA instruction selection. */
1921 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1922 instructions. */
1926 /* Feature tests against the various architecture variations. */
1927 unsigned char ix86_arch_features[X86_ARCH_LAST];
1929 /* Feature tests against the various architecture variations, used to create
1930 ix86_arch_features based on the processor mask. */
1931 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1932 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1933 ~(m_386 | m_486 | m_PENT | m_K6),
1935 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1938 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1941 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1944 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1948 static const unsigned int x86_accumulate_outgoing_args
1949 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1952 static const unsigned int x86_arch_always_fancy_math_387
1953 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1954 | m_NOCONA | m_CORE2 | m_GENERIC;
1956 static enum stringop_alg stringop_alg = no_stringop;
1958 /* In case the average insn count for a single function invocation is
1959 lower than this constant, emit fast (but longer) prologue and
1960 epilogue code. */
1961 #define FAST_PROLOGUE_INSN_COUNT 20
1963 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1964 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1965 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1966 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1968 /* Array of the smallest class containing reg number REGNO, indexed by
1969 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1971 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1973 /* ax, dx, cx, bx */
1974 AREG, DREG, CREG, BREG,
1975 /* si, di, bp, sp */
1976 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1978 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1979 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1982 /* flags, fpsr, fpcr, frame */
1983 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1985 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1988 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1991 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1992 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1993 /* SSE REX registers */
1994 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
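/* Illustrative sketch: i386.h consumes this table through a plain
   lookup, along the lines of

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])

   so the class of %eax (regno 0) is AREG and of %esp (regno 7) is
   NON_Q_REGS (the macro body is an assumption about the i386.h
   definition).  */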
1998 /* The "default" register map used in 32bit mode. */
2000 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2002 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2003 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2004 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2005 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2006 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2007 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2008 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2011 /* The "default" register map used in 64bit mode. */
2013 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2015 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2016 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2017 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2018 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2019 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2020 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2021 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2024 /* Define the register numbers to be used in Dwarf debugging information.
2025 The SVR4 reference port C compiler uses the following register numbers
2026 in its Dwarf output code:
2027 0 for %eax (gcc regno = 0)
2028 1 for %ecx (gcc regno = 2)
2029 2 for %edx (gcc regno = 1)
2030 3 for %ebx (gcc regno = 3)
2031 4 for %esp (gcc regno = 7)
2032 5 for %ebp (gcc regno = 6)
2033 6 for %esi (gcc regno = 4)
2034 7 for %edi (gcc regno = 5)
2035 The following three DWARF register numbers are never generated by
2036 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2037 believes these numbers have these meanings.
2038 8 for %eip (no gcc equivalent)
2039 9 for %eflags (gcc regno = 17)
2040 10 for %trapno (no gcc equivalent)
2041 It is not at all clear how we should number the FP stack registers
2042 for the x86 architecture. If the version of SDB on x86/svr4 were
2043 a bit less brain dead with respect to floating-point then we would
2044 have a precedent to follow with respect to DWARF register numbers
2045 for x86 FP registers, but the SDB on x86/svr4 is so completely
2046 broken with respect to FP registers that it is hardly worth thinking
2047 of it as something to strive for compatibility with.
2048 The version of x86/svr4 SDB I have at the moment does (partially)
2049 seem to believe that DWARF register number 11 is associated with
2050 the x86 register %st(0), but that's about all. Higher DWARF
2051 register numbers don't seem to be associated with anything in
2052 particular, and even for DWARF regno 11, SDB only seems to under-
2053 stand that it should say that a variable lives in %st(0) (when
2054 asked via an `=' command) if we said it was in DWARF regno 11,
2055 but SDB still prints garbage when asked for the value of the
2056 variable in question (via a `/' command).
2057 (Also note that the labels SDB prints for various FP stack regs
2058 when doing an `x' command are all wrong.)
2059 Note that these problems generally don't affect the native SVR4
2060 C compiler because it doesn't allow the use of -O with -g and
2061 because when it is *not* optimizing, it allocates a memory
2062 location for each floating-point variable, and the memory
2063 location is what gets described in the DWARF AT_location
2064 attribute for the variable in question.
2065 Regardless of the severe mental illness of the x86/svr4 SDB, we
2066 do something sensible here and we use the following DWARF
2067 register numbers. Note that these are all stack-top-relative
2068 numbers:
2069 11 for %st(0) (gcc regno = 8)
2070 12 for %st(1) (gcc regno = 9)
2071 13 for %st(2) (gcc regno = 10)
2072 14 for %st(3) (gcc regno = 11)
2073 15 for %st(4) (gcc regno = 12)
2074 16 for %st(5) (gcc regno = 13)
2075 17 for %st(6) (gcc regno = 14)
2076 18 for %st(7) (gcc regno = 15)
2078 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2080 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2081 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2082 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2083 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2084 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2085 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2086 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
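/* Illustrative sketch: debug output maps a hard register to its DWARF
   encoding with a table lookup, e.g.

     int dwarf_regno = svr4_dbx_register_map[REGNO (reg)];

   where a -1 entry marks a register with no DWARF number.  */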
2089 /* Define parameter passing and return registers. */
2091 static int const x86_64_int_parameter_registers[6] =
2093 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2096 static int const x86_64_ms_abi_int_parameter_registers[4] =
2098 CX_REG, DX_REG, R8_REG, R9_REG
2101 static int const x86_64_int_return_registers[4] =
2103 AX_REG, DX_REG, DI_REG, SI_REG
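/* Illustrative sketch: under the SysV x86-64 calling convention the Nth
   integer argument register is x86_64_int_parameter_registers[N], so for

     long f (long a, long b);

   a arrives in %rdi (DI_REG) and b in %rsi (SI_REG), while the MS ABI
   table instead starts at %rcx.  */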
2106 /* Define the structure for the machine field in struct function. */
2108 struct GTY(()) stack_local_entry {
2109 unsigned short mode;
2112 struct stack_local_entry *next;
2115 /* Structure describing stack frame layout.
2116 Stack grows downward:
2122 saved static chain if ix86_static_chain_on_stack
2124 saved frame pointer if frame_pointer_needed
2125 <- HARD_FRAME_POINTER
2131 <- sse_regs_save_offset
2134 [va_arg registers] |
2138 [padding2] | = to_allocate
2147 int outgoing_arguments_size;
2148 HOST_WIDE_INT frame;
2150 /* The offsets relative to ARG_POINTER. */
2151 HOST_WIDE_INT frame_pointer_offset;
2152 HOST_WIDE_INT hard_frame_pointer_offset;
2153 HOST_WIDE_INT stack_pointer_offset;
2154 HOST_WIDE_INT reg_save_offset;
2155 HOST_WIDE_INT sse_reg_save_offset;
2157 /* When save_regs_using_mov is set, emit prologue using
2158 move instead of push instructions. */
2159 bool save_regs_using_mov;
2162 /* Code model option. */
2163 enum cmodel ix86_cmodel;
2165 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2167 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2169 /* Which unit we are generating floating point math for. */
2170 enum fpmath_unit ix86_fpmath;
2172 /* Which cpu are we scheduling for. */
2173 enum attr_cpu ix86_schedule;
2175 /* Which cpu are we optimizing for. */
2176 enum processor_type ix86_tune;
2178 /* Which instruction set architecture to use. */
2179 enum processor_type ix86_arch;
2181 /* True if the SSE prefetch instruction is not a NOP. */
2182 int x86_prefetch_sse;
2184 /* ix86_regparm_string as a number */
2185 static int ix86_regparm;
2187 /* -mstackrealign option */
2188 static const char ix86_force_align_arg_pointer_string[]
2189 = "force_align_arg_pointer";
2191 static rtx (*ix86_gen_leave) (void);
2192 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2193 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2194 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2195 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2196 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2197 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2198 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2199 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2200 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2202 /* Preferred alignment for stack boundary in bits. */
2203 unsigned int ix86_preferred_stack_boundary;
2205 /* Alignment for incoming stack boundary in bits specified at
2207 static unsigned int ix86_user_incoming_stack_boundary;
2209 /* Default alignment for incoming stack boundary in bits. */
2210 static unsigned int ix86_default_incoming_stack_boundary;
2212 /* Alignment for incoming stack boundary in bits. */
2213 unsigned int ix86_incoming_stack_boundary;
2215 /* The ABI used by the target. */
2216 enum calling_abi ix86_abi;
2218 /* Values 1-5: see jump.c */
2219 int ix86_branch_cost;
2221 /* Calling abi specific va_list type nodes. */
2222 static GTY(()) tree sysv_va_list_type_node;
2223 static GTY(()) tree ms_va_list_type_node;
2225 /* Variables which are this size or smaller are put in the data/bss
2226 or ldata/lbss sections. */
2228 int ix86_section_threshold = 65536;
2230 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2231 char internal_label_prefix[16];
2232 int internal_label_prefix_len;
2234 /* Fence to use after loop using movnt. */
2237 /* Register class used for passing a given 64bit part of the argument.
2238 These represent classes as documented by the psABI, with the exception
2239 of the SSESF and SSEDF classes, which are basically the SSE class;
2240 gcc just uses an SF or DFmode move instead of DImode to avoid
2241 reformatting penalties.
2242 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2243 whenever possible (the upper half does contain padding). */
2244 enum x86_64_reg_class
2247 X86_64_INTEGER_CLASS,
2248 X86_64_INTEGERSI_CLASS,
2255 X86_64_COMPLEX_X87_CLASS,
2259 #define MAX_CLASSES 4
2261 /* Table of constants used by fldpi, fldln2, etc.... */
2262 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2263 static bool ext_80387_constants_init = 0;
2266 static struct machine_function * ix86_init_machine_status (void);
2267 static rtx ix86_function_value (const_tree, const_tree, bool);
2268 static bool ix86_function_value_regno_p (const unsigned int);
2269 static rtx ix86_static_chain (const_tree, bool);
2270 static int ix86_function_regparm (const_tree, const_tree);
2271 static void ix86_compute_frame_layout (struct ix86_frame *);
2272 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2274 static void ix86_add_new_builtins (int);
2275 static rtx ix86_expand_vec_perm_builtin (tree);
2276 static tree ix86_canonical_va_list_type (tree);
2277 static void predict_jump (int);
2278 static unsigned int split_stack_prologue_scratch_regno (void);
2279 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2281 enum ix86_function_specific_strings
2283 IX86_FUNCTION_SPECIFIC_ARCH,
2284 IX86_FUNCTION_SPECIFIC_TUNE,
2285 IX86_FUNCTION_SPECIFIC_FPMATH,
2286 IX86_FUNCTION_SPECIFIC_MAX
2289 static char *ix86_target_string (int, int, const char *, const char *,
2290 const char *, bool);
2291 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2292 static void ix86_function_specific_save (struct cl_target_option *);
2293 static void ix86_function_specific_restore (struct cl_target_option *);
2294 static void ix86_function_specific_print (FILE *, int,
2295 struct cl_target_option *);
2296 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2297 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2298 static bool ix86_can_inline_p (tree, tree);
2299 static void ix86_set_current_function (tree);
2300 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2302 static enum calling_abi ix86_function_abi (const_tree);
2305 #ifndef SUBTARGET32_DEFAULT_CPU
2306 #define SUBTARGET32_DEFAULT_CPU "i386"
2309 /* The svr4 ABI for the i386 says that records and unions are returned
2310 in memory. */
2311 #ifndef DEFAULT_PCC_STRUCT_RETURN
2312 #define DEFAULT_PCC_STRUCT_RETURN 1
2315 /* Whether -mtune= or -march= were specified */
2316 static int ix86_tune_defaulted;
2317 static int ix86_arch_specified;
2319 /* A mask of ix86_isa_flags that includes bit X if X
2320 was set or cleared on the command line. */
2321 static int ix86_isa_flags_explicit;
2323 /* Define a set of ISAs which are available when a given ISA is
2324 enabled. MMX and SSE ISAs are handled separately. */
2326 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2327 #define OPTION_MASK_ISA_3DNOW_SET \
2328 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2330 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2331 #define OPTION_MASK_ISA_SSE2_SET \
2332 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2333 #define OPTION_MASK_ISA_SSE3_SET \
2334 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2335 #define OPTION_MASK_ISA_SSSE3_SET \
2336 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2337 #define OPTION_MASK_ISA_SSE4_1_SET \
2338 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2339 #define OPTION_MASK_ISA_SSE4_2_SET \
2340 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2341 #define OPTION_MASK_ISA_AVX_SET \
2342 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2343 #define OPTION_MASK_ISA_FMA_SET \
2344 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2346 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2347 as -msse4.2. */
2348 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2350 #define OPTION_MASK_ISA_SSE4A_SET \
2351 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2352 #define OPTION_MASK_ISA_FMA4_SET \
2353 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2354 | OPTION_MASK_ISA_AVX_SET)
2355 #define OPTION_MASK_ISA_XOP_SET \
2356 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2357 #define OPTION_MASK_ISA_LWP_SET \
2360 /* AES and PCLMUL need SSE2 because they use xmm registers. */
2361 #define OPTION_MASK_ISA_AES_SET \
2362 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2363 #define OPTION_MASK_ISA_PCLMUL_SET \
2364 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2366 #define OPTION_MASK_ISA_ABM_SET \
2367 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2369 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2370 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2371 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2372 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2373 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2375 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2376 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2377 #define OPTION_MASK_ISA_F16C_SET \
2378 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
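/* Illustrative sketch: enabling an ISA through its _SET mask pulls in
   everything it implies with a single OR, e.g.

     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;

   turns on SSE4.2 together with SSE4.1, SSSE3, SSE3, SSE2 and SSE.  */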
2380 /* Define a set of ISAs which aren't available when a given ISA is
2381 disabled. MMX and SSE ISAs are handled separately. */
2383 #define OPTION_MASK_ISA_MMX_UNSET \
2384 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2385 #define OPTION_MASK_ISA_3DNOW_UNSET \
2386 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2387 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2389 #define OPTION_MASK_ISA_SSE_UNSET \
2390 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2391 #define OPTION_MASK_ISA_SSE2_UNSET \
2392 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2393 #define OPTION_MASK_ISA_SSE3_UNSET \
2394 (OPTION_MASK_ISA_SSE3 \
2395 | OPTION_MASK_ISA_SSSE3_UNSET \
2396 | OPTION_MASK_ISA_SSE4A_UNSET )
2397 #define OPTION_MASK_ISA_SSSE3_UNSET \
2398 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2399 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2400 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2401 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2402 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2403 #define OPTION_MASK_ISA_AVX_UNSET \
2404 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2405 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2406 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2408 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2409 as -mno-sse4.1. */
2410 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2412 #define OPTION_MASK_ISA_SSE4A_UNSET \
2413 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2415 #define OPTION_MASK_ISA_FMA4_UNSET \
2416 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2417 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2418 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2420 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2421 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2422 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2423 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2424 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2425 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2426 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2427 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2429 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2430 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2431 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
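/* Illustrative sketch: disabling an ISA through its _UNSET mask clears
   everything that depends on it, e.g.

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;

   removes SSE2 and, transitively, everything layered on top of it
   (SSE3 up through SSE4.x, AVX and FMA).  */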
2433 /* Vectorization library interface and handlers. */
2434 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2436 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2437 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2439 /* Processor target table, indexed by processor number */
2440 struct ptt
2441 {
2442 const struct processor_costs *cost; /* Processor costs */
2443 const int align_loop; /* Default alignments. */
2444 const int align_loop_max_skip;
2445 const int align_jump;
2446 const int align_jump_max_skip;
2447 const int align_func;
2450 static const struct ptt processor_target_table[PROCESSOR_max] =
2452 {&i386_cost, 4, 3, 4, 3, 4},
2453 {&i486_cost, 16, 15, 16, 15, 16},
2454 {&pentium_cost, 16, 7, 16, 7, 16},
2455 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2456 {&geode_cost, 0, 0, 0, 0, 0},
2457 {&k6_cost, 32, 7, 32, 7, 32},
2458 {&athlon_cost, 16, 7, 16, 7, 16},
2459 {&pentium4_cost, 0, 0, 0, 0, 0},
2460 {&k8_cost, 16, 7, 16, 7, 16},
2461 {&nocona_cost, 0, 0, 0, 0, 0},
2462 {&core2_cost, 16, 10, 16, 10, 16},
2463 {&generic32_cost, 16, 7, 16, 7, 16},
2464 {&generic64_cost, 16, 10, 16, 10, 16},
2465 {&amdfam10_cost, 32, 24, 32, 7, 32},
2466 {&bdver1_cost, 32, 24, 32, 7, 32},
2467 {&atom_cost, 16, 7, 16, 7, 16}
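/* Illustrative sketch: the per-processor defaults are read back by
   tuning index once ix86_tune is known, e.g.

     align_loops = processor_target_table[ix86_tune].align_loop;

   mirroring how ix86_cost is selected in the option override code
   below.  */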
2470 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2497 /* Return true if a red-zone is in use. */
2500 ix86_using_red_zone (void)
2502 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2505 /* Implement TARGET_HANDLE_OPTION. */
2508 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2515 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2516 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2520 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2521 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2528 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2529 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2533 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2534 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2544 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2545 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2549 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2550 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2557 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2558 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2562 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2563 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2570 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2571 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2575 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2576 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2583 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2584 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2588 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2589 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2596 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2601 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2602 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2609 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2614 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2615 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2622 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2623 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2627 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2628 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2635 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2640 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2641 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2646 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2647 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2651 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2652 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2658 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2659 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2663 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2664 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2671 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2672 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2676 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2677 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2684 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2685 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2689 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2690 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2697 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2698 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2702 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2703 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2710 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2711 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2715 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2716 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2723 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2724 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2728 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2729 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2736 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2737 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2741 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2742 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2749 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2750 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2754 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2755 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2762 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2763 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2767 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2768 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2775 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2776 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2780 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2781 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2788 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2789 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2793 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2794 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2801 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2802 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2806 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2807 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2814 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2815 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2819 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2820 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2827 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2828 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2832 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2833 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2840 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2841 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2845 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2846 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
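/* Illustrative sketch of the pattern repeated above: every -mFOO /
   -mno-FOO pair in ix86_handle_option has the same shape (OPT_mfoo is
   a made-up placeholder):

     case OPT_mfoo:
       if (value)
         {
           ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
         }
       else
         {
           ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
         }
       return true;

   recording both the new flag state and the fact that the user set it
   explicitly.  */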
2855 /* Return a string that documents the current -m options. The caller is
2856 responsible for freeing the string. */
2859 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2860 const char *fpmath, bool add_nl_p)
2862 struct ix86_target_opts
2864 const char *option; /* option string */
2865 int mask; /* isa mask options */
2868 /* This table is ordered so that options like -msse4.2 that imply
2869 preceding options are matched first. */
2870 static struct ix86_target_opts isa_opts[] =
2872 { "-m64", OPTION_MASK_ISA_64BIT },
2873 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2874 { "-mfma", OPTION_MASK_ISA_FMA },
2875 { "-mxop", OPTION_MASK_ISA_XOP },
2876 { "-mlwp", OPTION_MASK_ISA_LWP },
2877 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2878 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2879 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2880 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2881 { "-msse3", OPTION_MASK_ISA_SSE3 },
2882 { "-msse2", OPTION_MASK_ISA_SSE2 },
2883 { "-msse", OPTION_MASK_ISA_SSE },
2884 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2885 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2886 { "-mmmx", OPTION_MASK_ISA_MMX },
2887 { "-mabm", OPTION_MASK_ISA_ABM },
2888 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2889 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2890 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2891 { "-maes", OPTION_MASK_ISA_AES },
2892 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2893 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2894 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2895 { "-mf16c", OPTION_MASK_ISA_F16C },
2899 static struct ix86_target_opts flag_opts[] =
2901 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2902 { "-m80387", MASK_80387 },
2903 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2904 { "-malign-double", MASK_ALIGN_DOUBLE },
2905 { "-mcld", MASK_CLD },
2906 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2907 { "-mieee-fp", MASK_IEEE_FP },
2908 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2909 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2910 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2911 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2912 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2913 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2914 { "-mno-red-zone", MASK_NO_RED_ZONE },
2915 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2916 { "-mrecip", MASK_RECIP },
2917 { "-mrtd", MASK_RTD },
2918 { "-msseregparm", MASK_SSEREGPARM },
2919 { "-mstack-arg-probe", MASK_STACK_PROBE },
2920 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2921 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2922 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2923 { "-mvzeroupper", MASK_VZEROUPPER },
2926 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2929 char target_other[40];
2938 memset (opts, '\0', sizeof (opts));
2940 /* Add -march= option. */
2943 opts[num][0] = "-march=";
2944 opts[num++][1] = arch;
2947 /* Add -mtune= option. */
2950 opts[num][0] = "-mtune=";
2951 opts[num++][1] = tune;
2954 /* Pick out the ISA options. */
2955 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2957 if ((isa & isa_opts[i].mask) != 0)
2959 opts[num++][0] = isa_opts[i].option;
2960 isa &= ~ isa_opts[i].mask;
2964 if (isa && add_nl_p)
2966 opts[num++][0] = isa_other;
2967 sprintf (isa_other, "(other isa: %#x)", isa);
2970 /* Add flag options. */
2971 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2973 if ((flags & flag_opts[i].mask) != 0)
2975 opts[num++][0] = flag_opts[i].option;
2976 flags &= ~ flag_opts[i].mask;
2980 if (flags && add_nl_p)
2982 opts[num++][0] = target_other;
2983 sprintf (target_other, "(other flags: %#x)", flags);
2986 /* Add -fpmath= option. */
2989 opts[num][0] = "-mfpmath=";
2990 opts[num++][1] = fpmath;
2997 gcc_assert (num < ARRAY_SIZE (opts));
2999 /* Size the string. */
3001 sep_len = (add_nl_p) ? 3 : 1;
3002 for (i = 0; i < num; i++)
3005 for (j = 0; j < 2; j++)
3007 len += strlen (opts[i][j]);
3010 /* Build the string. */
3011 ret = ptr = (char *) xmalloc (len);
3014 for (i = 0; i < num; i++)
3018 for (j = 0; j < 2; j++)
3019 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3026 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3034 for (j = 0; j < 2; j++)
3037 memcpy (ptr, opts[i][j], len2[j]);
3039 line_len += len2[j];
3044 gcc_assert (ret + len >= ptr);
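/* Illustrative usage sketch: ix86_debug_options below calls this
   roughly as

     char *opts = ix86_target_string (ix86_isa_flags, target_flags,
                                      ix86_arch_string, ix86_tune_string,
                                      ix86_fpmath_string, true);
     ...
     free (opts);

   and, per the comment above, the caller owns and must free the
   returned string.  */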
3049 /* Return TRUE if software prefetching is beneficial for the
3050 given CPU. */
3053 software_prefetching_beneficial_p (void)
3057 case PROCESSOR_GEODE:
3059 case PROCESSOR_ATHLON:
3061 case PROCESSOR_AMDFAM10:
3069 /* Return true if profiling code should be emitted before the
3070 prologue, and false otherwise.
3071 Note: for x86 with "hotfix" this is sorried (not supported). */
3073 ix86_profile_before_prologue (void)
3075 return flag_fentry != 0;
3078 /* Function that is callable from the debugger to print the current
3081 ix86_debug_options (void)
3083 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3084 ix86_arch_string, ix86_tune_string,
3085 ix86_fpmath_string, true);
3089 fprintf (stderr, "%s\n\n", opts);
3093 fputs ("<no options>\n\n", stderr);
3098 /* Override various settings based on options. If MAIN_ARGS_P, the
3099 options are from the command line, otherwise they are from
3100 attribute(target). */
3103 ix86_option_override_internal (bool main_args_p)
3106 unsigned int ix86_arch_mask, ix86_tune_mask;
3107 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3112 /* Comes from final.c -- no real reason to change it. */
3113 #define MAX_CODE_ALIGN 16
3121 PTA_PREFETCH_SSE = 1 << 4,
3123 PTA_3DNOW_A = 1 << 6,
3127 PTA_POPCNT = 1 << 10,
3129 PTA_SSE4A = 1 << 12,
3130 PTA_NO_SAHF = 1 << 13,
3131 PTA_SSE4_1 = 1 << 14,
3132 PTA_SSE4_2 = 1 << 15,
3134 PTA_PCLMUL = 1 << 17,
3137 PTA_MOVBE = 1 << 20,
3141 PTA_FSGSBASE = 1 << 24,
3142 PTA_RDRND = 1 << 25,
3148 const char *const name; /* processor name or nickname. */
3149 const enum processor_type processor;
3150 const enum attr_cpu schedule;
3151 const unsigned /*enum pta_flags*/ flags;
3153 const processor_alias_table[] =
3155 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3156 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3157 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3158 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3159 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3160 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3161 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3162 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3163 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3164 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3165 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3166 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3167 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3169 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3171 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3172 PTA_MMX | PTA_SSE | PTA_SSE2},
3173 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3174 PTA_MMX | PTA_SSE | PTA_SSE2},
3175 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3176 PTA_MMX | PTA_SSE | PTA_SSE2},
3177 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3178 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3179 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3180 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3181 | PTA_CX16 | PTA_NO_SAHF},
3182 {"core2", PROCESSOR_CORE2, CPU_CORE2,
3183 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3184 | PTA_SSSE3 | PTA_CX16},
3185 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3186 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3187 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3188 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3189 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3190 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3191 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3192 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3193 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3194 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3195 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3196 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3197 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3198 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3199 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3200 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3201 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3202 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3203 {"x86-64", PROCESSOR_K8, CPU_K8,
3204 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3205 {"k8", PROCESSOR_K8, CPU_K8,
3206 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3207 | PTA_SSE2 | PTA_NO_SAHF},
3208 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3209 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3210 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3211 {"opteron", PROCESSOR_K8, CPU_K8,
3212 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3213 | PTA_SSE2 | PTA_NO_SAHF},
3214 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3215 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3216 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3217 {"athlon64", PROCESSOR_K8, CPU_K8,
3218 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3219 | PTA_SSE2 | PTA_NO_SAHF},
3220 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3221 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3222 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3223 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3224 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3225 | PTA_SSE2 | PTA_NO_SAHF},
3226 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3227 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3228 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3229 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3232 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
3235 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
3236 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
3237 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3238 0 /* flags are only used for -march switch. */ },
3239 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3240 PTA_64BIT /* flags are only used for -march switch. */ },
3243 int const pta_size = ARRAY_SIZE (processor_alias_table);
3245 /* Set up prefix/suffix so the error messages refer to either the command
3246 line argument, or the attribute(target). */
3255 prefix = "option(\"";
3260 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3261 SUBTARGET_OVERRIDE_OPTIONS;
3264 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3265 SUBSUBTARGET_OVERRIDE_OPTIONS;
3268 /* -fPIC is the default for x86_64. */
3269 if (TARGET_MACHO && TARGET_64BIT)
3272 /* Need to check -mtune=generic first. */
3273 if (ix86_tune_string)
3275 if (!strcmp (ix86_tune_string, "generic")
3276 || !strcmp (ix86_tune_string, "i686")
3277 /* As special support for cross compilers we read -mtune=native
3278 as -mtune=generic. With native compilers we won't see the
3279 -mtune=native, as it was changed by the driver. */
3280 || !strcmp (ix86_tune_string, "native"))
3283 ix86_tune_string = "generic64";
3285 ix86_tune_string = "generic32";
3287 /* If this call is for setting the option attribute, allow the
3288 generic32/generic64 that was previously set. */
3289 else if (!main_args_p
3290 && (!strcmp (ix86_tune_string, "generic32")
3291 || !strcmp (ix86_tune_string, "generic64")))
3293 else if (!strncmp (ix86_tune_string, "generic", 7))
3294 error ("bad value (%s) for %stune=%s %s",
3295 ix86_tune_string, prefix, suffix, sw);
3296 else if (!strcmp (ix86_tune_string, "x86-64"))
3297 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3298 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3299 prefix, suffix, prefix, suffix, prefix, suffix);
3303 if (ix86_arch_string)
3304 ix86_tune_string = ix86_arch_string;
3305 if (!ix86_tune_string)
3307 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3308 ix86_tune_defaulted = 1;
3311 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3312 need to use a sensible tune option. */
3313 if (!strcmp (ix86_tune_string, "generic")
3314 || !strcmp (ix86_tune_string, "x86-64")
3315 || !strcmp (ix86_tune_string, "i686"))
3318 ix86_tune_string = "generic64";
3320 ix86_tune_string = "generic32";
3324 if (ix86_stringop_string)
3326 if (!strcmp (ix86_stringop_string, "rep_byte"))
3327 stringop_alg = rep_prefix_1_byte;
3328 else if (!strcmp (ix86_stringop_string, "libcall"))
3329 stringop_alg = libcall;
3330 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3331 stringop_alg = rep_prefix_4_byte;
3332 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3334 /* rep; movq isn't available in 32-bit code. */
3335 stringop_alg = rep_prefix_8_byte;
3336 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3337 stringop_alg = loop_1_byte;
3338 else if (!strcmp (ix86_stringop_string, "loop"))
3339 stringop_alg = loop;
3340 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3341 stringop_alg = unrolled_loop;
3343 error ("bad value (%s) for %sstringop-strategy=%s %s",
3344 ix86_stringop_string, prefix, suffix, sw);
3347 if (!ix86_arch_string)
3348 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3350 ix86_arch_specified = 1;
3352 /* Validate -mabi= value. */
3353 if (ix86_abi_string)
3355 if (strcmp (ix86_abi_string, "sysv") == 0)
3356 ix86_abi = SYSV_ABI;
3357 else if (strcmp (ix86_abi_string, "ms") == 0)
3360 error ("unknown ABI (%s) for %sabi=%s %s",
3361 ix86_abi_string, prefix, suffix, sw);
3364 ix86_abi = DEFAULT_ABI;
3366 if (ix86_cmodel_string != 0)
3368 if (!strcmp (ix86_cmodel_string, "small"))
3369 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3370 else if (!strcmp (ix86_cmodel_string, "medium"))
3371 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3372 else if (!strcmp (ix86_cmodel_string, "large"))
3373 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3375 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3376 else if (!strcmp (ix86_cmodel_string, "32"))
3377 ix86_cmodel = CM_32;
3378 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3379 ix86_cmodel = CM_KERNEL;
3381 error ("bad value (%s) for %scmodel=%s %s",
3382 ix86_cmodel_string, prefix, suffix, sw);
3386 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3387 use of rip-relative addressing. This eliminates fixups that
3388 would otherwise be needed if this object is to be placed in a
3389 DLL, and is essentially just as efficient as direct addressing. */
3390 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3391 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3392 else if (TARGET_64BIT)
3393 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3395 ix86_cmodel = CM_32;
3397 if (ix86_asm_string != 0)
3400 && !strcmp (ix86_asm_string, "intel"))
3401 ix86_asm_dialect = ASM_INTEL;
3402 else if (!strcmp (ix86_asm_string, "att"))
3403 ix86_asm_dialect = ASM_ATT;
3405 error ("bad value (%s) for %sasm=%s %s",
3406 ix86_asm_string, prefix, suffix, sw);
3408 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3409 error ("code model %qs not supported in the %s bit mode",
3410 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3411 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3412 sorry ("%i-bit mode not compiled in",
3413 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3415 for (i = 0; i < pta_size; i++)
3416 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3418 ix86_schedule = processor_alias_table[i].schedule;
3419 ix86_arch = processor_alias_table[i].processor;
3420 /* Default cpu tuning to the architecture. */
3421 ix86_tune = ix86_arch;
3423 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3424 error ("CPU you selected does not support x86-64 "
3427 if (processor_alias_table[i].flags & PTA_MMX
3428 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3429 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3430 if (processor_alias_table[i].flags & PTA_3DNOW
3431 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3432 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_schedule = processor_alias_table[i].schedule;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }

	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize > 1 && !global_options_set.x_flag_zee)
	flag_zee = 1;
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      if (TARGET_64BIT)
	warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("%sregparm=%d%s is not between 0 and %d",
	       prefix, i, suffix, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
	       prefix, suffix, suffix);
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-loops=%d%s is not between 0 and %d",
		   prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
	       prefix, suffix, suffix);
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-jumps=%d%s is not between 0 and %d",
		   prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
	       prefix, suffix, suffix);
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-functions=%d%s is not between 0 and %d",
		   prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
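
  /* Illustrative note (not from the original sources): the -malign-*
     values above are log2 of the byte alignment, so a hypothetical

	 gcc -O2 -malign-loops=4 file.c

     requests 1 << 4 == 16-byte loop alignment, the same effect as the
     modern -falign-loops=16 spelling that the warnings recommend.  */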
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = ix86_cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
      else
	ix86_branch_cost = i;
    }

  if (ix86_section_threshold_string)
    {
      i = atoi (ix86_section_threshold_string);
      if (i < 0)
	error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
      else
	ix86_section_threshold = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU2;
      else
	error ("bad value (%s) for %stls-dialect=%s %s",
	       ix86_tls_dialect_string, prefix, suffix, sw);
    }

  if (ix87_precision_string)
    {
      i = atoi (ix87_precision_string);
      if (i != 32 && i != 64 && i != 80)
	error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
    }

  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
	       prefix, i, suffix, TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
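
  /* Worked example (illustrative): with -mpreferred-stack-boundary=4 the
     computation above yields (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128
     bits, i.e. the 16-byte stack alignment the SSE psABI expects.  */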
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (ix86_incoming_stack_boundary_string)
    {
      i = atoi (ix86_incoming_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       i, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }

  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && !TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  ix86_fpmath = TARGET_FPMATH_DEFAULT;
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "387+sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387")
	       || ! strcmp (ix86_fpmath_string, "sse+387")
	       || ! strcmp (ix86_fpmath_string, "both"))
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	}
      else
	error ("bad value (%s) for %sfpmath=%s %s",
	       ix86_fpmath_string, prefix, suffix, sw);
    }
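
  /* Illustrative usage (not from the original sources): -mfpmath=sse
     keeps scalar float math in the SSE unit when SSE is enabled, while
     the combined forms let the allocator use both register files, e.g.

	 gcc -O2 -msse2 -mfpmath=sse,387 file.c  */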
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  /* Use external vectorized library in vectorizing intrinsics.  */
  if (ix86_veclibabi_string)
    {
      if (strcmp (ix86_veclibabi_string, "svml") == 0)
	ix86_veclib_handler = ix86_veclibabi_svml;
      else if (strcmp (ix86_veclibabi_string, "acml") == 0)
	ix86_veclib_handler = ix86_veclibabi_acml;
      else
	error ("unknown vectorization library ABI type (%s) for "
	       "%sveclibabi=%s %s", ix86_veclibabi_string,
	       prefix, suffix, sw);
    }

  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions.  */

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && software_prefetching_beneficial_p ())
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;

  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_monitor = gen_sse3_monitor64;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_monitor = gen_sse3_monitor;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }

  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
      flag_fentry = 0;
    }
  if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }

  /* Save the initial options in case the user uses function-specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node ();

  if (TARGET_AVX)
    {
      /* Enable vzeroupper pass by default for TARGET_AVX.  */
      if (!(target_flags_explicit & MASK_VZEROUPPER))
	target_flags |= MASK_VZEROUPPER;
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
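
  /* Background (illustrative, not from the original sources): executing
     legacy SSE instructions while the upper 128 bits of the ymm
     registers are still live triggers an expensive AVX/SSE transition
     penalty on current hardware; emitting vzeroupper first makes the
     transition cheap, which is why the pass defaults to on for
     TARGET_AVX.  */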
/* Return TRUE if type TYPE and mode MODE use 256bit AVX modes.  */

static bool
use_avx256_p (enum machine_mode mode, const_tree type)
{
  return (VALID_AVX256_REG_MODE (mode)
	  || (type
	      && TREE_CODE (type) == VECTOR_TYPE
	      && int_size_in_bytes (type) == 32));
}

/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;
  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;
  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;
      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }
  return false;
}
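
/* Illustrative example (not from the original sources): a user type for
   which both predicates above fire when compiling with -mavx, since a
   32-byte vector travels in a 256-bit ymm register:

       typedef float v8sf __attribute__ ((vector_size (32)));
       v8sf add (v8sf a, v8sf b) { return a + b; }  */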
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}

/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}

/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->fpmath = ix86_fpmath;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->fpmath == ix86_fpmath);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}

/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, NULL, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
	   (ptr->fpmath & FPMATH_387) ? ", 387" : "",
	   (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument
   and set the current options from the argument.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings))
	  ret = false;

      return ret;
    }
  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}
      else if (type == ix86_opt_isa)
	ix86_handle_option (opt, p, opt_set_p);
      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}
      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}
    }

  return ret;
}
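
/* Illustrative usage of the strings this parser accepts (not from the
   original sources):

       int fast_fn (void)
	 __attribute__ ((target ("sse4.2,no-3dnow,arch=core2")));

   Each comma-separated item is looked up in attrs[]; a leading "no-"
   flips the sense of ISA and flag options, and "arch=", "tune=" and
   "fpmath=" capture their argument into p_strings[].  */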
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  const char *orig_fpmath_string = ix86_fpmath_string;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
	ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
      else if (!TARGET_64BIT && TARGET_SSE)
	ix86_fpmath_string = "sse,387";

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      ix86_fpmath_string = orig_fpmath_string;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	if (option_strings[i])
	  free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (!new_target)
    ret = false;
  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}

/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;
  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;
  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an
	 SSE4 function can inline an SSE2 function, but an SSE2 function
	 can't inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;
      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;
      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;
      else if (caller_opts->tune != callee_opts->tune)
	ret = false;
      else if (caller_opts->fpmath != callee_opts->fpmath)
	ret = false;
      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;
      else
	ret = true;
    }

  return ret;
}
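
/* Illustrative consequence of the checks above (not from the original
   sources): given

       __attribute__ ((target ("sse4.2"))) void caller (void);
       __attribute__ ((target ("sse2")))   void callee (void);

   caller may inline callee because callee's ISA flags are a subset of
   caller's, while inlining in the opposite direction is rejected.  */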
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);
      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;
      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}
      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
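
/* Illustrative use (not from the original sources): compiling with

       gcc -mcmodel=medium -mlarge-data-threshold=65536 file.c

   sends objects larger than 64 KiB through this predicate into the
   large .ldata/.lbss sections, while smaller objects stay in the
   normal .data/.bss sections.  */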
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
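
/* Illustrative output (not from the original sources): for a 1 MiB
   object "big" aligned to 32 bytes under -mcmodel=medium this emits

       .largecomm	big,1048576,32

   where a small-model compile would have used the plain COMMON_ASM_OP
   (.comm) directive instead.  */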
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}

static const struct default_options ix86_option_optimization_table[] =
  {
    /* Turn off -fschedule-insns by default.  It tends to make the
       problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
    { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
#endif

#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
    SUBTARGET_OPTIMIZATION_OPTIONS,
#endif
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };

/* Implement TARGET_OPTION_INIT_STRUCT.  */

static void
ix86_option_init_struct (struct gcc_options *opts)
{
  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    opts->x_flag_errno_math = 0;

  opts->x_flag_pcc_struct_return = 2;
  opts->x_flag_asynchronous_unwind_tables = 2;
  opts->x_flag_vect_cost_model = 1;
}

/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("regparm and thiscall attributes are not compatible");

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
	  != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched thiscall types.  */
  if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);

  regparm = ix86_regparm;
  attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
      return regparm;
    }

  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
    return 2;
  if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
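
/* Illustrative effect (not from the original sources): with

       void f (int a, int b, int c) __attribute__ ((regparm (3)));

   the three arguments arrive in %eax, %edx and %ecx instead of on the
   stack; the local-function path above derives the same kind of value
   automatically when the function is not visible outside the unit.  */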
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall and fastcall functions will pop the stack if not
	 variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
	  || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
	  || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd && ! stdarg_p (funtype))
	return size;
    }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
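
/* Worked example (not from the original sources): for

       void __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8 and the function is not variadic, so 8 is returned and the
   callee pops its own arguments with a "ret $8" instruction.  */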
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (call_abi == MS_ABI)
    return 32;
  return 0;
}

/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (TARGET_64BIT && fntype != NULL)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}

static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun || ! TARGET_64BIT)
    return ix86_abi;
  return cfun->machine->call_abi;
}
5522 /* Write the extra assembler code needed to declare a function properly. */
5525 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5528 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5532 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5533 unsigned int filler_cc = 0xcccccccc;
5535 for (i = 0; i < filler_count; i += 4)
5536 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5539 ASM_OUTPUT_LABEL (asm_out_file, fname);
5541 /* Output magic byte marker, if hot-patch attribute is set. */
5546 /* leaq [%rsp + 0], %rsp */
5547 asm_fprintf (asm_out_file, ASM_BYTE
5548 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5552 /* movl.s %edi, %edi
5554 movl.s %esp, %ebp */
5555 asm_fprintf (asm_out_file, ASM_BYTE
5556 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5562 extern void init_regs (void);
5564 /* Implementation of call abi switching target hook. Specific to FNDECL
5565 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
5566 for more details. */
5568 ix86_call_abi_override (const_tree fndecl)
5570 if (fndecl == NULL_TREE)
5571 cfun->machine->call_abi = ix86_abi;
5573 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5576 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
5577 re-initialization of init_regs each time we switch function context since
5578 this is needed only during RTL expansion. */
5580 ix86_maybe_switch_abi (void)
5583 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5587 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5588 for a call to a function whose data type is FNTYPE.
5589 For a library call, FNTYPE is 0. */
5592 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5593 tree fntype, /* tree ptr for function decl */
5594 rtx libname, /* SYMBOL_REF of library name or 0 */
5598 struct cgraph_local_info *i;
5601 memset (cum, 0, sizeof (*cum));
5603 /* Initialize for the current callee. */
5606 cfun->machine->callee_pass_avx256_p = false;
5607 cfun->machine->callee_return_avx256_p = false;
5612 i = cgraph_local_info (fndecl);
5613 cum->call_abi = ix86_function_abi (fndecl);
5614 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5619 cum->call_abi = ix86_function_type_abi (fntype);
5621 fnret_type = TREE_TYPE (fntype);
5626 if (TARGET_VZEROUPPER && fnret_type)
5628 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5630 if (function_pass_avx256_p (fnret_value))
5632 /* The return value of this function uses 256bit AVX modes. */
5633 cfun->machine->use_avx256_p = true;
5635 cfun->machine->callee_return_avx256_p = true;
5637 cfun->machine->caller_return_avx256_p = true;
5641 cum->caller = caller;
5643 /* Set up the number of registers to use for passing arguments. */
5645 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5646 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5647 "or subtarget optimization implying it");
5648 cum->nregs = ix86_regparm;
5651 cum->nregs = (cum->call_abi == SYSV_ABI
5652 ? X86_64_REGPARM_MAX
5653 : X86_64_MS_REGPARM_MAX);
5657 cum->sse_nregs = SSE_REGPARM_MAX;
5660 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5661 ? X86_64_SSE_REGPARM_MAX
5662 : X86_64_MS_SSE_REGPARM_MAX);
5666 cum->mmx_nregs = MMX_REGPARM_MAX;
5667 cum->warn_avx = true;
5668 cum->warn_sse = true;
5669 cum->warn_mmx = true;
5671 /* Because types might mismatch between caller and callee, we need to
5672 use the actual type of the function for local calls.
5673 FIXME: cgraph_analyze can be told to record whether a function actually
5674 uses va_start, so for local functions maybe_vaarg can be made more aggressive.
5676 FIXME: once the type system is fixed, we won't need this code anymore. */
5678 fntype = TREE_TYPE (fndecl);
5679 cum->maybe_vaarg = (fntype
5680 ? (!prototype_p (fntype) || stdarg_p (fntype))
5685 /* If there are variable arguments, then we won't pass anything
5686 in registers in 32-bit mode. */
5687 if (stdarg_p (fntype))
5698 /* Use ecx and edx registers if function has fastcall attribute,
5699 else look for regparm information. */
5702 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5705 cum->fastcall = 1; /* Same first register as in fastcall. */
5707 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5713 cum->nregs = ix86_function_regparm (fntype, fndecl);
5716 /* Set up the number of SSE registers used for passing SFmode
5717 and DFmode arguments. Warn for mismatching ABI. */
5718 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
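/* Editorial example, not from the original sources, of the 32-bit
   register counts set up above:

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   passes A in %ecx and B in %edx while C goes on the stack, whereas
   __attribute__ ((regparm (2))) would use %eax and %edx instead.  */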
5722 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5723 But in the case of vector types, it is some vector mode.
5725 When we have only some of our vector isa extensions enabled, then there
5726 are some modes for which vector_mode_supported_p is false. For these
5727 modes, the generic vector support in gcc will choose some non-vector mode
5728 in order to implement the type. By computing the natural mode, we'll
5729 select the proper ABI location for the operand and not depend on whatever
5730 the middle-end decides to do with these vector types.
5732 The middle-end can't deal with vector types larger than 16 bytes. In this
5733 case, we return the original mode and warn of the ABI change if CUM isn't NULL. */
5736 static enum machine_mode
5737 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5739 enum machine_mode mode = TYPE_MODE (type);
5741 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5743 HOST_WIDE_INT size = int_size_in_bytes (type);
5744 if ((size == 8 || size == 16 || size == 32)
5745 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5746 && TYPE_VECTOR_SUBPARTS (type) > 1)
5748 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5750 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5751 mode = MIN_MODE_VECTOR_FLOAT;
5753 mode = MIN_MODE_VECTOR_INT;
5755 /* Get the mode which has this inner mode and number of units. */
5756 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5757 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5758 && GET_MODE_INNER (mode) == innermode)
5760 if (size == 32 && !TARGET_AVX)
5762 static bool warnedavx;
5769 warning (0, "AVX vector argument without AVX "
5770 "enabled changes the ABI");
5772 return TYPE_MODE (type);
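/* Editorial example, not from the original sources: given

     typedef int v4si __attribute__ ((vector_size (16)));

   type_natural_mode returns V4SImode even when SSE is disabled and
   the middle-end would have fallen back to a non-vector mode, so the
   argument still receives its ABI-mandated location.  */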
5785 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5786 this may not agree with the mode that the type system has chosen for the
5787 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5788 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5791 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5796 if (orig_mode != BLKmode)
5797 tmp = gen_rtx_REG (orig_mode, regno);
5800 tmp = gen_rtx_REG (mode, regno);
5801 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5802 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5808 /* x86-64 register passing implementation. See the x86-64 psABI for details.
5809 The goal of this code is to classify each 8-byte piece of the incoming argument
5810 by register class and assign registers accordingly. */
5812 /* Return the union class of CLASS1 and CLASS2.
5813 See the x86-64 PS ABI for details. */
5815 static enum x86_64_reg_class
5816 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5818 /* Rule #1: If both classes are equal, this is the resulting class. */
5819 if (class1 == class2)
5822 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5824 if (class1 == X86_64_NO_CLASS)
5826 if (class2 == X86_64_NO_CLASS)
5829 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5830 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5831 return X86_64_MEMORY_CLASS;
5833 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5834 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5835 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5836 return X86_64_INTEGERSI_CLASS;
5837 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5838 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5839 return X86_64_INTEGER_CLASS;
5841 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87, MEMORY is used. */
5843 if (class1 == X86_64_X87_CLASS
5844 || class1 == X86_64_X87UP_CLASS
5845 || class1 == X86_64_COMPLEX_X87_CLASS
5846 || class2 == X86_64_X87_CLASS
5847 || class2 == X86_64_X87UP_CLASS
5848 || class2 == X86_64_COMPLEX_X87_CLASS)
5849 return X86_64_MEMORY_CLASS;
5851 /* Rule #6: Otherwise class SSE is used. */
5852 return X86_64_SSE_CLASS;
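/* Editorial example, not from the original sources, of the merge
   rules above:

     merge_classes (X86_64_NO_CLASS, X86_64_SSE_CLASS)
       == X86_64_SSE_CLASS                        (rule #2)
     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       == X86_64_INTEGER_CLASS                    (rule #4)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       == X86_64_MEMORY_CLASS                     (rule #5)  */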
5855 /* Classify the argument of type TYPE and mode MODE.
5856 CLASSES will be filled by the register class used to pass each word
5857 of the operand. The number of words is returned. In case the parameter
5858 should be passed in memory, 0 is returned. As a special case for zero
5859 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5861 BIT_OFFSET is used internally for handling records and specifies the
5862 offset in bits modulo 256 to avoid overflow cases.
5864 See the x86-64 PS ABI for details.
5868 classify_argument (enum machine_mode mode, const_tree type,
5869 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5871 HOST_WIDE_INT bytes =
5872 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5873 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5875 /* Variable sized entities are always passed/returned in memory. */
5879 if (mode != VOIDmode
5880 && targetm.calls.must_pass_in_stack (mode, type))
5883 if (type && AGGREGATE_TYPE_P (type))
5887 enum x86_64_reg_class subclasses[MAX_CLASSES];
5889 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5893 for (i = 0; i < words; i++)
5894 classes[i] = X86_64_NO_CLASS;
5896 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5897 signal the memory class, so handle it as a special case. */
5900 classes[0] = X86_64_NO_CLASS;
5904 /* Classify each field of the record and merge the classes. */
5905 switch (TREE_CODE (type))
5908 /* And now merge the fields of the structure. */
5909 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5911 if (TREE_CODE (field) == FIELD_DECL)
5915 if (TREE_TYPE (field) == error_mark_node)
5918 /* Bitfields are always classified as integer. Handle them
5919 early, since later code would consider them to be
5920 misaligned integers. */
5921 if (DECL_BIT_FIELD (field))
5923 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5924 i < ((int_bit_position (field) + (bit_offset % 64))
5925 + tree_low_cst (DECL_SIZE (field), 0)
5928 merge_classes (X86_64_INTEGER_CLASS,
5935 type = TREE_TYPE (field);
5937 /* Flexible array member is ignored. */
5938 if (TYPE_MODE (type) == BLKmode
5939 && TREE_CODE (type) == ARRAY_TYPE
5940 && TYPE_SIZE (type) == NULL_TREE
5941 && TYPE_DOMAIN (type) != NULL_TREE
5942 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5947 if (!warned && warn_psabi)
5950 inform (input_location,
5951 "The ABI of passing struct with"
5952 " a flexible array member has"
5953 " changed in GCC 4.4");
5957 num = classify_argument (TYPE_MODE (type), type,
5959 (int_bit_position (field)
5960 + bit_offset) % 256);
5963 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5964 for (i = 0; i < num && (i + pos) < words; i++)
5966 merge_classes (subclasses[i], classes[i + pos]);
5973 /* Arrays are handled as small records. */
5976 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5977 TREE_TYPE (type), subclasses, bit_offset);
5981 /* The partial classes are now full classes. */
5982 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5983 subclasses[0] = X86_64_SSE_CLASS;
5984 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5985 && !((bit_offset % 64) == 0 && bytes == 4))
5986 subclasses[0] = X86_64_INTEGER_CLASS;
5988 for (i = 0; i < words; i++)
5989 classes[i] = subclasses[i % num];
5994 case QUAL_UNION_TYPE:
5995 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5997 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5999 if (TREE_CODE (field) == FIELD_DECL)
6003 if (TREE_TYPE (field) == error_mark_node)
6006 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6007 TREE_TYPE (field), subclasses,
6011 for (i = 0; i < num; i++)
6012 classes[i] = merge_classes (subclasses[i], classes[i]);
6023 /* When the size exceeds 16 bytes, if the first class isn't
6024 X86_64_SSE_CLASS or any of the other classes isn't
6025 X86_64_SSEUP_CLASS, everything should be passed in memory. */
6027 if (classes[0] != X86_64_SSE_CLASS)
6030 for (i = 1; i < words; i++)
6031 if (classes[i] != X86_64_SSEUP_CLASS)
6035 /* Final merger cleanup. */
6036 for (i = 0; i < words; i++)
6038 /* If one class is MEMORY, everything should be passed in
6040 if (classes[i] == X86_64_MEMORY_CLASS)
6043 /* X86_64_SSEUP_CLASS should always be preceded by
6044 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6045 if (classes[i] == X86_64_SSEUP_CLASS
6046 && classes[i - 1] != X86_64_SSE_CLASS
6047 && classes[i - 1] != X86_64_SSEUP_CLASS)
6049 /* The first one should never be X86_64_SSEUP_CLASS. */
6050 gcc_assert (i != 0);
6051 classes[i] = X86_64_SSE_CLASS;
6054 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6055 everything should be passed in memory. */
6056 if (classes[i] == X86_64_X87UP_CLASS
6057 && (classes[i - 1] != X86_64_X87_CLASS))
6061 /* The first one should never be X86_64_X87UP_CLASS. */
6062 gcc_assert (i != 0);
6063 if (!warned && warn_psabi)
6066 inform (input_location,
6067 "The ABI of passing union with long double"
6068 " has changed in GCC 4.4");
6076 /* Compute the alignment needed. We align all types to their natural boundaries,
6077 with the exception of XFmode, which is aligned to 64 bits. */
6078 if (mode != VOIDmode && mode != BLKmode)
6080 int mode_alignment = GET_MODE_BITSIZE (mode);
6083 mode_alignment = 128;
6084 else if (mode == XCmode)
6085 mode_alignment = 256;
6086 if (COMPLEX_MODE_P (mode))
6087 mode_alignment /= 2;
6088 /* Misaligned fields are always returned in memory. */
6089 if (bit_offset % mode_alignment)
6093 /* For V1xx modes, just use the base mode. */
6094 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6095 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6096 mode = GET_MODE_INNER (mode);
6098 /* Classification of atomic types. */
6103 classes[0] = X86_64_SSE_CLASS;
6106 classes[0] = X86_64_SSE_CLASS;
6107 classes[1] = X86_64_SSEUP_CLASS;
6117 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6121 classes[0] = X86_64_INTEGERSI_CLASS;
6124 else if (size <= 64)
6126 classes[0] = X86_64_INTEGER_CLASS;
6129 else if (size <= 64+32)
6131 classes[0] = X86_64_INTEGER_CLASS;
6132 classes[1] = X86_64_INTEGERSI_CLASS;
6135 else if (size <= 64+64)
6137 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6145 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6149 /* OImode shouldn't be used directly. */
6154 if (!(bit_offset % 64))
6155 classes[0] = X86_64_SSESF_CLASS;
6157 classes[0] = X86_64_SSE_CLASS;
6160 classes[0] = X86_64_SSEDF_CLASS;
6163 classes[0] = X86_64_X87_CLASS;
6164 classes[1] = X86_64_X87UP_CLASS;
6167 classes[0] = X86_64_SSE_CLASS;
6168 classes[1] = X86_64_SSEUP_CLASS;
6171 classes[0] = X86_64_SSE_CLASS;
6172 if (!(bit_offset % 64))
6178 if (!warned && warn_psabi)
6181 inform (input_location,
6182 "The ABI of passing structure with complex float"
6183 " member has changed in GCC 4.4");
6185 classes[1] = X86_64_SSESF_CLASS;
6189 classes[0] = X86_64_SSEDF_CLASS;
6190 classes[1] = X86_64_SSEDF_CLASS;
6193 classes[0] = X86_64_COMPLEX_X87_CLASS;
6196 /* These modes are larger than 16 bytes. */
6204 classes[0] = X86_64_SSE_CLASS;
6205 classes[1] = X86_64_SSEUP_CLASS;
6206 classes[2] = X86_64_SSEUP_CLASS;
6207 classes[3] = X86_64_SSEUP_CLASS;
6215 classes[0] = X86_64_SSE_CLASS;
6216 classes[1] = X86_64_SSEUP_CLASS;
6224 classes[0] = X86_64_SSE_CLASS;
6230 gcc_assert (VECTOR_MODE_P (mode));
6235 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6237 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6238 classes[0] = X86_64_INTEGERSI_CLASS;
6240 classes[0] = X86_64_INTEGER_CLASS;
6241 classes[1] = X86_64_INTEGER_CLASS;
6242 return 1 + (bytes > 8);
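/* Editorial example, not from the original sources: on x86-64,

     struct s { long l; double d; };

   occupies two eightbytes, and classify_argument fills in

     classes[0] = X86_64_INTEGER_CLASS;      (the long)
     classes[1] = X86_64_SSEDF_CLASS;        (the double)

   and returns 2, so the struct travels in one integer register and
   one SSE register.  */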
6246 /* Examine the argument and set the number of registers required in each
6247 class. Return 0 iff the parameter should be passed in memory. */
6249 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6250 int *int_nregs, int *sse_nregs)
6252 enum x86_64_reg_class regclass[MAX_CLASSES];
6253 int n = classify_argument (mode, type, regclass, 0);
6259 for (n--; n >= 0; n--)
6260 switch (regclass[n])
6262 case X86_64_INTEGER_CLASS:
6263 case X86_64_INTEGERSI_CLASS:
6266 case X86_64_SSE_CLASS:
6267 case X86_64_SSESF_CLASS:
6268 case X86_64_SSEDF_CLASS:
6271 case X86_64_NO_CLASS:
6272 case X86_64_SSEUP_CLASS:
6274 case X86_64_X87_CLASS:
6275 case X86_64_X87UP_CLASS:
6279 case X86_64_COMPLEX_X87_CLASS:
6280 return in_return ? 2 : 0;
6281 case X86_64_MEMORY_CLASS:
6287 /* Construct container for the argument used by GCC interface. See
6288 FUNCTION_ARG for the detailed description. */
6291 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6292 const_tree type, int in_return, int nintregs, int nsseregs,
6293 const int *intreg, int sse_regno)
6295 /* The following variables hold the static issued_error state. */
6296 static bool issued_sse_arg_error;
6297 static bool issued_sse_ret_error;
6298 static bool issued_x87_ret_error;
6300 enum machine_mode tmpmode;
6302 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6303 enum x86_64_reg_class regclass[MAX_CLASSES];
6307 int needed_sseregs, needed_intregs;
6308 rtx exp[MAX_CLASSES];
6311 n = classify_argument (mode, type, regclass, 0);
6314 if (!examine_argument (mode, type, in_return, &needed_intregs,
6317 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6320 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6321 some less clueful developer tries to use floating-point anyway. */
6322 if (needed_sseregs && !TARGET_SSE)
6326 if (!issued_sse_ret_error)
6328 error ("SSE register return with SSE disabled");
6329 issued_sse_ret_error = true;
6332 else if (!issued_sse_arg_error)
6334 error ("SSE register argument with SSE disabled");
6335 issued_sse_arg_error = true;
6340 /* Likewise, error if the ABI requires us to return values in the
6341 x87 registers and the user specified -mno-80387. */
6342 if (!TARGET_80387 && in_return)
6343 for (i = 0; i < n; i++)
6344 if (regclass[i] == X86_64_X87_CLASS
6345 || regclass[i] == X86_64_X87UP_CLASS
6346 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6348 if (!issued_x87_ret_error)
6350 error ("x87 register return with x87 disabled");
6351 issued_x87_ret_error = true;
6356 /* First construct the simple cases. Avoid SCmode, since we want to use
6357 a single register to pass this type. */
6358 if (n == 1 && mode != SCmode)
6359 switch (regclass[0])
6361 case X86_64_INTEGER_CLASS:
6362 case X86_64_INTEGERSI_CLASS:
6363 return gen_rtx_REG (mode, intreg[0]);
6364 case X86_64_SSE_CLASS:
6365 case X86_64_SSESF_CLASS:
6366 case X86_64_SSEDF_CLASS:
6367 if (mode != BLKmode)
6368 return gen_reg_or_parallel (mode, orig_mode,
6369 SSE_REGNO (sse_regno));
6371 case X86_64_X87_CLASS:
6372 case X86_64_COMPLEX_X87_CLASS:
6373 return gen_rtx_REG (mode, FIRST_STACK_REG);
6374 case X86_64_NO_CLASS:
6375 /* Zero sized array, struct or class. */
6380 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6381 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6382 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6384 && regclass[0] == X86_64_SSE_CLASS
6385 && regclass[1] == X86_64_SSEUP_CLASS
6386 && regclass[2] == X86_64_SSEUP_CLASS
6387 && regclass[3] == X86_64_SSEUP_CLASS
6389 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6392 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6393 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6394 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6395 && regclass[1] == X86_64_INTEGER_CLASS
6396 && (mode == CDImode || mode == TImode || mode == TFmode)
6397 && intreg[0] + 1 == intreg[1])
6398 return gen_rtx_REG (mode, intreg[0]);
6400 /* Otherwise figure out the entries of the PARALLEL. */
6401 for (i = 0; i < n; i++)
6405 switch (regclass[i])
6407 case X86_64_NO_CLASS:
6409 case X86_64_INTEGER_CLASS:
6410 case X86_64_INTEGERSI_CLASS:
6411 /* Merge TImodes on aligned occasions here too. */
6412 if (i * 8 + 8 > bytes)
6413 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6414 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6418 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6419 if (tmpmode == BLKmode)
6421 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6422 gen_rtx_REG (tmpmode, *intreg),
6426 case X86_64_SSESF_CLASS:
6427 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6428 gen_rtx_REG (SFmode,
6429 SSE_REGNO (sse_regno)),
6433 case X86_64_SSEDF_CLASS:
6434 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6435 gen_rtx_REG (DFmode,
6436 SSE_REGNO (sse_regno)),
6440 case X86_64_SSE_CLASS:
6448 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6458 && regclass[1] == X86_64_SSEUP_CLASS
6459 && regclass[2] == X86_64_SSEUP_CLASS
6460 && regclass[3] == X86_64_SSEUP_CLASS);
6467 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6468 gen_rtx_REG (tmpmode,
6469 SSE_REGNO (sse_regno)),
6478 /* Empty aligned struct, union or class. */
6482 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6483 for (i = 0; i < nexps; i++)
6484 XVECEXP (ret, 0, i) = exp [i];
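/* Editorial example, not from the original sources: for the
   struct { long l; double d; } case, construct_container builds
   roughly

     (parallel [(expr_list (reg:DI di)   (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   recording each piece's register and its byte offset within the
   argument.  */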
6488 /* Update the data in CUM to advance over an argument of mode MODE
6489 and data type TYPE. (TYPE is null for libcalls where that information
6490 may not be available.) */
6493 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6494 const_tree type, HOST_WIDE_INT bytes,
6495 HOST_WIDE_INT words)
6511 cum->words += words;
6512 cum->nregs -= words;
6513 cum->regno += words;
6515 if (cum->nregs <= 0)
6523 /* OImode shouldn't be used directly. */
6527 if (cum->float_in_sse < 2)
6530 if (cum->float_in_sse < 1)
6547 if (!type || !AGGREGATE_TYPE_P (type))
6549 cum->sse_words += words;
6550 cum->sse_nregs -= 1;
6551 cum->sse_regno += 1;
6552 if (cum->sse_nregs <= 0)
6566 if (!type || !AGGREGATE_TYPE_P (type))
6568 cum->mmx_words += words;
6569 cum->mmx_nregs -= 1;
6570 cum->mmx_regno += 1;
6571 if (cum->mmx_nregs <= 0)
6582 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6583 const_tree type, HOST_WIDE_INT words, bool named)
6585 int int_nregs, sse_nregs;
6587 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6588 if (!named && VALID_AVX256_REG_MODE (mode))
6591 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6592 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6594 cum->nregs -= int_nregs;
6595 cum->sse_nregs -= sse_nregs;
6596 cum->regno += int_nregs;
6597 cum->sse_regno += sse_nregs;
6601 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6602 cum->words = (cum->words + align - 1) & ~(align - 1);
6603 cum->words += words;
6608 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6609 HOST_WIDE_INT words)
6611 /* Otherwise, this should be passed indirectly. */
6612 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6614 cum->words += words;
6622 /* Update the data in CUM to advance over an argument of mode MODE and
6623 data type TYPE. (TYPE is null for libcalls where that information
6624 may not be available.) */
6627 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6628 const_tree type, bool named)
6630 HOST_WIDE_INT bytes, words;
6632 if (mode == BLKmode)
6633 bytes = int_size_in_bytes (type);
6635 bytes = GET_MODE_SIZE (mode);
6636 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6639 mode = type_natural_mode (type, NULL);
6641 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6642 function_arg_advance_ms_64 (cum, bytes, words);
6643 else if (TARGET_64BIT)
6644 function_arg_advance_64 (cum, mode, type, words, named);
6646 function_arg_advance_32 (cum, mode, type, bytes, words);
6649 /* Define where to put the arguments to a function.
6650 Value is zero to push the argument on the stack,
6651 or a hard register in which to store the argument.
6653 MODE is the argument's machine mode.
6654 TYPE is the data type of the argument (as a tree).
6655 This is null for libcalls where that information may
6657 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6658 the preceding args and about the function being called.
6659 NAMED is nonzero if this argument is a named parameter
6660 (otherwise it is an extra parameter matching an ellipsis). */
6663 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6664 enum machine_mode orig_mode, const_tree type,
6665 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6667 static bool warnedsse, warnedmmx;
6669 /* Avoid the AL settings for the Unix64 ABI. */
6670 if (mode == VOIDmode)
6686 if (words <= cum->nregs)
6688 int regno = cum->regno;
6690 /* Fastcall allocates the first two DWORD (SImode) or
6691 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6697 || (type && AGGREGATE_TYPE_P (type)))
6700 /* ECX, not EAX, is the first allocated register. */
6701 if (regno == AX_REG)
6704 return gen_rtx_REG (mode, regno);
6709 if (cum->float_in_sse < 2)
6712 if (cum->float_in_sse < 1)
6716 /* In 32bit mode, we pass TImode in xmm registers. */
6723 if (!type || !AGGREGATE_TYPE_P (type))
6725 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6728 warning (0, "SSE vector argument without SSE enabled "
6732 return gen_reg_or_parallel (mode, orig_mode,
6733 cum->sse_regno + FIRST_SSE_REG);
6738 /* OImode shouldn't be used directly. */
6747 if (!type || !AGGREGATE_TYPE_P (type))
6750 return gen_reg_or_parallel (mode, orig_mode,
6751 cum->sse_regno + FIRST_SSE_REG);
6761 if (!type || !AGGREGATE_TYPE_P (type))
6763 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6766 warning (0, "MMX vector argument without MMX enabled "
6770 return gen_reg_or_parallel (mode, orig_mode,
6771 cum->mmx_regno + FIRST_MMX_REG);
6780 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6781 enum machine_mode orig_mode, const_tree type, bool named)
6783 /* Handle a hidden AL argument containing the number of registers
6784 for varargs x86-64 functions. */
6785 if (mode == VOIDmode)
6786 return GEN_INT (cum->maybe_vaarg
6787 ? (cum->sse_nregs < 0
6788 ? X86_64_SSE_REGPARM_MAX
6803 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6809 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6811 &x86_64_int_parameter_registers [cum->regno],
6816 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6817 enum machine_mode orig_mode, bool named,
6818 HOST_WIDE_INT bytes)
6822 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6823 We use the value -2 to specify that the current function call is MSABI. */
6824 if (mode == VOIDmode)
6825 return GEN_INT (-2);
6827 /* If we've run out of registers, it goes on the stack. */
6828 if (cum->nregs == 0)
6831 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6833 /* Only floating point modes are passed in anything but integer regs. */
6834 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6837 regno = cum->regno + FIRST_SSE_REG;
6842 /* Unnamed floating parameters are passed in both the
6843 SSE and integer registers. */
6844 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6845 t2 = gen_rtx_REG (mode, regno);
6846 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6847 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6848 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6851 /* Handle aggregate types passed in registers. */
6852 if (orig_mode == BLKmode)
6854 if (bytes > 0 && bytes <= 8)
6855 mode = (bytes > 4 ? DImode : SImode);
6856 if (mode == BLKmode)
6860 return gen_reg_or_parallel (mode, orig_mode, regno);
6863 /* Return where to put the arguments to a function.
6864 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6866 MODE is the argument's machine mode. TYPE is the data type of the
6867 argument. It is null for libcalls where that information may not be
6868 available. CUM gives information about the preceding args and about
6869 the function being called. NAMED is nonzero if this argument is a
6870 named parameter (otherwise it is an extra parameter matching an
6874 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6875 const_tree type, bool named)
6877 enum machine_mode mode = omode;
6878 HOST_WIDE_INT bytes, words;
6881 if (mode == BLKmode)
6882 bytes = int_size_in_bytes (type);
6884 bytes = GET_MODE_SIZE (mode);
6885 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6887 /* To simplify the code below, represent vector types with a vector mode
6888 even if MMX/SSE are not active. */
6889 if (type && TREE_CODE (type) == VECTOR_TYPE)
6890 mode = type_natural_mode (type, cum);
6892 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6893 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6894 else if (TARGET_64BIT)
6895 arg = function_arg_64 (cum, mode, omode, type, named);
6897 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6899 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6901 /* This argument uses 256bit AVX modes. */
6902 cfun->machine->use_avx256_p = true;
6904 cfun->machine->callee_pass_avx256_p = true;
6906 cfun->machine->caller_pass_avx256_p = true;
6912 /* A C expression that indicates when an argument must be passed by
6913 reference. If nonzero for an argument, a copy of that argument is
6914 made in memory and a pointer to the argument is passed instead of
6915 the argument itself. The pointer is passed in whatever way is
6916 appropriate for passing a pointer to that type. */
6919 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6920 enum machine_mode mode ATTRIBUTE_UNUSED,
6921 const_tree type, bool named ATTRIBUTE_UNUSED)
6923 /* See Windows x64 Software Convention. */
6924 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6926 int msize = (int) GET_MODE_SIZE (mode);
6929 /* Arrays are passed by reference. */
6930 if (TREE_CODE (type) == ARRAY_TYPE)
6933 if (AGGREGATE_TYPE_P (type))
6935 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6936 are passed by reference. */
6937 msize = int_size_in_bytes (type);
6941 /* __m128 is passed by reference. */
6943 case 1: case 2: case 4: case 8:
6949 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6955 /* Return true when TYPE should be 128bit aligned for the 32bit argument
6956 passing ABI. XXX: This function is obsolete and is only used for
6957 checking psABI compatibility with previous versions of GCC. */
6960 ix86_compat_aligned_value_p (const_tree type)
6962 enum machine_mode mode = TYPE_MODE (type);
6963 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6967 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6969 if (TYPE_ALIGN (type) < 128)
6972 if (AGGREGATE_TYPE_P (type))
6974 /* Walk the aggregates recursively. */
6975 switch (TREE_CODE (type))
6979 case QUAL_UNION_TYPE:
6983 /* Walk all the structure fields. */
6984 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6986 if (TREE_CODE (field) == FIELD_DECL
6987 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
6994 /* Just in case some languages pass arrays by value. */
6995 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7006 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7007 XXX: This function is obsolete and is only used for checking psABI
7008 compatibility with previous versions of GCC. */
7011 ix86_compat_function_arg_boundary (enum machine_mode mode,
7012 const_tree type, int align)
7014 /* In 32bit mode, only _Decimal128 and __float128 are aligned to their
7015 natural boundaries. */
7016 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7018 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
7019 make an exception for SSE modes since these require 128bit alignment.
7022 The handling here differs from field_alignment. ICC aligns MMX
7023 arguments to 4 byte boundaries, while structure fields are aligned
7024 to 8 byte boundaries. */
7027 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7028 align = PARM_BOUNDARY;
7032 if (!ix86_compat_aligned_value_p (type))
7033 align = PARM_BOUNDARY;
7036 if (align > BIGGEST_ALIGNMENT)
7037 align = BIGGEST_ALIGNMENT;
7041 /* Return true when TYPE should be 128bit aligned for the 32bit argument
7045 ix86_contains_aligned_value_p (const_tree type)
7047 enum machine_mode mode = TYPE_MODE (type);
7049 if (mode == XFmode || mode == XCmode)
7052 if (TYPE_ALIGN (type) < 128)
7055 if (AGGREGATE_TYPE_P (type))
7057 /* Walk the aggregates recursively. */
7058 switch (TREE_CODE (type))
7062 case QUAL_UNION_TYPE:
7066 /* Walk all the structure fields. */
7067 for (field = TYPE_FIELDS (type);
7069 field = DECL_CHAIN (field))
7071 if (TREE_CODE (field) == FIELD_DECL
7072 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7079 /* Just in case some languages pass arrays by value. */
7080 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7089 return TYPE_ALIGN (type) >= 128;
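/* Editorial example, not from the original sources: given

     typedef int aligned_int __attribute__ ((aligned (16)));
     struct s { aligned_int i; };

   ix86_contains_aligned_value_p returns true for struct s, so the
   boundary computed below stays at 128 bits instead of dropping to
   PARM_BOUNDARY.  */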
7094 /* Gives the alignment boundary, in bits, of an argument with the
7095 specified mode and type. */
7098 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7103 /* Since the main variant type is used for the call, convert TYPE to
7104 its main variant. */
7105 type = TYPE_MAIN_VARIANT (type);
7106 align = TYPE_ALIGN (type);
7109 align = GET_MODE_ALIGNMENT (mode);
7110 if (align < PARM_BOUNDARY)
7111 align = PARM_BOUNDARY;
7115 int saved_align = align;
7119 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7122 if (mode == XFmode || mode == XCmode)
7123 align = PARM_BOUNDARY;
7125 else if (!ix86_contains_aligned_value_p (type))
7126 align = PARM_BOUNDARY;
7129 align = PARM_BOUNDARY;
7134 && align != ix86_compat_function_arg_boundary (mode, type,
7138 inform (input_location,
7139 "The ABI of passing parameter with %dbyte"
7140 " alignment has changed in GCC 4.6",
7141 align / BITS_PER_UNIT);
7148 /* Return true if N is a possible register number of function value. */
7151 ix86_function_value_regno_p (const unsigned int regno)
7158 case FIRST_FLOAT_REG:
7159 /* TODO: The function should depend on the current function ABI but
7160 builtins.c would need updating then. Therefore we use the default ABI. */
7162 if (TARGET_64BIT && ix86_abi == MS_ABI)
7164 return TARGET_FLOAT_RETURNS_IN_80387;
7170 if (TARGET_MACHO || TARGET_64BIT)
7178 /* Define how to find the value returned by a function.
7179 VALTYPE is the data type of the value (as a tree).
7180 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7181 otherwise, FUNC is 0. */
7184 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7185 const_tree fntype, const_tree fn)
7189 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7190 we normally prevent this case when mmx is not available. However
7191 some ABIs may require the result to be returned like DImode. */
7192 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7193 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7195 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7196 we prevent this case when sse is not available. However some ABIs
7197 may require the result to be returned like integer TImode. */
7198 else if (mode == TImode
7199 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7200 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7202 /* 32-byte vector modes in %ymm0. */
7203 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7204 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7206 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7207 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7208 regno = FIRST_FLOAT_REG;
7210 /* Most things go in %eax. */
7213 /* Override the FP return register with %xmm0 for local functions when
7214 SSE math is enabled or for functions with the sseregparm attribute. */
7215 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7217 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7218 if ((sse_level >= 1 && mode == SFmode)
7219 || (sse_level == 2 && mode == DFmode))
7220 regno = FIRST_SSE_REG;
7223 /* OImode shouldn't be used directly. */
7224 gcc_assert (mode != OImode);
7226 return gen_rtx_REG (orig_mode, regno);
7230 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7235 /* Handle libcalls, which don't provide a type node. */
7236 if (valtype == NULL)
7248 return gen_rtx_REG (mode, FIRST_SSE_REG);
7251 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7255 return gen_rtx_REG (mode, AX_REG);
7259 ret = construct_container (mode, orig_mode, valtype, 1,
7260 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7261 x86_64_int_return_registers, 0);
7263 /* For zero-sized structures, construct_container returns NULL, but we
7264 need to keep the rest of the compiler happy by returning a meaningful value. */
7266 ret = gen_rtx_REG (orig_mode, AX_REG);
7272 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7274 unsigned int regno = AX_REG;
7278 switch (GET_MODE_SIZE (mode))
7281 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7282 && !COMPLEX_MODE_P (mode))
7283 regno = FIRST_SSE_REG;
7287 if (mode == SFmode || mode == DFmode)
7288 regno = FIRST_SSE_REG;
7294 return gen_rtx_REG (orig_mode, regno);
7298 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7299 enum machine_mode orig_mode, enum machine_mode mode)
7301 const_tree fn, fntype;
7304 if (fntype_or_decl && DECL_P (fntype_or_decl))
7305 fn = fntype_or_decl;
7306 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7308 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7309 return function_value_ms_64 (orig_mode, mode);
7310 else if (TARGET_64BIT)
7311 return function_value_64 (orig_mode, mode, valtype);
7313 return function_value_32 (orig_mode, mode, fntype, fn);
7317 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7318 bool outgoing ATTRIBUTE_UNUSED)
7320 enum machine_mode mode, orig_mode;
7322 orig_mode = TYPE_MODE (valtype);
7323 mode = type_natural_mode (valtype, NULL);
7324 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7328 ix86_libcall_value (enum machine_mode mode)
7330 return ix86_function_value_1 (NULL, NULL, mode, mode);
7333 /* Return true iff type is returned in memory. */
7335 static bool ATTRIBUTE_UNUSED
7336 return_in_memory_32 (const_tree type, enum machine_mode mode)
7340 if (mode == BLKmode)
7343 size = int_size_in_bytes (type);
7345 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7348 if (VECTOR_MODE_P (mode) || mode == TImode)
7350 /* User-created vectors small enough to fit in EAX. */
7354 /* MMX/3dNow values are returned in MM0,
7355 except when it doesn't exist or the ABI prescribes otherwise. */
7357 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7359 /* SSE values are returned in XMM0, except when it doesn't exist. */
7363 /* AVX values are returned in YMM0, except when it doesn't exist. */
7374 /* OImode shouldn't be used directly. */
7375 gcc_assert (mode != OImode);
7380 static bool ATTRIBUTE_UNUSED
7381 return_in_memory_64 (const_tree type, enum machine_mode mode)
7383 int needed_intregs, needed_sseregs;
7384 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7387 static bool ATTRIBUTE_UNUSED
7388 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7390 HOST_WIDE_INT size = int_size_in_bytes (type);
7392 /* __m128 is returned in xmm0. */
7393 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7394 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7397 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7398 return size != 1 && size != 2 && size != 4 && size != 8;
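/* Editorial example, not from the original sources: under the MS x64
   convention checked above,

     struct r3 { char c[3]; };   (size 3)  is returned in memory,
     struct r8 { char c[8]; };   (size 8)  comes back in %rax, and
     __m128                      (size 16) comes back in %xmm0.  */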
7402 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7404 #ifdef SUBTARGET_RETURN_IN_MEMORY
7405 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7407 const enum machine_mode mode = type_natural_mode (type, NULL);
7411 if (ix86_function_type_abi (fntype) == MS_ABI)
7412 return return_in_memory_ms_64 (type, mode);
7414 return return_in_memory_64 (type, mode);
7417 return return_in_memory_32 (type, mode);
7421 /* When returning SSE vector types, we have a choice of either
7422 (1) being abi incompatible with a -march switch, or
7423 (2) generating an error.
7424 Given no good solution, I think the safest thing is one warning.
7425 The user won't be able to use -Werror, but....
7427 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7428 called in response to actually generating a caller or callee that
7429 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7430 via aggregate_value_p for general type probing from tree-ssa. */
7433 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7435 static bool warnedsse, warnedmmx;
7437 if (!TARGET_64BIT && type)
7439 /* Look at the return type of the function, not the function type. */
7440 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7442 if (!TARGET_SSE && !warnedsse)
7445 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7448 warning (0, "SSE vector return without SSE enabled "
7453 if (!TARGET_MMX && !warnedmmx)
7455 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7458 warning (0, "MMX vector return without MMX enabled "
7468 /* Create the va_list data type. */
7470 /* Returns the calling-convention-specific va_list data type.
7471 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7474 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7476 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7478 /* For i386 we use a plain pointer to the argument area. */
7479 if (!TARGET_64BIT || abi == MS_ABI)
7480 return build_pointer_type (char_type_node);
7482 record = lang_hooks.types.make_type (RECORD_TYPE);
7483 type_decl = build_decl (BUILTINS_LOCATION,
7484 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7486 f_gpr = build_decl (BUILTINS_LOCATION,
7487 FIELD_DECL, get_identifier ("gp_offset"),
7488 unsigned_type_node);
7489 f_fpr = build_decl (BUILTINS_LOCATION,
7490 FIELD_DECL, get_identifier ("fp_offset"),
7491 unsigned_type_node);
7492 f_ovf = build_decl (BUILTINS_LOCATION,
7493 FIELD_DECL, get_identifier ("overflow_arg_area"),
7495 f_sav = build_decl (BUILTINS_LOCATION,
7496 FIELD_DECL, get_identifier ("reg_save_area"),
7499 va_list_gpr_counter_field = f_gpr;
7500 va_list_fpr_counter_field = f_fpr;
7502 DECL_FIELD_CONTEXT (f_gpr) = record;
7503 DECL_FIELD_CONTEXT (f_fpr) = record;
7504 DECL_FIELD_CONTEXT (f_ovf) = record;
7505 DECL_FIELD_CONTEXT (f_sav) = record;
7507 TYPE_STUB_DECL (record) = type_decl;
7508 TYPE_NAME (record) = type_decl;
7509 TYPE_FIELDS (record) = f_gpr;
7510 DECL_CHAIN (f_gpr) = f_fpr;
7511 DECL_CHAIN (f_fpr) = f_ovf;
7512 DECL_CHAIN (f_ovf) = f_sav;
7514 layout_type (record);
7516 /* The correct type is an array type of one element. */
7517 return build_array_type (record, build_index_type (size_zero_node));
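/* Editorial note, not from the original sources: at the C level the
   record built above is the familiar SysV x86-64 va_list,

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag, va_list[1];

   (modulo the exact typedef spelling, which is an assumption here).  */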
7520 /* Set up the builtin va_list data type and, for 64-bit, the additional
7521 calling-convention-specific va_list data types. */
7524 ix86_build_builtin_va_list (void)
7526 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7528 /* Initialize ABI-specific va_list builtin types. */
7532 if (ix86_abi == MS_ABI)
7534 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7535 if (TREE_CODE (t) != RECORD_TYPE)
7536 t = build_variant_type_copy (t);
7537 sysv_va_list_type_node = t;
7542 if (TREE_CODE (t) != RECORD_TYPE)
7543 t = build_variant_type_copy (t);
7544 sysv_va_list_type_node = t;
7546 if (ix86_abi != MS_ABI)
7548 t = ix86_build_builtin_va_list_abi (MS_ABI);
7549 if (TREE_CODE (t) != RECORD_TYPE)
7550 t = build_variant_type_copy (t);
7551 ms_va_list_type_node = t;
7556 if (TREE_CODE (t) != RECORD_TYPE)
7557 t = build_variant_type_copy (t);
7558 ms_va_list_type_node = t;
7565 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7568 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7574 /* GPR size of varargs save area. */
7575 if (cfun->va_list_gpr_size)
7576 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7578 ix86_varargs_gpr_size = 0;
7580 /* FPR size of varargs save area. We don't need it if we don't pass
7581 anything in SSE registers. */
7582 if (TARGET_SSE && cfun->va_list_fpr_size)
7583 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7585 ix86_varargs_fpr_size = 0;
7587 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7590 save_area = frame_pointer_rtx;
7591 set = get_varargs_alias_set ();
7593 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7594 if (max > X86_64_REGPARM_MAX)
7595 max = X86_64_REGPARM_MAX;
7597 for (i = cum->regno; i < max; i++)
7599 mem = gen_rtx_MEM (Pmode,
7600 plus_constant (save_area, i * UNITS_PER_WORD));
7601 MEM_NOTRAP_P (mem) = 1;
7602 set_mem_alias_set (mem, set);
7603 emit_move_insn (mem, gen_rtx_REG (Pmode,
7604 x86_64_int_parameter_registers[i]));
7607 if (ix86_varargs_fpr_size)
7609 enum machine_mode smode;
7612 /* Now emit code to save SSE registers. The AX parameter contains the number
7613 of SSE parameter registers used to call this function, though all we
7614 actually check here is the zero/non-zero status. */
7616 label = gen_label_rtx ();
7617 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7618 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7621 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7622 we used movdqa (i.e. TImode) instead? Perhaps even better would
7623 be if we could determine the real mode of the data, via a hook
7624 into pass_stdarg. Ignore all that for now. */
7626 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7627 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7629 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7630 if (max > X86_64_SSE_REGPARM_MAX)
7631 max = X86_64_SSE_REGPARM_MAX;
7633 for (i = cum->sse_regno; i < max; ++i)
7635 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7636 mem = gen_rtx_MEM (smode, mem);
7637 MEM_NOTRAP_P (mem) = 1;
7638 set_mem_alias_set (mem, set);
7639 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7641 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
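/* Editorial sketch, not from the original sources, of the save area
   laid out above (offsets assume the SysV limits used in this file):

     bytes 0 .. 8*X86_64_REGPARM_MAX-1      incoming GPRs, 8 bytes each
     bytes ix86_varargs_gpr_size and up     SSE registers, 16 bytes each

   The gp_offset/fp_offset fields of the va_list index into this
   block.  */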
7649 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7651 alias_set_type set = get_varargs_alias_set ();
7654 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7658 mem = gen_rtx_MEM (Pmode,
7659 plus_constant (virtual_incoming_args_rtx,
7660 i * UNITS_PER_WORD));
7661 MEM_NOTRAP_P (mem) = 1;
7662 set_mem_alias_set (mem, set);
7664 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7665 emit_move_insn (mem, reg);
7670 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7671 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7674 CUMULATIVE_ARGS next_cum;
7677 /* This argument doesn't appear to be used anymore. Which is good,
7678 because the old code here didn't suppress rtl generation. */
7679 gcc_assert (!no_rtl);
7684 fntype = TREE_TYPE (current_function_decl);
7686 /* For varargs, we do not want to skip the dummy va_dcl argument.
7687 For stdargs, we do want to skip the last named argument. */
7689 if (stdarg_p (fntype))
7690 ix86_function_arg_advance (&next_cum, mode, type, true);
7692 if (cum->call_abi == MS_ABI)
7693 setup_incoming_varargs_ms_64 (&next_cum);
7695 setup_incoming_varargs_64 (&next_cum);
7698 /* Check whether TYPE is a char * kind of va_list. */
7701 is_va_list_char_pointer (tree type)
7705 /* For 32-bit it is always true. */
7708 canonic = ix86_canonical_va_list_type (type);
7709 return (canonic == ms_va_list_type_node
7710 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7713 /* Implement va_start. */
7716 ix86_va_start (tree valist, rtx nextarg)
7718 HOST_WIDE_INT words, n_gpr, n_fpr;
7719 tree f_gpr, f_fpr, f_ovf, f_sav;
7720 tree gpr, fpr, ovf, sav, t;
7724 if (flag_split_stack
7725 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7727 unsigned int scratch_regno;
7729 /* When we are splitting the stack, we can't refer to the stack
7730 arguments using internal_arg_pointer, because they may be on
7731 the old stack. The split stack prologue will arrange to
7732 leave a pointer to the old stack arguments in a scratch
7733 register, which we here copy to a pseudo-register. The split
7734 stack prologue can't set the pseudo-register directly because
7735 it (the prologue) runs before any registers have been saved. */
7737 scratch_regno = split_stack_prologue_scratch_regno ();
7738 if (scratch_regno != INVALID_REGNUM)
7742 reg = gen_reg_rtx (Pmode);
7743 cfun->machine->split_stack_varargs_pointer = reg;
7746 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7750 push_topmost_sequence ();
7751 emit_insn_after (seq, entry_of_function ());
7752 pop_topmost_sequence ();
7756 /* Only the 64bit target needs something special. */
7757 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7759 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7760 std_expand_builtin_va_start (valist, nextarg);
7765 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7766 next = expand_binop (ptr_mode, add_optab,
7767 cfun->machine->split_stack_varargs_pointer,
7768 crtl->args.arg_offset_rtx,
7769 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7770 convert_move (va_r, next, 0);
7775 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7776 f_fpr = DECL_CHAIN (f_gpr);
7777 f_ovf = DECL_CHAIN (f_fpr);
7778 f_sav = DECL_CHAIN (f_ovf);
7780 valist = build_simple_mem_ref (valist);
7781 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7782 /* The following should be folded into the MEM_REF offset. */
7783 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7785 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7787 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7789 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7792 /* Count the number of gp and fp argument registers used. */
7793 words = crtl->args.info.words;
7794 n_gpr = crtl->args.info.regno;
7795 n_fpr = crtl->args.info.sse_regno;
7797 if (cfun->va_list_gpr_size)
7799 type = TREE_TYPE (gpr);
7800 t = build2 (MODIFY_EXPR, type,
7801 gpr, build_int_cst (type, n_gpr * 8));
7802 TREE_SIDE_EFFECTS (t) = 1;
7803 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7806 if (TARGET_SSE && cfun->va_list_fpr_size)
7808 type = TREE_TYPE (fpr);
7809 t = build2 (MODIFY_EXPR, type, fpr,
7810 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7811 TREE_SIDE_EFFECTS (t) = 1;
7812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7815 /* Find the overflow area. */
7816 type = TREE_TYPE (ovf);
7817 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7818 ovf_rtx = crtl->args.internal_arg_pointer;
7820 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7821 t = make_tree (type, ovf_rtx);
7823 t = build2 (POINTER_PLUS_EXPR, type, t,
7824 size_int (words * UNITS_PER_WORD));
7825 t = build2 (MODIFY_EXPR, type, ovf, t);
7826 TREE_SIDE_EFFECTS (t) = 1;
7827 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7829 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7831 /* Find the register save area.
7832 The function prologue saves it right above the stack frame. */
7833 type = TREE_TYPE (sav);
7834 t = make_tree (type, frame_pointer_rtx);
7835 if (!ix86_varargs_gpr_size)
7836 t = build2 (POINTER_PLUS_EXPR, type, t,
7837 size_int (-8 * X86_64_REGPARM_MAX));
7838 t = build2 (MODIFY_EXPR, type, sav, t);
7839 TREE_SIDE_EFFECTS (t) = 1;
7840 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
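/* Editorial example, not from the original sources, assuming
   X86_64_REGPARM_MAX == 6: after the expansion above for

     void f (int a, double b, ...);

   va_start leaves gp_offset == 8 (one GPR consumed by A) and
   fp_offset == 8*6 + 16 == 64 (the whole GPR save area plus one SSE
   slot for B), with overflow_arg_area pointing just past the named
   stack arguments.  */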
7844 /* Implement va_arg. */
7847 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7850 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7851 tree f_gpr, f_fpr, f_ovf, f_sav;
7852 tree gpr, fpr, ovf, sav, t;
7854 tree lab_false, lab_over = NULL_TREE;
7859 enum machine_mode nat_mode;
7860 unsigned int arg_boundary;
7862 /* Only the 64bit target needs something special. */
7863 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7864 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7866 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7867 f_fpr = DECL_CHAIN (f_gpr);
7868 f_ovf = DECL_CHAIN (f_fpr);
7869 f_sav = DECL_CHAIN (f_ovf);
7871 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7872 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7873 valist = build_va_arg_indirect_ref (valist);
7874 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7875 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7876 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7878 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7880 type = build_pointer_type (type);
7881 size = int_size_in_bytes (type);
7882 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7884 nat_mode = type_natural_mode (type, NULL);
7893 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7894 if (ix86_cfun_abi () == SYSV_ABI)
7901 container = construct_container (nat_mode, TYPE_MODE (type),
7902 type, 0, X86_64_REGPARM_MAX,
7903 X86_64_SSE_REGPARM_MAX, intreg,
7908 /* Pull the value out of the saved registers. */
7910 addr = create_tmp_var (ptr_type_node, "addr");
7914 int needed_intregs, needed_sseregs;
7916 tree int_addr, sse_addr;
7918 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7919 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7921 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7923 need_temp = (!REG_P (container)
7924 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7925 || TYPE_ALIGN (type) > 128));
7927 /* In case we are passing a structure, verify that it is a consecutive block
7928 in the register save area. If not, we need to do moves. */
7929 if (!need_temp && !REG_P (container))
7931 /* Verify that all registers are strictly consecutive. */
7932 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7936 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7938 rtx slot = XVECEXP (container, 0, i);
7939 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7940 || INTVAL (XEXP (slot, 1)) != i * 16)
7948 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7950 rtx slot = XVECEXP (container, 0, i);
7951 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7952 || INTVAL (XEXP (slot, 1)) != i * 8)
7964 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7965 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7968 /* First ensure that we fit completely in registers. */
7971 t = build_int_cst (TREE_TYPE (gpr),
7972 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7973 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7974 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7975 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7976 gimplify_and_add (t, pre_p);
7980 t = build_int_cst (TREE_TYPE (fpr),
7981 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7982 + X86_64_REGPARM_MAX * 8);
7983 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7984 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7985 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7986 gimplify_and_add (t, pre_p);
7989 /* Compute the index to the start of the area used for integer regs. */
7992 /* int_addr = gpr + sav; */
7993 t = fold_convert (sizetype, gpr);
7994 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7995 gimplify_assign (int_addr, t, pre_p);
7999 /* sse_addr = fpr + sav; */
8000 t = fold_convert (sizetype, fpr);
8001 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8002 gimplify_assign (sse_addr, t, pre_p);
8006 int i, prev_size = 0;
8007 tree temp = create_tmp_var (type, "va_arg_tmp");
8010 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8011 gimplify_assign (addr, t, pre_p);
8013 for (i = 0; i < XVECLEN (container, 0); i++)
8015 rtx slot = XVECEXP (container, 0, i);
8016 rtx reg = XEXP (slot, 0);
8017 enum machine_mode mode = GET_MODE (reg);
8023 tree dest_addr, dest;
8024 int cur_size = GET_MODE_SIZE (mode);
8026 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8027 prev_size = INTVAL (XEXP (slot, 1));
8028 if (prev_size + cur_size > size)
8030 cur_size = size - prev_size;
8031 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8032 if (mode == BLKmode)
8035 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8036 if (mode == GET_MODE (reg))
8037 addr_type = build_pointer_type (piece_type);
8039 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8041 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8044 if (SSE_REGNO_P (REGNO (reg)))
8046 src_addr = sse_addr;
8047 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8051 src_addr = int_addr;
8052 src_offset = REGNO (reg) * 8;
8054 src_addr = fold_convert (addr_type, src_addr);
8055 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8056 size_int (src_offset));
8058 dest_addr = fold_convert (daddr_type, addr);
8059 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8060 size_int (prev_size));
8061 if (cur_size == GET_MODE_SIZE (mode))
8063 src = build_va_arg_indirect_ref (src_addr);
8064 dest = build_va_arg_indirect_ref (dest_addr);
8066 gimplify_assign (dest, src, pre_p);
8071 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8072 3, dest_addr, src_addr,
8073 size_int (cur_size));
8074 gimplify_and_add (copy, pre_p);
8076 prev_size += cur_size;
8082 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8083 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8084 gimplify_assign (gpr, t, pre_p);
8089 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8090 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8091 gimplify_assign (fpr, t, pre_p);
8094 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8096 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8099 /* ... otherwise out of the overflow area. */
8101 /* When we align a parameter on the stack for the caller, if its
8102 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8103 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8104 with the caller here. */
8105 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
8106 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8107 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8109 /* Care for on-stack alignment if needed. */
8110 if (arg_boundary <= 64 || size == 0)
8114 HOST_WIDE_INT align = arg_boundary / 8;
8115 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8116 size_int (align - 1));
8117 t = fold_convert (sizetype, t);
8118 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8120 t = fold_convert (TREE_TYPE (ovf), t);
8123 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8124 gimplify_assign (addr, t, pre_p);
8126 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8127 size_int (rsize * UNITS_PER_WORD));
8128 gimplify_assign (unshare_expr (ovf), t, pre_p);
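/* A worked instance of the rounding idiom built above, assuming a
   16-byte ARG_BOUNDARY: for ovf == 0x1008,
   (0x1008 + 15) & -16 == 0x1017 & ~0xf == 0x1010,
   i.e. the next 16-byte-aligned address at or above ovf.  */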
8131 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8133 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8134 addr = fold_convert (ptrtype, addr);
8137 addr = build_va_arg_indirect_ref (addr);
8138 return build_va_arg_indirect_ref (addr);
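/* For illustration, an open-coded equivalent of the GIMPLE built above
   for a single integer argument (a sketch only; the va_list_tag field
   names follow the SysV AMD64 ABI and the helper is hypothetical):

     void *
     va_arg_gpr (struct va_list_tag *ap)
     {
       void *addr;
       if (ap->gp_offset < 6 * 8)
	 {
	   addr = (char *) ap->reg_save_area + ap->gp_offset;
	   ap->gp_offset += 8;
	 }
       else
	 {
	   addr = ap->overflow_arg_area;
	   ap->overflow_arg_area = (char *) addr + 8;
	 }
       return addr;
     }  */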
8141 /* Return true if OPNUM's MEM should be matched
8142 in movabs* patterns. */
8145 ix86_check_movabs (rtx insn, int opnum)
8149 set = PATTERN (insn);
8150 if (GET_CODE (set) == PARALLEL)
8151 set = XVECEXP (set, 0, 0);
8152 gcc_assert (GET_CODE (set) == SET);
8153 mem = XEXP (set, opnum);
8154 while (GET_CODE (mem) == SUBREG)
8155 mem = SUBREG_REG (mem);
8156 gcc_assert (MEM_P (mem));
8157 return volatile_ok || !MEM_VOLATILE_P (mem);
8160 /* Initialize the table of extra 80387 mathematical constants. */
8163 init_ext_80387_constants (void)
8165 static const char * cst[5] =
8167 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8168 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8169 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8170 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8171 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8175 for (i = 0; i < 5; i++)
8177 real_from_string (&ext_80387_constants_table[i], cst[i]);
8178 /* Ensure each constant is rounded to XFmode precision. */
8179 real_convert (&ext_80387_constants_table[i],
8180 XFmode, &ext_80387_constants_table[i]);
8183 ext_80387_constants_init = 1;
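/* The five strings above are log10(2), ln(2), log2(e), log2(10) and pi;
   e.g. the x87 expansion of exp(x) uses fldl2e to form x * log2(e)
   instead of loading the constant from memory.  */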
8186 /* Return non-zero if the constant is something that
8187 can be loaded with a special instruction. */
8190 standard_80387_constant_p (rtx x)
8192 enum machine_mode mode = GET_MODE (x);
8196 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8199 if (x == CONST0_RTX (mode))
8201 if (x == CONST1_RTX (mode))
8204 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8206 /* For XFmode constants, try to find a special 80387 instruction when
8207 optimizing for size or on those CPUs that benefit from them. */
8209 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8213 if (! ext_80387_constants_init)
8214 init_ext_80387_constants ();
8216 for (i = 0; i < 5; i++)
8217 if (real_identical (&r, &ext_80387_constants_table[i]))
8221 /* A load of the constant -0.0 or -1.0 will be split into an
8222 fldz;fchs or fld1;fchs sequence. */
8223 if (real_isnegzero (&r))
8225 if (real_identical (&r, &dconstm1))
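/* For reference, the non-zero return values map to loads as follows
   (see standard_80387_constant_opcode below): 1 -> fldz, 2 -> fld1,
   3..7 -> fldlg2 / fldln2 / fldl2e / fldl2t / fldpi, and 8 / 9 ->
   the split fldz;fchs / fld1;fchs sequences.  */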
8231 /* Return the opcode of the special instruction to be used to load
8235 standard_80387_constant_opcode (rtx x)
8237 switch (standard_80387_constant_p (x))
8261 /* Return the CONST_DOUBLE representing the 80387 constant that is
8262 loaded by the specified special instruction. The argument IDX
8263 matches the return value from standard_80387_constant_p. */
8266 standard_80387_constant_rtx (int idx)
8270 if (! ext_80387_constants_init)
8271 init_ext_80387_constants ();
8287 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8291 /* Return 1 if X is all zeros and 2 if X is all ones
8292 in a supported SSE vector mode. */
8295 standard_sse_constant_p (rtx x)
8297 enum machine_mode mode = GET_MODE (x);
8299 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8301 if (vector_all_ones_operand (x, mode))
8317 /* Return the opcode of the special instruction to be used to load
8321 standard_sse_constant_opcode (rtx insn, rtx x)
8323 switch (standard_sse_constant_p (x))
8326 switch (get_attr_mode (insn))
8329 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8331 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8332 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8334 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8336 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8337 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8339 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8341 return "vxorps\t%x0, %x0, %x0";
8343 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8344 return "vxorps\t%x0, %x0, %x0";
8346 return "vxorpd\t%x0, %x0, %x0";
8348 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8349 return "vxorps\t%x0, %x0, %x0";
8351 return "vpxor\t%x0, %x0, %x0";
8356 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
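/* Both idioms above avoid a constant-pool load: xorps/pxor of a
   register with itself yields all-zeros, and pcmpeqd of a register
   with itself yields all-ones, since every element compares equal
   to itself, e.g.

	pcmpeqd	%xmm0, %xmm0		(xmm0 = all ones)  */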
8363 /* Returns true if OP contains a symbol reference. */
8366 symbolic_reference_mentioned_p (rtx op)
8371 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8374 fmt = GET_RTX_FORMAT (GET_CODE (op));
8375 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8381 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8382 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8386 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8393 /* Return true if it is appropriate to emit `ret' instructions in the
8394 body of a function. Do this only if the epilogue is simple, needing a
8395 couple of insns. Prior to reloading, we can't tell how many registers
8396 must be saved, so return false then. Return false if there is no frame
8397 marker to de-allocate. */
8400 ix86_can_use_return_insn_p (void)
8402 struct ix86_frame frame;
8404 if (! reload_completed || frame_pointer_needed)
8407 /* Don't allow more than 32k pop, since that's all we can do
8408 with one instruction. */
8409 if (crtl->args.pops_args && crtl->args.size >= 32768)
8412 ix86_compute_frame_layout (&frame);
8413 return (frame.stack_pointer_offset == UNITS_PER_WORD
8414 && (frame.nregs + frame.nsseregs) == 0);
8417 /* Value should be nonzero if functions must have frame pointers.
8418 Zero means the frame pointer need not be set up (and parms may
8419 be accessed via the stack pointer) in functions that seem suitable. */
8422 ix86_frame_pointer_required (void)
8424 /* If we accessed previous frames, then the generated code expects
8425 to be able to access the saved ebp value in our frame. */
8426 if (cfun->machine->accesses_prev_frame)
8429 /* Several x86 OSes need a frame pointer for other reasons,
8430 usually pertaining to setjmp. */
8431 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8434 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8435 turns off the frame pointer by default. Turn it back on now if
8436 we've not got a leaf function. */
8437 if (TARGET_OMIT_LEAF_FRAME_POINTER
8438 && (!current_function_is_leaf
8439 || ix86_current_function_calls_tls_descriptor))
8442 if (crtl->profile && !flag_fentry)
8448 /* Record that the current function accesses previous call frames. */
8451 ix86_setup_frame_addresses (void)
8453 cfun->machine->accesses_prev_frame = 1;
8456 #ifndef USE_HIDDEN_LINKONCE
8457 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8458 # define USE_HIDDEN_LINKONCE 1
8460 # define USE_HIDDEN_LINKONCE 0
8464 static int pic_labels_used;
8466 /* Fills in the label name that should be used for a pc thunk for
8467 the given register. */
8470 get_pc_thunk_name (char name[32], unsigned int regno)
8472 gcc_assert (!TARGET_64BIT);
8474 if (USE_HIDDEN_LINKONCE)
8475 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8477 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8481 /* This function generates code for -fpic that loads %ebx with
8482 the return address of the caller and then returns. */
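/* For example, the thunk emitted for %ebx is simply:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret
*/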
8485 ix86_code_end (void)
8490 for (regno = AX_REG; regno <= SP_REG; regno++)
8495 if (!(pic_labels_used & (1 << regno)))
8498 get_pc_thunk_name (name, regno);
8500 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8501 get_identifier (name),
8502 build_function_type (void_type_node, void_list_node));
8503 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8504 NULL_TREE, void_type_node);
8505 TREE_PUBLIC (decl) = 1;
8506 TREE_STATIC (decl) = 1;
8511 switch_to_section (darwin_sections[text_coal_section]);
8512 fputs ("\t.weak_definition\t", asm_out_file);
8513 assemble_name (asm_out_file, name);
8514 fputs ("\n\t.private_extern\t", asm_out_file);
8515 assemble_name (asm_out_file, name);
8516 putc ('\n', asm_out_file);
8517 ASM_OUTPUT_LABEL (asm_out_file, name);
8518 DECL_WEAK (decl) = 1;
8522 if (USE_HIDDEN_LINKONCE)
8524 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8526 targetm.asm_out.unique_section (decl, 0);
8527 switch_to_section (get_named_section (decl, NULL, 0));
8529 targetm.asm_out.globalize_label (asm_out_file, name);
8530 fputs ("\t.hidden\t", asm_out_file);
8531 assemble_name (asm_out_file, name);
8532 putc ('\n', asm_out_file);
8533 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8537 switch_to_section (text_section);
8538 ASM_OUTPUT_LABEL (asm_out_file, name);
8541 DECL_INITIAL (decl) = make_node (BLOCK);
8542 current_function_decl = decl;
8543 init_function_start (decl);
8544 first_function_block_is_cold = false;
8545 /* Make sure unwind info is emitted for the thunk if needed. */
8546 final_start_function (emit_barrier (), asm_out_file, 1);
8548 /* Pad stack IP move with 4 instructions (two NOPs count
8549 as one instruction). */
8550 if (TARGET_PAD_SHORT_FUNCTION)
8555 fputs ("\tnop\n", asm_out_file);
8558 xops[0] = gen_rtx_REG (Pmode, regno);
8559 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8560 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8561 fputs ("\tret\n", asm_out_file);
8562 final_end_function ();
8563 init_insn_lengths ();
8564 free_after_compilation (cfun);
8566 current_function_decl = NULL;
8569 if (flag_split_stack)
8570 file_end_indicate_split_stack ();
8573 /* Emit code for the SET_GOT patterns. */
8576 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8582 if (TARGET_VXWORKS_RTP && flag_pic)
8584 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8585 xops[2] = gen_rtx_MEM (Pmode,
8586 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8587 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8589 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8590 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8591 an unadorned address. */
8592 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8593 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8594 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8598 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8600 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8602 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8605 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8608 output_asm_insn ("call\t%a2", xops);
8609 #ifdef DWARF2_UNWIND_INFO
8610 /* The call to the next label acts as a push. */
8611 if (dwarf2out_do_frame ())
8615 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8616 gen_rtx_PLUS (Pmode,
8619 RTX_FRAME_RELATED_P (insn) = 1;
8620 dwarf2out_frame_debug (insn, true);
8627 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8628 is what will be referenced by the Mach-O PIC subsystem. */
8630 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8633 targetm.asm_out.internal_label (asm_out_file, "L",
8634 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8638 output_asm_insn ("pop%z0\t%0", xops);
8639 #ifdef DWARF2_UNWIND_INFO
8640 /* The pop clobbers DEST, but doesn't restore it
8641 for unwind info purposes. */
8642 if (dwarf2out_do_frame ())
8646 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8647 dwarf2out_frame_debug (insn, true);
8648 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8649 gen_rtx_PLUS (Pmode,
8652 RTX_FRAME_RELATED_P (insn) = 1;
8653 dwarf2out_frame_debug (insn, true);
8662 get_pc_thunk_name (name, REGNO (dest));
8663 pic_labels_used |= 1 << REGNO (dest);
8665 #ifdef DWARF2_UNWIND_INFO
8666 /* Ensure all queued register saves are flushed before the
8668 if (dwarf2out_do_frame ())
8669 dwarf2out_flush_queued_reg_saves ();
8671 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8672 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8673 output_asm_insn ("call\t%X2", xops);
8674 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8675 is what will be referenced by the Mach-O PIC subsystem. */
8678 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8680 targetm.asm_out.internal_label (asm_out_file, "L",
8681 CODE_LABEL_NUMBER (label));
8688 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8689 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8691 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
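/* Schematically, the two PIC sequences emitted here (shown for %ebx on
   ELF; the VxWorks and Mach-O paths above differ) are:

     without deep branch prediction:

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

     with deep branch prediction:

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
*/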
8696 /* Generate a "push" pattern for input ARG. */
8701 struct machine_function *m = cfun->machine;
8703 if (m->fs.cfa_reg == stack_pointer_rtx)
8704 m->fs.cfa_offset += UNITS_PER_WORD;
8705 m->fs.sp_offset += UNITS_PER_WORD;
8707 return gen_rtx_SET (VOIDmode,
8709 gen_rtx_PRE_DEC (Pmode,
8710 stack_pointer_rtx)),
8714 /* Generate a "pop" pattern for input ARG. */
8719 return gen_rtx_SET (VOIDmode,
8722 gen_rtx_POST_INC (Pmode,
8723 stack_pointer_rtx)));
8726 /* Return >= 0 if there is an unused call-clobbered register available
8727 for the entire function. */
8730 ix86_select_alt_pic_regnum (void)
8732 if (current_function_is_leaf
8734 && !ix86_current_function_calls_tls_descriptor)
8737 /* Can't use the same register for both PIC and DRAP. */
8739 drap = REGNO (crtl->drap_reg);
8742 for (i = 2; i >= 0; --i)
8743 if (i != drap && !df_regs_ever_live_p (i))
8747 return INVALID_REGNUM;
8750 /* Return 1 if we need to save REGNO. */
8752 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8754 if (pic_offset_table_rtx
8755 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8756 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8758 || crtl->calls_eh_return
8759 || crtl->uses_const_pool))
8761 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8766 if (crtl->calls_eh_return && maybe_eh_return)
8771 unsigned test = EH_RETURN_DATA_REGNO (i);
8772 if (test == INVALID_REGNUM)
8779 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8782 return (df_regs_ever_live_p (regno)
8783 && !call_used_regs[regno]
8784 && !fixed_regs[regno]
8785 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8788 /* Return number of saved general purpose registers. */
8791 ix86_nsaved_regs (void)
8796 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8797 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8802 /* Return number of saved SSE registers. */
8805 ix86_nsaved_sseregs (void)
8810 if (ix86_cfun_abi () != MS_ABI)
8812 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8813 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8818 /* Given FROM and TO register numbers, say whether this elimination is
8819 allowed. If stack alignment is needed, we can only replace argument
8820 pointer with hard frame pointer, or replace frame pointer with stack
8821 pointer. Otherwise, frame pointer elimination is automatically
8822 handled and all other eliminations are valid. */
8825 ix86_can_eliminate (const int from, const int to)
8827 if (stack_realign_fp)
8828 return ((from == ARG_POINTER_REGNUM
8829 && to == HARD_FRAME_POINTER_REGNUM)
8830 || (from == FRAME_POINTER_REGNUM
8831 && to == STACK_POINTER_REGNUM));
8833 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8836 /* Return the offset between two registers, one to be eliminated, and the other
8837 its replacement, at the start of a routine. */
8840 ix86_initial_elimination_offset (int from, int to)
8842 struct ix86_frame frame;
8843 ix86_compute_frame_layout (&frame);
8845 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8846 return frame.hard_frame_pointer_offset;
8847 else if (from == FRAME_POINTER_REGNUM
8848 && to == HARD_FRAME_POINTER_REGNUM)
8849 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8852 gcc_assert (to == STACK_POINTER_REGNUM);
8854 if (from == ARG_POINTER_REGNUM)
8855 return frame.stack_pointer_offset;
8857 gcc_assert (from == FRAME_POINTER_REGNUM);
8858 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8862 /* In a dynamically-aligned function, we can't know the offset from
8863 stack pointer to frame pointer, so we must ensure that setjmp
8864 eliminates fp against the hard fp (%ebp) rather than trying to
8865 index from %esp up to the top of the frame across a gap that is
8866 of unknown (at compile-time) size. */
8868 ix86_builtin_setjmp_frame_value (void)
8870 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8873 /* On the x86, -fsplit-stack and -fstack-protector both use the same
8874 field in the TCB, so they cannot be used together. */
8877 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8881 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8883 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8886 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8889 error ("%<-fsplit-stack%> requires "
8890 "assembler support for CFI directives");
8898 /* When using -fsplit-stack, the allocation routines set a field in
8899 the TCB to the bottom of the stack plus this much space, measured
8902 #define SPLIT_STACK_AVAILABLE 256
8904 /* Fill the ix86_frame structure describing the frame of the function being compiled. */
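/* A rough sketch of the frame layout computed below, from the CFA
   downward; each named offset is the value of OFFSET just after the
   areas above it have been added (areas are present only when used):

	[ return address       ]
	[ pushed static chain  ]
	[ saved frame pointer  ]	<- hard_frame_pointer_offset
	[ saved GP registers   ]	<- reg_save_offset
	[ saved SSE registers  ]	<- sse_reg_save_offset
	[ va_arg register save ]
	[ local stack frame    ]	(starts at frame_pointer_offset)
	[ outgoing arguments   ]	<- stack_pointer_offset
*/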
8907 ix86_compute_frame_layout (struct ix86_frame *frame)
8909 unsigned int stack_alignment_needed;
8910 HOST_WIDE_INT offset;
8911 unsigned int preferred_alignment;
8912 HOST_WIDE_INT size = get_frame_size ();
8913 HOST_WIDE_INT to_allocate;
8915 frame->nregs = ix86_nsaved_regs ();
8916 frame->nsseregs = ix86_nsaved_sseregs ();
8918 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8919 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8921 /* The MS ABI seems to require stack alignment to always be 16, except for
8922 function prologues and leaf functions. */
8923 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8924 && (!current_function_is_leaf || cfun->calls_alloca != 0
8925 || ix86_current_function_calls_tls_descriptor))
8927 preferred_alignment = 16;
8928 stack_alignment_needed = 16;
8929 crtl->preferred_stack_boundary = 128;
8930 crtl->stack_alignment_needed = 128;
8933 gcc_assert (!size || stack_alignment_needed);
8934 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8935 gcc_assert (preferred_alignment <= stack_alignment_needed);
8937 /* During reload iteration the number of registers saved can change.
8938 Recompute the value as needed. Do not recompute when the number of
8939 registers didn't change, as reload does multiple calls to the function
8940 and does not expect the decision to change within a single iteration. */
8941 if (!optimize_function_for_size_p (cfun)
8942 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8944 int count = frame->nregs;
8945 struct cgraph_node *node = cgraph_node (current_function_decl);
8947 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8948 /* The fast prologue uses move instead of push to save registers. This
8949 is significantly longer, but also executes faster as modern hardware
8950 can execute the moves in parallel, but can't do that for push/pop.
8952 Be careful about choosing which prologue to emit: when the function
8953 takes many instructions to execute, we may use the slow version, as
8954 well as when the function is known to be outside a hot spot (this is
8955 known with feedback only). Weight the size of the function by the
8956 number of registers to save, as it is cheap to use one or two push
8957 instructions but very slow to use many of them. */
8959 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8960 if (node->frequency < NODE_FREQUENCY_NORMAL
8961 || (flag_branch_probabilities
8962 && node->frequency < NODE_FREQUENCY_HOT))
8963 cfun->machine->use_fast_prologue_epilogue = false;
8965 cfun->machine->use_fast_prologue_epilogue
8966 = !expensive_function_p (count);
8968 if (TARGET_PROLOGUE_USING_MOVE
8969 && cfun->machine->use_fast_prologue_epilogue)
8970 frame->save_regs_using_mov = true;
8972 frame->save_regs_using_mov = false;
8974 /* If static stack checking is enabled and done with probes, the registers
8975 need to be saved before allocating the frame. */
8976 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8977 frame->save_regs_using_mov = false;
8979 /* Skip return address. */
8980 offset = UNITS_PER_WORD;
8982 /* Skip pushed static chain. */
8983 if (ix86_static_chain_on_stack)
8984 offset += UNITS_PER_WORD;
8986 /* Skip saved base pointer. */
8987 if (frame_pointer_needed)
8988 offset += UNITS_PER_WORD;
8990 frame->hard_frame_pointer_offset = offset;
8992 /* Register save area */
8993 offset += frame->nregs * UNITS_PER_WORD;
8994 frame->reg_save_offset = offset;
8996 /* Align and set SSE register save area. */
8997 if (frame->nsseregs)
8999 /* The only ABI that has saved SSE registers (Win64) also has a
9000 16-byte aligned default stack, and thus we don't need to be
9001 within the re-aligned local stack frame to save them. */
9002 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9003 offset = (offset + 16 - 1) & -16;
9004 offset += frame->nsseregs * 16;
9006 frame->sse_reg_save_offset = offset;
9008 /* The re-aligned stack starts here. Values before this point are not
9009 directly comparable with values below this point. In order to make
9010 sure that no value happens to be the same before and after, force
9011 the alignment computation below to add a non-zero value. */
9012 if (stack_realign_fp)
9013 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9016 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9017 offset += frame->va_arg_size;
9019 /* Align start of frame for local function. */
9020 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9022 /* Frame pointer points here. */
9023 frame->frame_pointer_offset = offset;
9027 /* Add the outgoing arguments area. It can be skipped if we eliminated
9028 all the function calls as dead code.
9029 Skipping is however impossible when the function calls alloca, as the
9030 alloca expander assumes that the last crtl->outgoing_args_size
9031 bytes of the stack frame are unused. */
9032 if (ACCUMULATE_OUTGOING_ARGS
9033 && (!current_function_is_leaf || cfun->calls_alloca
9034 || ix86_current_function_calls_tls_descriptor))
9036 offset += crtl->outgoing_args_size;
9037 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9040 frame->outgoing_arguments_size = 0;
9042 /* Align stack boundary. Only needed if we're calling another function
9044 if (!current_function_is_leaf || cfun->calls_alloca
9045 || ix86_current_function_calls_tls_descriptor)
9046 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9048 /* We've reached end of stack frame. */
9049 frame->stack_pointer_offset = offset;
9051 /* Size prologue needs to allocate. */
9052 to_allocate = offset - frame->sse_reg_save_offset;
9054 if ((!to_allocate && frame->nregs <= 1)
9055 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9056 frame->save_regs_using_mov = false;
9058 if (ix86_using_red_zone ()
9059 && current_function_sp_is_unchanging
9060 && current_function_is_leaf
9061 && !ix86_current_function_calls_tls_descriptor)
9063 frame->red_zone_size = to_allocate;
9064 if (frame->save_regs_using_mov)
9065 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9066 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9067 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9070 frame->red_zone_size = 0;
9071 frame->stack_pointer_offset -= frame->red_zone_size;
9074 /* This is semi-inlined memory_address_length, but simplified
9075 since we know that we're always dealing with reg+offset, and
9076 to avoid having to create and discard all that rtl. */
9079 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9085 /* EBP and R13 cannot be encoded without an offset. */
9086 len = (regno == BP_REG || regno == R13_REG);
9088 else if (IN_RANGE (offset, -128, 127))
9091 /* ESP and R12 must be encoded with a SIB byte. */
9092 if (regno == SP_REG || regno == R12_REG)
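/* E.g., in 32-bit terms: (%eax) costs 0 extra bytes; (%ebp) costs 1,
   since EBP needs an explicit zero displacement; -8(%esp) costs 2
   (disp8 plus the mandatory SIB byte); and 0x1000(%eax) costs 4
   (a 32-bit displacement).  */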
9098 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9099 The valid base registers are taken from CFUN->MACHINE->FS. */
9102 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9104 const struct machine_function *m = cfun->machine;
9105 rtx base_reg = NULL;
9106 HOST_WIDE_INT base_offset = 0;
9108 if (m->use_fast_prologue_epilogue)
9110 /* Choose the base register most likely to allow the most scheduling
9111 opportunities. Generally FP is valid throughout the function,
9112 while DRAP must be reloaded within the epilogue. But choose either
9113 over the SP due to increased encoding size. */
9117 base_reg = hard_frame_pointer_rtx;
9118 base_offset = m->fs.fp_offset - cfa_offset;
9120 else if (m->fs.drap_valid)
9122 base_reg = crtl->drap_reg;
9123 base_offset = 0 - cfa_offset;
9125 else if (m->fs.sp_valid)
9127 base_reg = stack_pointer_rtx;
9128 base_offset = m->fs.sp_offset - cfa_offset;
9133 HOST_WIDE_INT toffset;
9136 /* Choose the base register with the smallest address encoding.
9137 With a tie, choose FP > DRAP > SP. */
9140 base_reg = stack_pointer_rtx;
9141 base_offset = m->fs.sp_offset - cfa_offset;
9142 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9144 if (m->fs.drap_valid)
9146 toffset = 0 - cfa_offset;
9147 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9150 base_reg = crtl->drap_reg;
9151 base_offset = toffset;
9157 toffset = m->fs.fp_offset - cfa_offset;
9158 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9161 base_reg = hard_frame_pointer_rtx;
9162 base_offset = toffset;
9167 gcc_assert (base_reg != NULL);
9169 return plus_constant (base_reg, base_offset);
9172 /* Emit code to save registers in the prologue. */
9175 ix86_emit_save_regs (void)
9180 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9181 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9183 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9184 RTX_FRAME_RELATED_P (insn) = 1;
9188 /* Emit a single register save at CFA - CFA_OFFSET. */
9191 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9192 HOST_WIDE_INT cfa_offset)
9194 struct machine_function *m = cfun->machine;
9195 rtx reg = gen_rtx_REG (mode, regno);
9196 rtx mem, addr, base, insn;
9198 addr = choose_baseaddr (cfa_offset);
9199 mem = gen_frame_mem (mode, addr);
9201 /* For SSE saves, we need to indicate the 128-bit alignment. */
9202 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9204 insn = emit_move_insn (mem, reg);
9205 RTX_FRAME_RELATED_P (insn) = 1;
9208 if (GET_CODE (base) == PLUS)
9209 base = XEXP (base, 0);
9210 gcc_checking_assert (REG_P (base));
9212 /* When saving registers into a re-aligned local stack frame, avoid
9213 any tricky guessing by dwarf2out. */
9214 if (m->fs.realigned)
9216 gcc_checking_assert (stack_realign_drap);
9218 if (regno == REGNO (crtl->drap_reg))
9220 /* A bit of a hack. We force the DRAP register to be saved in
9221 the re-aligned stack frame, which provides us with a copy
9222 of the CFA that will last past the prologue. Install it. */
9223 gcc_checking_assert (cfun->machine->fs.fp_valid);
9224 addr = plus_constant (hard_frame_pointer_rtx,
9225 cfun->machine->fs.fp_offset - cfa_offset);
9226 mem = gen_rtx_MEM (mode, addr);
9227 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9231 /* The frame pointer is a stable reference within the
9232 aligned frame. Use it. */
9233 gcc_checking_assert (cfun->machine->fs.fp_valid);
9234 addr = plus_constant (hard_frame_pointer_rtx,
9235 cfun->machine->fs.fp_offset - cfa_offset);
9236 mem = gen_rtx_MEM (mode, addr);
9237 add_reg_note (insn, REG_CFA_EXPRESSION,
9238 gen_rtx_SET (VOIDmode, mem, reg));
9242 /* The memory may not be relative to the current CFA register,
9243 which means that we may need to generate a new pattern for
9244 use by the unwind info. */
9245 else if (base != m->fs.cfa_reg)
9247 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9248 mem = gen_rtx_MEM (mode, addr);
9249 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9253 /* Emit code to save registers using MOV insns.
9254 First register is stored at CFA - CFA_OFFSET. */
9256 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9260 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9261 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9263 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9264 cfa_offset -= UNITS_PER_WORD;
9268 /* Emit code to save SSE registers using MOV insns.
9269 First register is stored at CFA - CFA_OFFSET. */
9271 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9275 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9276 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9278 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9283 static GTY(()) rtx queued_cfa_restores;
9285 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
9286 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9287 Don't add the note if the previously saved value will be left untouched
9288 within the stack red zone till return, as unwinders can find the same
9289 value in the register and on the stack. */
9292 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9294 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9299 add_reg_note (insn, REG_CFA_RESTORE, reg);
9300 RTX_FRAME_RELATED_P (insn) = 1;
9304 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9307 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9310 ix86_add_queued_cfa_restore_notes (rtx insn)
9313 if (!queued_cfa_restores)
9315 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9317 XEXP (last, 1) = REG_NOTES (insn);
9318 REG_NOTES (insn) = queued_cfa_restores;
9319 queued_cfa_restores = NULL_RTX;
9320 RTX_FRAME_RELATED_P (insn) = 1;
9323 /* Expand prologue or epilogue stack adjustment.
9324 The pattern exists to put a dependency on all ebp-based memory accesses.
9325 STYLE should be negative if instructions should be marked as frame related,
9326 zero if %r11 register is live and cannot be freely used and positive
9330 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9331 int style, bool set_cfa)
9333 struct machine_function *m = cfun->machine;
9337 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9338 else if (x86_64_immediate_operand (offset, DImode))
9339 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9343 /* r11 is used by indirect sibcall return as well, set before the
9344 epilogue and used after the epilogue. */
9346 tmp = gen_rtx_REG (DImode, R11_REG);
9349 gcc_assert (src != hard_frame_pointer_rtx
9350 && dest != hard_frame_pointer_rtx);
9351 tmp = hard_frame_pointer_rtx;
9353 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9355 RTX_FRAME_RELATED_P (insn) = 1;
9357 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9360 insn = emit_insn (insn);
9362 ix86_add_queued_cfa_restore_notes (insn);
9368 gcc_assert (m->fs.cfa_reg == src);
9369 m->fs.cfa_offset += INTVAL (offset);
9370 m->fs.cfa_reg = dest;
9372 r = gen_rtx_PLUS (Pmode, src, offset);
9373 r = gen_rtx_SET (VOIDmode, dest, r);
9374 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9375 RTX_FRAME_RELATED_P (insn) = 1;
9378 RTX_FRAME_RELATED_P (insn) = 1;
9380 if (dest == stack_pointer_rtx)
9382 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9383 bool valid = m->fs.sp_valid;
9385 if (src == hard_frame_pointer_rtx)
9387 valid = m->fs.fp_valid;
9388 ooffset = m->fs.fp_offset;
9390 else if (src == crtl->drap_reg)
9392 valid = m->fs.drap_valid;
9397 /* Else there are two possibilities: SP itself, which we set
9398 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9399 taken care of by hand along the eh_return path. */
9400 gcc_checking_assert (src == stack_pointer_rtx
9401 || offset == const0_rtx);
9404 m->fs.sp_offset = ooffset - INTVAL (offset);
9405 m->fs.sp_valid = valid;
9409 /* Find an available register to be used as a dynamic realign
9410 argument pointer register. Such a register will be written in the
9411 prologue and used at the beginning of the body, so it must not be
9412 1. a parameter passing register.
9414 We reuse the static-chain register if it is available. Otherwise, we
9415 use DI for i386 and R13 for x86-64. We chose R13 since it has
9418 Return: the regno of the chosen register. */
9421 find_drap_reg (void)
9423 tree decl = cfun->decl;
9427 /* Use R13 for a nested function or a function that needs a static
9428 chain. Since a function with a tail call may use any caller-saved
9429 register in the epilogue, DRAP must not use a caller-saved
9430 register in that case. */
9431 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9438 /* Use DI for a nested function or a function that needs a static
9439 chain. Since a function with a tail call may use any caller-saved
9440 register in the epilogue, DRAP must not use a caller-saved
9441 register in that case. */
9442 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9445 /* Reuse static chain register if it isn't used for parameter
9447 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9448 && !lookup_attribute ("fastcall",
9449 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9450 && !lookup_attribute ("thiscall",
9451 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9458 /* Return minimum incoming stack alignment. */
9461 ix86_minimum_incoming_stack_boundary (bool sibcall)
9463 unsigned int incoming_stack_boundary;
9465 /* Prefer the one specified at command line. */
9466 if (ix86_user_incoming_stack_boundary)
9467 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9468 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
9469 boundary if -mstackrealign is used, this isn't a sibcall check,
9470 and the estimated stack alignment is 128 bits. */
9473 && ix86_force_align_arg_pointer
9474 && crtl->stack_alignment_estimated == 128)
9475 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9477 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9479 /* Incoming stack alignment can be changed on individual functions
9480 via force_align_arg_pointer attribute. We use the smallest
9481 incoming stack boundary. */
9482 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9483 && lookup_attribute (ix86_force_align_arg_pointer_string,
9484 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9485 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9487 /* The incoming stack frame has to be aligned at least at
9488 parm_stack_boundary. */
9489 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9490 incoming_stack_boundary = crtl->parm_stack_boundary;
9492 /* Stack at entrance of main is aligned by runtime. We use the
9493 smallest incoming stack boundary. */
9494 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9495 && DECL_NAME (current_function_decl)
9496 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9497 && DECL_FILE_SCOPE_P (current_function_decl))
9498 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9500 return incoming_stack_boundary;
9503 /* Update incoming stack boundary and estimated stack alignment. */
9506 ix86_update_stack_boundary (void)
9508 ix86_incoming_stack_boundary
9509 = ix86_minimum_incoming_stack_boundary (false);
9511 /* x86_64 vararg needs 16-byte stack alignment for the register save
9515 && crtl->stack_alignment_estimated < 128)
9516 crtl->stack_alignment_estimated = 128;
9519 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9520 needed or an rtx for DRAP otherwise. */
9523 ix86_get_drap_rtx (void)
9525 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9526 crtl->need_drap = true;
9528 if (stack_realign_drap)
9530 /* Assign DRAP to vDRAP and return vDRAP. */
9531 unsigned int regno = find_drap_reg ();
9536 arg_ptr = gen_rtx_REG (Pmode, regno);
9537 crtl->drap_reg = arg_ptr;
9540 drap_vreg = copy_to_reg (arg_ptr);
9544 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9547 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9548 RTX_FRAME_RELATED_P (insn) = 1;
9556 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9559 ix86_internal_arg_pointer (void)
9561 return virtual_incoming_args_rtx;
9564 struct scratch_reg {
9569 /* Return a short-lived scratch register for use on function entry.
9570 In 32-bit mode, it is valid only after the registers are saved
9571 in the prologue. This register must be released by means of
9572 release_scratch_register_on_entry once it is dead. */
9575 get_scratch_register_on_entry (struct scratch_reg *sr)
9583 /* We always use R11 in 64-bit mode. */
9588 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9590 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9591 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9592 int regparm = ix86_function_regparm (fntype, decl);
9594 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9596 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9597 for the static chain register. */
9598 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9599 && drap_regno != AX_REG)
9601 else if (regparm < 2 && drap_regno != DX_REG)
9603 /* ecx is the static chain register. */
9604 else if (regparm < 3 && !fastcall_p && !static_chain_p
9605 && drap_regno != CX_REG)
9607 else if (ix86_save_reg (BX_REG, true))
9609 /* esi is the static chain register. */
9610 else if (!(regparm == 3 && static_chain_p)
9611 && ix86_save_reg (SI_REG, true))
9613 else if (ix86_save_reg (DI_REG, true))
9617 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9622 sr->reg = gen_rtx_REG (Pmode, regno);
9625 rtx insn = emit_insn (gen_push (sr->reg));
9626 RTX_FRAME_RELATED_P (insn) = 1;
9630 /* Release a scratch register obtained from the preceding function. */
9633 release_scratch_register_on_entry (struct scratch_reg *sr)
9637 rtx x, insn = emit_insn (gen_pop (sr->reg));
9639 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9640 RTX_FRAME_RELATED_P (insn) = 1;
9641 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9642 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9643 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9647 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9649 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9652 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9654 /* We skip the probe for the first interval + a small dope of 4 words and
9655 probe that many bytes past the specified size to maintain a protection
9656 area at the bottom of the stack. */
9657 const int dope = 4 * UNITS_PER_WORD;
9658 rtx size_rtx = GEN_INT (size);
9660 /* See if we have a constant small number of probes to generate. If so,
9661 that's the easy case. The run-time loop is made up of 11 insns in the
9662 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9663 for n # of intervals. */
9664 if (size <= 5 * PROBE_INTERVAL)
9666 HOST_WIDE_INT i, adjust;
9667 bool first_probe = true;
9669 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9670 values of N from 1 until it exceeds SIZE. If only one probe is
9671 needed, this will not generate any code. Then adjust and probe
9672 to PROBE_INTERVAL + SIZE. */
9673 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9677 adjust = 2 * PROBE_INTERVAL + dope;
9678 first_probe = false;
9681 adjust = PROBE_INTERVAL;
9683 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9684 plus_constant (stack_pointer_rtx, -adjust)));
9685 emit_stack_probe (stack_pointer_rtx);
9689 adjust = size + PROBE_INTERVAL + dope;
9691 adjust = size + PROBE_INTERVAL - i;
9693 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9694 plus_constant (stack_pointer_rtx, -adjust)));
9695 emit_stack_probe (stack_pointer_rtx);
9697 /* Adjust back to account for the additional first interval. */
9698 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9699 plus_constant (stack_pointer_rtx,
9700 PROBE_INTERVAL + dope)));
9703 /* Otherwise, do the same as above, but in a loop. Note that we must be
9704 extra careful with variables wrapping around because we might be at
9705 the very top (or the very bottom) of the address space and we have
9706 to be able to handle this case properly; in particular, we use an
9707 equality test for the loop condition. */
9710 HOST_WIDE_INT rounded_size;
9711 struct scratch_reg sr;
9713 get_scratch_register_on_entry (&sr);
9716 /* Step 1: round SIZE to the previous multiple of the interval. */
9718 rounded_size = size & -PROBE_INTERVAL;
9721 /* Step 2: compute initial and final value of the loop counter. */
9723 /* SP = SP_0 + PROBE_INTERVAL. */
9724 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9725 plus_constant (stack_pointer_rtx,
9726 - (PROBE_INTERVAL + dope))));
9728 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9729 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9730 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9731 gen_rtx_PLUS (Pmode, sr.reg,
9732 stack_pointer_rtx)));
9737 while (SP != LAST_ADDR)
9739 SP = SP + PROBE_INTERVAL
9743 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9744 values of N from 1 until it is equal to ROUNDED_SIZE. */
9746 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9749 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9750 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9752 if (size != rounded_size)
9754 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9755 plus_constant (stack_pointer_rtx,
9756 rounded_size - size)));
9757 emit_stack_probe (stack_pointer_rtx);
9760 /* Adjust back to account for the additional first interval. */
9761 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9762 plus_constant (stack_pointer_rtx,
9763 PROBE_INTERVAL + dope)));
9765 release_scratch_register_on_entry (&sr);
9768 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9769 cfun->machine->fs.sp_offset += size;
9771 /* Make sure nothing is scheduled before we are done. */
9772 emit_insn (gen_blockage ());
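/* For a concrete instance of the constant-count path above, assume
   x86-64, SIZE == 0x1800, PROBE_INTERVAL == 0x1000 and dope == 32;
   the emitted sequence is, schematically (probes shown as the "or"
   stores used by the loop variant below):

	subq	$0x2020, %rsp		(first interval + dope, + interval)
	orq	$0, (%rsp)
	subq	$0x800, %rsp
	orq	$0, (%rsp)
	addq	$0x1020, %rsp		(give back interval + dope)

   for a net adjustment of exactly 0x1800 bytes.  */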
9775 /* Adjust the stack pointer up to REG while probing it. */
9778 output_adjust_stack_and_probe (rtx reg)
9780 static int labelno = 0;
9781 char loop_lab[32], end_lab[32];
9784 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9785 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9787 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9789 /* Jump to END_LAB if SP == LAST_ADDR. */
9790 xops[0] = stack_pointer_rtx;
9792 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9793 fputs ("\tje\t", asm_out_file);
9794 assemble_name_raw (asm_out_file, end_lab);
9795 fputc ('\n', asm_out_file);
9797 /* SP = SP + PROBE_INTERVAL. */
9798 xops[1] = GEN_INT (PROBE_INTERVAL);
9799 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9802 xops[1] = const0_rtx;
9803 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9805 fprintf (asm_out_file, "\tjmp\t");
9806 assemble_name_raw (asm_out_file, loop_lab);
9807 fputc ('\n', asm_out_file);
9809 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
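/* Assuming the default 4096-byte PROBE_INTERVAL and %eax as the
   scratch register holding LAST_ADDR, the emitted loop reads:

   .LPSRL0:
	cmpl	%eax, %esp
	je	.LPSRE0
	subl	$4096, %esp
	orl	$0, (%esp)
	jmp	.LPSRL0
   .LPSRE0:
*/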
9814 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9815 inclusive. These are offsets from the current stack pointer. */
9818 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9820 /* See if we have a constant small number of probes to generate. If so,
9821 that's the easy case. The run-time loop is made up of 7 insns in the
9822 generic case while the compile-time loop is made up of n insns for n #
9824 if (size <= 7 * PROBE_INTERVAL)
9828 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9829 it exceeds SIZE. If only one probe is needed, this will not
9830 generate any code. Then probe at FIRST + SIZE. */
9831 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9832 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9834 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9837 /* Otherwise, do the same as above, but in a loop. Note that we must be
9838 extra careful with variables wrapping around because we might be at
9839 the very top (or the very bottom) of the address space and we have
9840 to be able to handle this case properly; in particular, we use an
9841 equality test for the loop condition. */
9844 HOST_WIDE_INT rounded_size, last;
9845 struct scratch_reg sr;
9847 get_scratch_register_on_entry (&sr);
9850 /* Step 1: round SIZE to the previous multiple of the interval. */
9852 rounded_size = size & -PROBE_INTERVAL;
9855 /* Step 2: compute initial and final value of the loop counter. */
9857 /* TEST_OFFSET = FIRST. */
9858 emit_move_insn (sr.reg, GEN_INT (-first));
9860 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9861 last = first + rounded_size;
9866 while (TEST_ADDR != LAST_ADDR)
9868 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9872 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9873 until it is equal to ROUNDED_SIZE. */
9875 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9878 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9879 that SIZE is equal to ROUNDED_SIZE. */
9881 if (size != rounded_size)
9882 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9885 rounded_size - size));
9887 release_scratch_register_on_entry (&sr);
9890 /* Make sure nothing is scheduled before we are done. */
9891 emit_insn (gen_blockage ());
9894 /* Probe a range of stack addresses from REG to END, inclusive. These are
9895 offsets from the current stack pointer. */
9898 output_probe_stack_range (rtx reg, rtx end)
9900 static int labelno = 0;
9901 char loop_lab[32], end_lab[32];
9904 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9905 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9907 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9909 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9912 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9913 fputs ("\tje\t", asm_out_file);
9914 assemble_name_raw (asm_out_file, end_lab);
9915 fputc ('\n', asm_out_file);
9917 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9918 xops[1] = GEN_INT (PROBE_INTERVAL);
9919 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9921 /* Probe at TEST_ADDR. */
9922 xops[0] = stack_pointer_rtx;
9924 xops[2] = const0_rtx;
9925 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9927 fprintf (asm_out_file, "\tjmp\t");
9928 assemble_name_raw (asm_out_file, loop_lab);
9929 fputc ('\n', asm_out_file);
9931 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9936 /* Finalize the stack_realign_needed flag, which guides the prologue and
9937 epilogue so they are generated in the correct form. */
9939 ix86_finalize_stack_realign_flags (void)
9941 /* Check if stack realignment is really needed after reload, and
9942 store the result in cfun. */
9943 unsigned int incoming_stack_boundary
9944 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9945 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9946 unsigned int stack_realign = (incoming_stack_boundary
9947 < (current_function_is_leaf
9948 ? crtl->max_used_stack_slot_alignment
9949 : crtl->stack_alignment_needed));
9951 if (crtl->stack_realign_finalized)
9953 /* After stack_realign_needed is finalized, we can no longer
9955 gcc_assert (crtl->stack_realign_needed == stack_realign);
9959 crtl->stack_realign_needed = stack_realign;
9960 crtl->stack_realign_finalized = true;
9964 /* Expand the prologue into a bunch of separate insns. */
9967 ix86_expand_prologue (void)
9969 struct machine_function *m = cfun->machine;
9972 struct ix86_frame frame;
9973 HOST_WIDE_INT allocate;
9974 bool int_registers_saved;
9976 ix86_finalize_stack_realign_flags ();
9978 /* DRAP should not coexist with stack_realign_fp */
9979 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9981 memset (&m->fs, 0, sizeof (m->fs));
9983 /* Initialize CFA state for before the prologue. */
9984 m->fs.cfa_reg = stack_pointer_rtx;
9985 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9987 /* Track SP offset to the CFA. We continue tracking this after we've
9988 swapped the CFA register away from SP. In the case of re-alignment
9989 this is fudged; we're interested in offsets within the local frame. */
9990 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9991 m->fs.sp_valid = true;
9993 ix86_compute_frame_layout (&frame);
9995 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9997 /* We should have already generated an error for any use of
9998 ms_hook on a nested function. */
9999 gcc_checking_assert (!ix86_static_chain_on_stack);
10001 /* Check if profiling is active and we shall use the profiling-before-
10002 prologue variant. If so, issue a sorry. */
10003 if (crtl->profile && flag_fentry != 0)
10004 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
10006 /* In ix86_asm_output_function_label we emitted:
10007 8b ff movl.s %edi,%edi
10009 8b ec movl.s %esp,%ebp
10011 This matches the hookable function prologue in Win32 API
10012 functions in Microsoft Windows XP Service Pack 2 and newer.
10013 Wine uses this to enable Windows apps to hook the Win32 API
10014 functions provided by Wine.
10016 What that means is that we've already set up the frame pointer. */
10018 if (frame_pointer_needed
10019 && !(crtl->drap_reg && crtl->stack_realign_needed))
10023 /* We've decided to use the frame pointer already set up.
10024 Describe this to the unwinder by pretending that both
10025 push and mov insns happen right here.
10027 Putting the unwind info here at the end of the ms_hook
10028 is done so that we can make absolutely certain we get
10029 the required byte sequence at the start of the function,
10030 rather than relying on an assembler that can produce
10031 the exact encoding required.
10033 However it does mean (in the unpatched case) that we have
10034 a 1 insn window where the asynchronous unwind info is
10035 incorrect. However, if we placed the unwind info at
10036 its correct location we would have incorrect unwind info
10037 in the patched case. Which is probably all moot since
10038 I don't expect Wine generates dwarf2 unwind info for the
10039 system libraries that use this feature. */
10041 insn = emit_insn (gen_blockage ());
10043 push = gen_push (hard_frame_pointer_rtx);
10044 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10045 stack_pointer_rtx);
10046 RTX_FRAME_RELATED_P (push) = 1;
10047 RTX_FRAME_RELATED_P (mov) = 1;
10049 RTX_FRAME_RELATED_P (insn) = 1;
10050 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10051 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10053 /* Note that gen_push incremented m->fs.cfa_offset, even
10054 though we didn't emit the push insn here. */
10055 m->fs.cfa_reg = hard_frame_pointer_rtx;
10056 m->fs.fp_offset = m->fs.cfa_offset;
10057 m->fs.fp_valid = true;
10061 /* The frame pointer is not needed so pop %ebp again.
10062 This leaves us with a pristine state. */
10063 emit_insn (gen_pop (hard_frame_pointer_rtx));
10067 /* The first insn of a function that accepts its static chain on the
10068 stack is to push the register that would be filled in by a direct
10069 call. This insn will be skipped by the trampoline. */
10070 else if (ix86_static_chain_on_stack)
10072 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10073 emit_insn (gen_blockage ());
10075 /* We don't want to interpret this push insn as a register save,
10076 only as a stack adjustment. The real copy of the register as
10077 a save will be done later, if needed. */
10078 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10079 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10080 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10081 RTX_FRAME_RELATED_P (insn) = 1;
10084 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10085 DRAP is needed and stack realignment is really needed after reload. */
10086 if (stack_realign_drap)
10088 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10090 /* Only need to push the parameter pointer reg if it is callee-saved. */
10091 if (!call_used_regs[REGNO (crtl->drap_reg)])
10093 /* Push arg pointer reg */
10094 insn = emit_insn (gen_push (crtl->drap_reg));
10095 RTX_FRAME_RELATED_P (insn) = 1;
10098 /* Grab the argument pointer. */
10099 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10100 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10101 RTX_FRAME_RELATED_P (insn) = 1;
10102 m->fs.cfa_reg = crtl->drap_reg;
10103 m->fs.cfa_offset = 0;
10105 /* Align the stack. */
10106 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10108 GEN_INT (-align_bytes)));
10109 RTX_FRAME_RELATED_P (insn) = 1;
10111 /* Replicate the return address on the stack so that the return
10112 address can be reached via the (argp - 1) slot. This is needed
10113 to implement macro RETURN_ADDR_RTX and intrinsic function
10114 expand_builtin_return_addr etc. */
10115 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10116 t = gen_frame_mem (Pmode, t);
10117 insn = emit_insn (gen_push (t));
10118 RTX_FRAME_RELATED_P (insn) = 1;
10120 /* For the purposes of frame and register save area addressing,
10121 we've started over with a new frame. */
10122 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10123 m->fs.realigned = true;
10126 if (frame_pointer_needed && !m->fs.fp_valid)
10128 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10129 slower on all targets. Also sdb doesn't like it. */
10130 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10131 RTX_FRAME_RELATED_P (insn) = 1;
10133 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10134 RTX_FRAME_RELATED_P (insn) = 1;
10136 if (m->fs.cfa_reg == stack_pointer_rtx)
10137 m->fs.cfa_reg = hard_frame_pointer_rtx;
10138 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
10139 m->fs.fp_offset = m->fs.sp_offset;
10140 m->fs.fp_valid = true;
10143 int_registers_saved = (frame.nregs == 0);
10145 if (!int_registers_saved)
10147 /* If saving registers via PUSH, do so now. */
10148 if (!frame.save_regs_using_mov)
10150 ix86_emit_save_regs ();
10151 int_registers_saved = true;
10152 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10155 /* When using red zone we may start register saving before allocating
10156 the stack frame saving one cycle of the prologue. However, avoid
10157 doing this if we have to probe the stack; at least on x86_64 the
10158 stack probe can turn into a call that clobbers a red zone location. */
10159 else if (ix86_using_red_zone ()
10160 && (! TARGET_STACK_PROBE
10161 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10163 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10164 int_registers_saved = true;
10168 if (stack_realign_fp)
10170 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10171 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10173 /* The computation of the size of the re-aligned stack frame means
10174 that we must allocate the size of the register save area before
10175 performing the actual alignment. Otherwise we cannot guarantee
10176 that there's enough storage above the realignment point. */
10177 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10178 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10179 GEN_INT (m->fs.sp_offset
10180 - frame.sse_reg_save_offset),
10183 /* Align the stack. */
10184 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10186 GEN_INT (-align_bytes)));
10188 /* For the purposes of register save area addressing, the stack
10189 pointer is no longer valid. As for the value of sp_offset,
10190 see ix86_compute_frame_layout, which we need to match in order
10191 to pass verification of stack_pointer_offset at the end. */
10192 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10193 m->fs.sp_valid = false;
10196 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10198 if (flag_stack_usage)
10200 /* We start counting from ARG_POINTER. */
10201 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10203 /* If it was realigned, take into account the fake frame. */
10204 if (stack_realign_drap)
10206 if (ix86_static_chain_on_stack)
10207 stack_size += UNITS_PER_WORD;
10209 if (!call_used_regs[REGNO (crtl->drap_reg)])
10210 stack_size += UNITS_PER_WORD;
10212 /* This over-estimates by 1 minimal-stack-alignment-unit but
10213 mitigates that by counting in the new return address slot. */
10214 current_function_dynamic_stack_size
10215 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10218 current_function_static_stack_size = stack_size;
10221 /* The stack has already been decremented by the instruction calling us
10222 so we need to probe unconditionally to preserve the protection area. */
10223 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10225 /* We expect the registers to be saved when probes are used. */
10226 gcc_assert (int_registers_saved);
10228 if (STACK_CHECK_MOVING_SP)
10230 ix86_adjust_stack_and_probe (allocate);
10235 HOST_WIDE_INT size = allocate;
10237 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10238 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10240 if (TARGET_STACK_PROBE)
10241 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10243 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10249 else if (!ix86_target_stack_probe ()
10250 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10252 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10253 GEN_INT (-allocate), -1,
10254 m->fs.cfa_reg == stack_pointer_rtx);
10258 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10260 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10262 bool eax_live = false;
10263 bool r10_live = false;
10266 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10267 if (!TARGET_64BIT_MS_ABI)
10268 eax_live = ix86_eax_live_at_start_p ();
10272 emit_insn (gen_push (eax));
10273 allocate -= UNITS_PER_WORD;
10277 r10 = gen_rtx_REG (Pmode, R10_REG);
10278 emit_insn (gen_push (r10));
10279 allocate -= UNITS_PER_WORD;
10282 emit_move_insn (eax, GEN_INT (allocate));
10283 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10285 /* Use the fact that AX still contains ALLOCATE. */
10286 adjust_stack_insn = (TARGET_64BIT
10287 ? gen_pro_epilogue_adjust_stack_di_sub
10288 : gen_pro_epilogue_adjust_stack_si_sub);
10290 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10291 stack_pointer_rtx, eax));
10293 if (m->fs.cfa_reg == stack_pointer_rtx)
10295 m->fs.cfa_offset += allocate;
10297 RTX_FRAME_RELATED_P (insn) = 1;
10298 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10299 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10300 plus_constant (stack_pointer_rtx,
10303 m->fs.sp_offset += allocate;
10305 if (r10_live && eax_live)
10307 t = choose_baseaddr (m->fs.sp_offset - allocate);
10308 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10309 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10310 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10312 else if (eax_live || r10_live)
10314 t = choose_baseaddr (m->fs.sp_offset - allocate);
10315 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10318 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10320 if (!int_registers_saved)
10321 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10322 if (frame.nsseregs)
10323 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10325 pic_reg_used = false;
10326 if (pic_offset_table_rtx
10327 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10330 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10332 if (alt_pic_reg_used != INVALID_REGNUM)
10333 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10335 pic_reg_used = true;
10342 if (ix86_cmodel == CM_LARGE_PIC)
10344 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10345 rtx label = gen_label_rtx ();
10346 emit_label (label);
10347 LABEL_PRESERVE_P (label) = 1;
10348 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10349 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10350 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10351 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10352 pic_offset_table_rtx, tmp_reg));
10355 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10358 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10361 /* In the pic_reg_used case, make sure that the got load isn't deleted
10362 when mcount needs it. Blockage to avoid call movement across mcount
10363 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10365 if (crtl->profile && !flag_fentry && pic_reg_used)
10366 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10368 if (crtl->drap_reg && !crtl->stack_realign_needed)
10370 /* vDRAP is setup but after reload it turns out stack realign
10371 isn't necessary, here we will emit prologue to setup DRAP
10372 without stack realign adjustment */
10373 t = choose_baseaddr (0);
10374 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10377 /* Prevent instructions from being scheduled into register save push
10378 sequence when access to the redzone area is done through frame pointer.
10379 The offset between the frame pointer and the stack pointer is calculated
10380 relative to the value of the stack pointer at the end of the function
10381 prologue, and moving instructions that access redzone area via frame
10382 pointer inside push sequence violates this assumption. */
10383 if (frame_pointer_needed && frame.red_zone_size)
10384 emit_insn (gen_memory_blockage ());
10386 /* Emit cld instruction if stringops are used in the function. */
10387 if (TARGET_CLD && ix86_current_function_needs_cld)
10388 emit_insn (gen_cld ());
10391 /* Emit code to restore REG using a POP insn. */
10394 ix86_emit_restore_reg_using_pop (rtx reg)
10396 struct machine_function *m = cfun->machine;
10397 rtx insn = emit_insn (gen_pop (reg));
10399 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10400 m->fs.sp_offset -= UNITS_PER_WORD;
10402 if (m->fs.cfa_reg == crtl->drap_reg
10403 && REGNO (reg) == REGNO (crtl->drap_reg))
10405 /* Previously we'd represented the CFA as an expression
10406 like *(%ebp - 8). We've just popped that value from
10407 the stack, which means we need to reset the CFA to
10408 the drap register. This will remain until we restore
10409 the stack pointer. */
10410 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10411 RTX_FRAME_RELATED_P (insn) = 1;
10413 /* This means that the DRAP register is valid for addressing too. */
10414 m->fs.drap_valid = true;
10418 if (m->fs.cfa_reg == stack_pointer_rtx)
10420 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10421 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10422 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10423 RTX_FRAME_RELATED_P (insn) = 1;
10425 m->fs.cfa_offset -= UNITS_PER_WORD;
10428 /* When the frame pointer is the CFA, and we pop it, we are
10429 swapping back to the stack pointer as the CFA. This happens
10430 for stack frames that don't allocate other data, so we assume
10431 the stack pointer is now pointing at the return address, i.e.
10432 the function entry state, which makes the offset be 1 word. */
10433 if (reg == hard_frame_pointer_rtx)
10435 m->fs.fp_valid = false;
10436 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10438 m->fs.cfa_reg = stack_pointer_rtx;
10439 m->fs.cfa_offset -= UNITS_PER_WORD;
10441 add_reg_note (insn, REG_CFA_DEF_CFA,
10442 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10443 GEN_INT (m->fs.cfa_offset)));
10444 RTX_FRAME_RELATED_P (insn) = 1;
10449 /* Emit code to restore saved registers using POP insns. */
10452 ix86_emit_restore_regs_using_pop (void)
10454 unsigned int regno;
10456 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10457 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10458 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10461 /* Emit code and notes for the LEAVE instruction. */
10464 ix86_emit_leave (void)
10466 struct machine_function *m = cfun->machine;
10467 rtx insn = emit_insn (ix86_gen_leave ());
10469 ix86_add_queued_cfa_restore_notes (insn);
10471 gcc_assert (m->fs.fp_valid);
10472 m->fs.sp_valid = true;
10473 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10474 m->fs.fp_valid = false;
10476 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10478 m->fs.cfa_reg = stack_pointer_rtx;
10479 m->fs.cfa_offset = m->fs.sp_offset;
10481 add_reg_note (insn, REG_CFA_DEF_CFA,
10482 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10483 RTX_FRAME_RELATED_P (insn) = 1;
10484 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10489 /* Emit code to restore saved registers using MOV insns.
10490 First register is restored from CFA - CFA_OFFSET. */
10492 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10493 int maybe_eh_return)
10495 struct machine_function *m = cfun->machine;
10496 unsigned int regno;
10498 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10499 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10501 rtx reg = gen_rtx_REG (Pmode, regno);
10504 mem = choose_baseaddr (cfa_offset);
10505 mem = gen_frame_mem (Pmode, mem);
10506 insn = emit_move_insn (reg, mem);
10508 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10510 /* Previously we'd represented the CFA as an expression
10511 like *(%ebp - 8). We've just popped that value from
10512 the stack, which means we need to reset the CFA to
10513 the drap register. This will remain until we restore
10514 the stack pointer. */
10515 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10516 RTX_FRAME_RELATED_P (insn) = 1;
10518 /* This means that the DRAP register is valid for addressing. */
10519 m->fs.drap_valid = true;
10522 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10524 cfa_offset -= UNITS_PER_WORD;
10528 /* Emit code to restore saved registers using MOV insns.
10529 First register is restored from CFA - CFA_OFFSET. */
10531 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10532 int maybe_eh_return)
10534 unsigned int regno;
10536 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10537 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10539 rtx reg = gen_rtx_REG (V4SFmode, regno);
10542 mem = choose_baseaddr (cfa_offset);
10543 mem = gen_rtx_MEM (V4SFmode, mem);
10544 set_mem_align (mem, 128);
10545 emit_move_insn (reg, mem);
10547 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10553 /* Restore function stack, frame, and registers. */
10556 ix86_expand_epilogue (int style)
10558 struct machine_function *m = cfun->machine;
10559 struct machine_frame_state frame_state_save = m->fs;
10560 struct ix86_frame frame;
10561 bool restore_regs_via_mov;
10564 ix86_finalize_stack_realign_flags ();
10565 ix86_compute_frame_layout (&frame);
10567 m->fs.sp_valid = (!frame_pointer_needed
10568 || (current_function_sp_is_unchanging
10569 && !stack_realign_fp));
10570 gcc_assert (!m->fs.sp_valid
10571 || m->fs.sp_offset == frame.stack_pointer_offset);
10573 /* The FP must be valid if the frame pointer is present. */
10574 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10575 gcc_assert (!m->fs.fp_valid
10576 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10578 /* We must have *some* valid pointer to the stack frame. */
10579 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10581 /* The DRAP is never valid at this point. */
10582 gcc_assert (!m->fs.drap_valid);
10584 /* See the comment about red zone and frame
10585 pointer usage in ix86_expand_prologue. */
10586 if (frame_pointer_needed && frame.red_zone_size)
10587 emit_insn (gen_memory_blockage ());
10589 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10590 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10592 /* Determine the CFA offset of the end of the red-zone. */
10593 m->fs.red_zone_offset = 0;
10594 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10596 /* The red-zone begins below the return address. */
10597 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10599 /* When the register save area is in the aligned portion of
10600 the stack, determine the maximum runtime displacement that
10601 matches up with the aligned frame. */
10602 if (stack_realign_drap)
10603 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10607 /* Special care must be taken for the normal return case of a function
10608 using eh_return: the eax and edx registers are marked as saved, but
10609 not restored along this path. Adjust the save location to match. */
10610 if (crtl->calls_eh_return && style != 2)
10611 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10613 /* If we're only restoring one register and sp is not valid then
10614 using a move instruction to restore the register since it's
10615 less work than reloading sp and popping the register. */
10616 if (!m->fs.sp_valid && frame.nregs <= 1)
10617 restore_regs_via_mov = true;
10618 /* EH_RETURN requires the use of moves to function properly. */
10619 else if (crtl->calls_eh_return)
10620 restore_regs_via_mov = true;
10621 else if (TARGET_EPILOGUE_USING_MOVE
10622 && cfun->machine->use_fast_prologue_epilogue
10623 && (frame.nregs > 1
10624 || m->fs.sp_offset != frame.reg_save_offset))
10625 restore_regs_via_mov = true;
10626 else if (frame_pointer_needed
10628 && m->fs.sp_offset != frame.reg_save_offset)
10629 restore_regs_via_mov = true;
10630 else if (frame_pointer_needed
10631 && TARGET_USE_LEAVE
10632 && cfun->machine->use_fast_prologue_epilogue
10633 && frame.nregs == 1)
10634 restore_regs_via_mov = true;
10636 restore_regs_via_mov = false;
10638 if (restore_regs_via_mov || frame.nsseregs)
10640 /* Ensure that the entire register save area is addressable via
10641 the stack pointer, if we will restore via sp. */
10643 && m->fs.sp_offset > 0x7fffffff
10644 && !(m->fs.fp_valid || m->fs.drap_valid)
10645 && (frame.nsseregs + frame.nregs) != 0)
10647 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10648 GEN_INT (m->fs.sp_offset
10649 - frame.sse_reg_save_offset),
10651 m->fs.cfa_reg == stack_pointer_rtx);
10655 /* If there are any SSE registers to restore, then we have to do it
10656 via moves, since there's obviously no pop for SSE regs. */
10657 if (frame.nsseregs)
10658 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10661 if (restore_regs_via_mov)
10666 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10668 /* eh_return epilogues need %ecx added to the stack pointer. */
10671 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10673 /* Stack align doesn't work with eh_return. */
10674 gcc_assert (!stack_realign_drap);
10675 /* Neither does regparm nested functions. */
10676 gcc_assert (!ix86_static_chain_on_stack);
10678 if (frame_pointer_needed)
10680 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10681 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10682 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10684 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10685 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10687 /* Note that we use SA as a temporary CFA, as the return
10688 address is at the proper place relative to it. We
10689 pretend this happens at the FP restore insn because
10690 prior to this insn the FP would be stored at the wrong
10691 offset relative to SA, and after this insn we have no
10692 other reasonable register to use for the CFA. We don't
10693 bother resetting the CFA to the SP for the duration of
10694 the return insn. */
10695 add_reg_note (insn, REG_CFA_DEF_CFA,
10696 plus_constant (sa, UNITS_PER_WORD));
10697 ix86_add_queued_cfa_restore_notes (insn);
10698 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10699 RTX_FRAME_RELATED_P (insn) = 1;
10701 m->fs.cfa_reg = sa;
10702 m->fs.cfa_offset = UNITS_PER_WORD;
10703 m->fs.fp_valid = false;
10705 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10706 const0_rtx, style, false);
10710 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10711 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10712 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10713 ix86_add_queued_cfa_restore_notes (insn);
10715 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10716 if (m->fs.cfa_offset != UNITS_PER_WORD)
10718 m->fs.cfa_offset = UNITS_PER_WORD;
10719 add_reg_note (insn, REG_CFA_DEF_CFA,
10720 plus_constant (stack_pointer_rtx,
10722 RTX_FRAME_RELATED_P (insn) = 1;
10725 m->fs.sp_offset = UNITS_PER_WORD;
10726 m->fs.sp_valid = true;
10731 /* First step is to deallocate the stack frame so that we can
10732 pop the registers. */
10733 if (!m->fs.sp_valid)
10735 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10736 GEN_INT (m->fs.fp_offset
10737 - frame.reg_save_offset),
10740 else if (m->fs.sp_offset != frame.reg_save_offset)
10742 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10743 GEN_INT (m->fs.sp_offset
10744 - frame.reg_save_offset),
10746 m->fs.cfa_reg == stack_pointer_rtx);
10749 ix86_emit_restore_regs_using_pop ();
10752 /* If we used a stack pointer and haven't already got rid of it,
10754 if (m->fs.fp_valid)
10756 /* If the stack pointer is valid and pointing at the frame
10757 pointer store address, then we only need a pop. */
10758 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10759 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10760 /* Leave results in shorter dependency chains on CPUs that are
10761 able to grok it fast. */
10762 else if (TARGET_USE_LEAVE
10763 || optimize_function_for_size_p (cfun)
10764 || !cfun->machine->use_fast_prologue_epilogue)
10765 ix86_emit_leave ();
10768 pro_epilogue_adjust_stack (stack_pointer_rtx,
10769 hard_frame_pointer_rtx,
10770 const0_rtx, style, !using_drap);
10771 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10777 int param_ptr_offset = UNITS_PER_WORD;
10780 gcc_assert (stack_realign_drap);
10782 if (ix86_static_chain_on_stack)
10783 param_ptr_offset += UNITS_PER_WORD;
10784 if (!call_used_regs[REGNO (crtl->drap_reg)])
10785 param_ptr_offset += UNITS_PER_WORD;
10787 insn = emit_insn (gen_rtx_SET
10788 (VOIDmode, stack_pointer_rtx,
10789 gen_rtx_PLUS (Pmode,
10791 GEN_INT (-param_ptr_offset))));
10792 m->fs.cfa_reg = stack_pointer_rtx;
10793 m->fs.cfa_offset = param_ptr_offset;
10794 m->fs.sp_offset = param_ptr_offset;
10795 m->fs.realigned = false;
10797 add_reg_note (insn, REG_CFA_DEF_CFA,
10798 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10799 GEN_INT (param_ptr_offset)));
10800 RTX_FRAME_RELATED_P (insn) = 1;
10802 if (!call_used_regs[REGNO (crtl->drap_reg)])
10803 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10806 /* At this point the stack pointer must be valid, and we must have
10807 restored all of the registers. We may not have deallocated the
10808 entire stack frame. We've delayed this until now because it may
10809 be possible to merge the local stack deallocation with the
10810 deallocation forced by ix86_static_chain_on_stack. */
10811 gcc_assert (m->fs.sp_valid);
10812 gcc_assert (!m->fs.fp_valid);
10813 gcc_assert (!m->fs.realigned);
10814 if (m->fs.sp_offset != UNITS_PER_WORD)
10816 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10817 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10821 /* Sibcall epilogues don't want a return instruction. */
10824 m->fs = frame_state_save;
10828 /* Emit vzeroupper if needed. */
10829 if (TARGET_VZEROUPPER
10830 && cfun->machine->use_avx256_p
10831 && !cfun->machine->caller_return_avx256_p)
10833 cfun->machine->use_vzeroupper_p = 1;
10834 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10837 if (crtl->args.pops_args && crtl->args.size)
10839 rtx popc = GEN_INT (crtl->args.pops_args);
10841 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10842 address, do explicit add, and jump indirectly to the caller. */
10844 if (crtl->args.pops_args >= 65536)
10846 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10849 /* There is no "pascal" calling convention in any 64bit ABI. */
10850 gcc_assert (!TARGET_64BIT);
10852 insn = emit_insn (gen_pop (ecx));
10853 m->fs.cfa_offset -= UNITS_PER_WORD;
10854 m->fs.sp_offset -= UNITS_PER_WORD;
10856 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10857 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10858 add_reg_note (insn, REG_CFA_REGISTER,
10859 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10860 RTX_FRAME_RELATED_P (insn) = 1;
10862 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10864 emit_jump_insn (gen_return_indirect_internal (ecx));
10867 emit_jump_insn (gen_return_pop_internal (popc));
10870 emit_jump_insn (gen_return_internal ());
10872 /* Restore the state back to the state from the prologue,
10873 so that it's correct for the next epilogue. */
10874 m->fs = frame_state_save;
10877 /* Reset from the function's potential modifications. */
10880 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10881 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10883 if (pic_offset_table_rtx)
10884 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10886 /* Mach-O doesn't support labels at the end of objects, so if
10887 it looks like we might want one, insert a NOP. */
10889 rtx insn = get_last_insn ();
10892 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10893 insn = PREV_INSN (insn);
10897 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10898 fputs ("\tnop\n", file);
10904 /* Return a scratch register to use in the split stack prologue. The
10905 split stack prologue is used for -fsplit-stack. It is the first
10906 instructions in the function, even before the regular prologue.
10907 The scratch register can be any caller-saved register which is not
10908 used for parameters or for the static chain. */
10910 static unsigned int
10911 split_stack_prologue_scratch_regno (void)
10920 is_fastcall = (lookup_attribute ("fastcall",
10921 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10923 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10927 if (DECL_STATIC_CHAIN (cfun->decl))
10929 sorry ("-fsplit-stack does not support fastcall with "
10930 "nested function");
10931 return INVALID_REGNUM;
10935 else if (regparm < 3)
10937 if (!DECL_STATIC_CHAIN (cfun->decl))
10943 sorry ("-fsplit-stack does not support 2 register "
10944 " parameters for a nested function");
10945 return INVALID_REGNUM;
10952 /* FIXME: We could make this work by pushing a register
10953 around the addition and comparison. */
10954 sorry ("-fsplit-stack does not support 3 register parameters");
10955 return INVALID_REGNUM;
10960 /* A SYMBOL_REF for the function which allocates new stackspace for
10963 static GTY(()) rtx split_stack_fn;
10965 /* Handle -fsplit-stack. These are the first instructions in the
10966 function, even before the regular prologue. */
10969 ix86_expand_split_stack_prologue (void)
10971 struct ix86_frame frame;
10972 HOST_WIDE_INT allocate;
10974 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10975 rtx scratch_reg = NULL_RTX;
10976 rtx varargs_label = NULL_RTX;
10978 gcc_assert (flag_split_stack && reload_completed);
10980 ix86_finalize_stack_realign_flags ();
10981 ix86_compute_frame_layout (&frame);
10982 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10984 /* This is the label we will branch to if we have enough stack
10985 space. We expect the basic block reordering pass to reverse this
10986 branch if optimizing, so that we branch in the unlikely case. */
10987 label = gen_label_rtx ();
10989 /* We need to compare the stack pointer minus the frame size with
10990 the stack boundary in the TCB. The stack boundary always gives
10991 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10992 can compare directly. Otherwise we need to do an addition. */
10994 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10995 UNSPEC_STACK_CHECK);
10996 limit = gen_rtx_CONST (Pmode, limit);
10997 limit = gen_rtx_MEM (Pmode, limit);
10998 if (allocate < SPLIT_STACK_AVAILABLE)
10999 current = stack_pointer_rtx;
11002 unsigned int scratch_regno;
11005 /* We need a scratch register to hold the stack pointer minus
11006 the required frame size. Since this is the very start of the
11007 function, the scratch register can be any caller-saved
11008 register which is not used for parameters. */
11009 offset = GEN_INT (- allocate);
11010 scratch_regno = split_stack_prologue_scratch_regno ();
11011 if (scratch_regno == INVALID_REGNUM)
11013 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11014 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11016 /* We don't use ix86_gen_add3 in this case because it will
11017 want to split to lea, but when not optimizing the insn
11018 will not be split after this point. */
11019 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11020 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11025 emit_move_insn (scratch_reg, offset);
11026 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11027 stack_pointer_rtx));
11029 current = scratch_reg;
11032 ix86_expand_branch (GEU, current, limit, label);
11033 jump_insn = get_last_insn ();
11034 JUMP_LABEL (jump_insn) = label;
11036 /* Mark the jump as very likely to be taken. */
11037 add_reg_note (jump_insn, REG_BR_PROB,
11038 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11040 /* Get more stack space. We pass in the desired stack space and the
11041 size of the arguments to copy to the new stack. In 32-bit mode
11042 we push the parameters; __morestack will return on a new stack
11043 anyhow. In 64-bit mode we pass the parameters in r10 and
11045 allocate_rtx = GEN_INT (allocate);
11046 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11047 call_fusage = NULL_RTX;
11052 reg = gen_rtx_REG (Pmode, R10_REG);
11054 /* If this function uses a static chain, it will be in %r10.
11055 Preserve it across the call to __morestack. */
11056 if (DECL_STATIC_CHAIN (cfun->decl))
11060 rax = gen_rtx_REG (Pmode, AX_REG);
11061 emit_move_insn (rax, reg);
11062 use_reg (&call_fusage, rax);
11065 emit_move_insn (reg, allocate_rtx);
11066 use_reg (&call_fusage, reg);
11067 reg = gen_rtx_REG (Pmode, R11_REG);
11068 emit_move_insn (reg, GEN_INT (args_size));
11069 use_reg (&call_fusage, reg);
11073 emit_insn (gen_push (GEN_INT (args_size)));
11074 emit_insn (gen_push (allocate_rtx));
11076 if (split_stack_fn == NULL_RTX)
11077 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11078 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
11079 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11081 add_function_usage_to (call_insn, call_fusage);
11083 /* In order to make call/return prediction work right, we now need
11084 to execute a return instruction. See
11085 libgcc/config/i386/morestack.S for the details on how this works.
11087 For flow purposes gcc must not see this as a return
11088 instruction--we need control flow to continue at the subsequent
11089 label. Therefore, we use an unspec. */
11090 gcc_assert (crtl->args.pops_args < 65536);
11091 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11093 /* If we are in 64-bit mode and this function uses a static chain,
11094 we saved %r10 in %rax before calling _morestack. */
11095 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11096 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11097 gen_rtx_REG (Pmode, AX_REG));
11099 /* If this function calls va_start, we need to store a pointer to
11100 the arguments on the old stack, because they may not have been
11101 all copied to the new stack. At this point the old stack can be
11102 found at the frame pointer value used by __morestack, because
11103 __morestack has set that up before calling back to us. Here we
11104 store that pointer in a scratch register, and in
11105 ix86_expand_prologue we store the scratch register in a stack
11107 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11109 unsigned int scratch_regno;
11113 scratch_regno = split_stack_prologue_scratch_regno ();
11114 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11115 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11119 return address within this function
11120 return address of caller of this function
11122 So we add three words to get to the stack arguments.
11126 return address within this function
11127 first argument to __morestack
11128 second argument to __morestack
11129 return address of caller of this function
11131 So we add five words to get to the stack arguments.
11133 words = TARGET_64BIT ? 3 : 5;
11134 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11135 gen_rtx_PLUS (Pmode, frame_reg,
11136 GEN_INT (words * UNITS_PER_WORD))));
11138 varargs_label = gen_label_rtx ();
11139 emit_jump_insn (gen_jump (varargs_label));
11140 JUMP_LABEL (get_last_insn ()) = varargs_label;
11145 emit_label (label);
11146 LABEL_NUSES (label) = 1;
11148 /* If this function calls va_start, we now have to set the scratch
11149 register for the case where we do not call __morestack. In this
11150 case we need to set it based on the stack pointer. */
11151 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11153 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11154 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11155 GEN_INT (UNITS_PER_WORD))));
11157 emit_label (varargs_label);
11158 LABEL_NUSES (varargs_label) = 1;
11162 /* We may have to tell the dataflow pass that the split stack prologue
11163 is initializing a scratch register. */
11166 ix86_live_on_entry (bitmap regs)
11168 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11170 gcc_assert (flag_split_stack);
11171 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11175 /* Extract the parts of an RTL expression that is a valid memory address
11176 for an instruction. Return 0 if the structure of the address is
11177 grossly off. Return -1 if the address contains ASHIFT, so it is not
11178 strictly valid, but still used for computing length of lea instruction. */
11181 ix86_decompose_address (rtx addr, struct ix86_address *out)
11183 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11184 rtx base_reg, index_reg;
11185 HOST_WIDE_INT scale = 1;
11186 rtx scale_rtx = NULL_RTX;
11189 enum ix86_address_seg seg = SEG_DEFAULT;
11191 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11193 else if (GET_CODE (addr) == PLUS)
11195 rtx addends[4], op;
11203 addends[n++] = XEXP (op, 1);
11206 while (GET_CODE (op) == PLUS);
11211 for (i = n; i >= 0; --i)
11214 switch (GET_CODE (op))
11219 index = XEXP (op, 0);
11220 scale_rtx = XEXP (op, 1);
11226 index = XEXP (op, 0);
11227 tmp = XEXP (op, 1);
11228 if (!CONST_INT_P (tmp))
11230 scale = INTVAL (tmp);
11231 if ((unsigned HOST_WIDE_INT) scale > 3)
11233 scale = 1 << scale;
11237 if (XINT (op, 1) == UNSPEC_TP
11238 && TARGET_TLS_DIRECT_SEG_REFS
11239 && seg == SEG_DEFAULT)
11240 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11269 else if (GET_CODE (addr) == MULT)
11271 index = XEXP (addr, 0); /* index*scale */
11272 scale_rtx = XEXP (addr, 1);
11274 else if (GET_CODE (addr) == ASHIFT)
11276 /* We're called for lea too, which implements ashift on occasion. */
11277 index = XEXP (addr, 0);
11278 tmp = XEXP (addr, 1);
11279 if (!CONST_INT_P (tmp))
11281 scale = INTVAL (tmp);
11282 if ((unsigned HOST_WIDE_INT) scale > 3)
11284 scale = 1 << scale;
11288 disp = addr; /* displacement */
11290 /* Extract the integral value of scale. */
11293 if (!CONST_INT_P (scale_rtx))
11295 scale = INTVAL (scale_rtx);
11298 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11299 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11301 /* Avoid useless 0 displacement. */
11302 if (disp == const0_rtx && (base || index))
11305 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11306 if (base_reg && index_reg && scale == 1
11307 && (index_reg == arg_pointer_rtx
11308 || index_reg == frame_pointer_rtx
11309 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11312 tmp = base, base = index, index = tmp;
11313 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11316 /* Special case: %ebp cannot be encoded as a base without a displacement.
11320 && (base_reg == hard_frame_pointer_rtx
11321 || base_reg == frame_pointer_rtx
11322 || base_reg == arg_pointer_rtx
11323 || (REG_P (base_reg)
11324 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11325 || REGNO (base_reg) == R13_REG))))
11328 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11329 Avoid this by transforming to [%esi+0].
11330 Reload calls address legitimization without cfun defined, so we need
11331 to test cfun for being non-NULL. */
11332 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11333 && base_reg && !index_reg && !disp
11334 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11337 /* Special case: encode reg+reg instead of reg*2. */
11338 if (!base && index && scale == 2)
11339 base = index, base_reg = index_reg, scale = 1;
11341 /* Special case: scaling cannot be encoded without base or displacement. */
11342 if (!base && !disp && index && scale != 1)
11346 out->index = index;
11348 out->scale = scale;
11354 /* Return cost of the memory address x.
11355 For i386, it is better to use a complex address than let gcc copy
11356 the address into a reg and make a new pseudo. But not if the address
11357 requires to two regs - that would mean more pseudos with longer
11360 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11362 struct ix86_address parts;
11364 int ok = ix86_decompose_address (x, &parts);
11368 if (parts.base && GET_CODE (parts.base) == SUBREG)
11369 parts.base = SUBREG_REG (parts.base);
11370 if (parts.index && GET_CODE (parts.index) == SUBREG)
11371 parts.index = SUBREG_REG (parts.index);
11373 /* Attempt to minimize number of registers in the address. */
11375 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11377 && (!REG_P (parts.index)
11378 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11382 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11384 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11385 && parts.base != parts.index)
11388 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11389 since it's predecode logic can't detect the length of instructions
11390 and it degenerates to vector decoded. Increase cost of such
11391 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11392 to split such addresses or even refuse such addresses at all.
11394 Following addressing modes are affected:
11399 The first and last case may be avoidable by explicitly coding the zero in
11400 memory address, but I don't have AMD-K6 machine handy to check this
11404 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11405 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11406 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11412 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11413 this is used for to form addresses to local data when -fPIC is in
11417 darwin_local_data_pic (rtx disp)
11419 return (GET_CODE (disp) == UNSPEC
11420 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11423 /* Determine if a given RTX is a valid constant. We already know this
11424 satisfies CONSTANT_P. */
11427 legitimate_constant_p (rtx x)
11429 switch (GET_CODE (x))
11434 if (GET_CODE (x) == PLUS)
11436 if (!CONST_INT_P (XEXP (x, 1)))
11441 if (TARGET_MACHO && darwin_local_data_pic (x))
11444 /* Only some unspecs are valid as "constants". */
11445 if (GET_CODE (x) == UNSPEC)
11446 switch (XINT (x, 1))
11449 case UNSPEC_GOTOFF:
11450 case UNSPEC_PLTOFF:
11451 return TARGET_64BIT;
11453 case UNSPEC_NTPOFF:
11454 x = XVECEXP (x, 0, 0);
11455 return (GET_CODE (x) == SYMBOL_REF
11456 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11457 case UNSPEC_DTPOFF:
11458 x = XVECEXP (x, 0, 0);
11459 return (GET_CODE (x) == SYMBOL_REF
11460 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11465 /* We must have drilled down to a symbol. */
11466 if (GET_CODE (x) == LABEL_REF)
11468 if (GET_CODE (x) != SYMBOL_REF)
11473 /* TLS symbols are never valid. */
11474 if (SYMBOL_REF_TLS_MODEL (x))
11477 /* DLLIMPORT symbols are never valid. */
11478 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11479 && SYMBOL_REF_DLLIMPORT_P (x))
11484 if (GET_MODE (x) == TImode
11485 && x != CONST0_RTX (TImode)
11491 if (!standard_sse_constant_p (x))
11498 /* Otherwise we handle everything else in the move patterns. */
11502 /* Determine if it's legal to put X into the constant pool. This
11503 is not possible for the address of thread-local symbols, which
11504 is checked above. */
11507 ix86_cannot_force_const_mem (rtx x)
11509 /* We can always put integral constants and vectors in memory. */
11510 switch (GET_CODE (x))
11520 return !legitimate_constant_p (x);
11524 /* Nonzero if the constant value X is a legitimate general operand
11525 when generating PIC code. It is given that flag_pic is on and
11526 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11529 legitimate_pic_operand_p (rtx x)
11533 switch (GET_CODE (x))
11536 inner = XEXP (x, 0);
11537 if (GET_CODE (inner) == PLUS
11538 && CONST_INT_P (XEXP (inner, 1)))
11539 inner = XEXP (inner, 0);
11541 /* Only some unspecs are valid as "constants". */
11542 if (GET_CODE (inner) == UNSPEC)
11543 switch (XINT (inner, 1))
11546 case UNSPEC_GOTOFF:
11547 case UNSPEC_PLTOFF:
11548 return TARGET_64BIT;
11550 x = XVECEXP (inner, 0, 0);
11551 return (GET_CODE (x) == SYMBOL_REF
11552 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11553 case UNSPEC_MACHOPIC_OFFSET:
11554 return legitimate_pic_address_disp_p (x);
11562 return legitimate_pic_address_disp_p (x);
11569 /* Determine if a given CONST RTX is a valid memory displacement
11573 legitimate_pic_address_disp_p (rtx disp)
11577 /* In 64bit mode we can allow direct addresses of symbols and labels
11578 when they are not dynamic symbols. */
11581 rtx op0 = disp, op1;
11583 switch (GET_CODE (disp))
11589 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11591 op0 = XEXP (XEXP (disp, 0), 0);
11592 op1 = XEXP (XEXP (disp, 0), 1);
11593 if (!CONST_INT_P (op1)
11594 || INTVAL (op1) >= 16*1024*1024
11595 || INTVAL (op1) < -16*1024*1024)
11597 if (GET_CODE (op0) == LABEL_REF)
11599 if (GET_CODE (op0) != SYMBOL_REF)
11604 /* TLS references should always be enclosed in UNSPEC. */
11605 if (SYMBOL_REF_TLS_MODEL (op0))
11607 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11608 && ix86_cmodel != CM_LARGE_PIC)
11616 if (GET_CODE (disp) != CONST)
11618 disp = XEXP (disp, 0);
11622 /* We are unsafe to allow PLUS expressions. This limit allowed distance
11623 of GOT tables. We should not need these anyway. */
11624 if (GET_CODE (disp) != UNSPEC
11625 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11626 && XINT (disp, 1) != UNSPEC_GOTOFF
11627 && XINT (disp, 1) != UNSPEC_PLTOFF))
11630 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11631 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11637 if (GET_CODE (disp) == PLUS)
11639 if (!CONST_INT_P (XEXP (disp, 1)))
11641 disp = XEXP (disp, 0);
11645 if (TARGET_MACHO && darwin_local_data_pic (disp))
11648 if (GET_CODE (disp) != UNSPEC)
11651 switch (XINT (disp, 1))
11656 /* We need to check for both symbols and labels because VxWorks loads
11657 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11659 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11660 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11661 case UNSPEC_GOTOFF:
11662 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11663 While ABI specify also 32bit relocation but we don't produce it in
11664 small PIC model at all. */
11665 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11666 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11668 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11670 case UNSPEC_GOTTPOFF:
11671 case UNSPEC_GOTNTPOFF:
11672 case UNSPEC_INDNTPOFF:
11675 disp = XVECEXP (disp, 0, 0);
11676 return (GET_CODE (disp) == SYMBOL_REF
11677 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11678 case UNSPEC_NTPOFF:
11679 disp = XVECEXP (disp, 0, 0);
11680 return (GET_CODE (disp) == SYMBOL_REF
11681 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11682 case UNSPEC_DTPOFF:
11683 disp = XVECEXP (disp, 0, 0);
11684 return (GET_CODE (disp) == SYMBOL_REF
11685 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11691 /* Recognizes RTL expressions that are valid memory addresses for an
11692 instruction. The MODE argument is the machine mode for the MEM
11693 expression that wants to use this address.
11695 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
11696 convert common non-canonical forms to canonical form so that they will
11700 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11701 rtx addr, bool strict)
11703 struct ix86_address parts;
11704 rtx base, index, disp;
11705 HOST_WIDE_INT scale;
11707 if (ix86_decompose_address (addr, &parts) <= 0)
11708 /* Decomposition failed. */
11712 index = parts.index;
11714 scale = parts.scale;
11716 /* Validate base register.
11718 Don't allow SUBREG's that span more than a word here. It can lead to spill
11719 failures when the base is one word out of a two word structure, which is
11720 represented internally as a DImode int. */
11728 else if (GET_CODE (base) == SUBREG
11729 && REG_P (SUBREG_REG (base))
11730 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11732 reg = SUBREG_REG (base);
11734 /* Base is not a register. */
11737 if (GET_MODE (base) != Pmode)
11738 /* Base is not in Pmode. */
11741 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11742 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11743 /* Base is not valid. */
11747 /* Validate index register.
11749 Don't allow SUBREG's that span more than a word here -- same as above. */
11757 else if (GET_CODE (index) == SUBREG
11758 && REG_P (SUBREG_REG (index))
11759 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11761 reg = SUBREG_REG (index);
11763 /* Index is not a register. */
11766 if (GET_MODE (index) != Pmode)
11767 /* Index is not in Pmode. */
11770 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11771 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11772 /* Index is not valid. */
11776 /* Validate scale factor. */
11780 /* Scale without index. */
11783 if (scale != 2 && scale != 4 && scale != 8)
11784 /* Scale is not a valid multiplier. */
11788 /* Validate displacement. */
11791 if (GET_CODE (disp) == CONST
11792 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11793 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11794 switch (XINT (XEXP (disp, 0), 1))
11796 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11797 used. While ABI specify also 32bit relocations, we don't produce
11798 them at all and use IP relative instead. */
11800 case UNSPEC_GOTOFF:
11801 gcc_assert (flag_pic);
11803 goto is_legitimate_pic;
11805 /* 64bit address unspec. */
11808 case UNSPEC_GOTPCREL:
11809 gcc_assert (flag_pic);
11810 goto is_legitimate_pic;
11812 case UNSPEC_GOTTPOFF:
11813 case UNSPEC_GOTNTPOFF:
11814 case UNSPEC_INDNTPOFF:
11815 case UNSPEC_NTPOFF:
11816 case UNSPEC_DTPOFF:
11819 case UNSPEC_STACK_CHECK:
11820 gcc_assert (flag_split_stack);
11824 /* Invalid address unspec. */
11828 else if (SYMBOLIC_CONST (disp)
11832 && MACHOPIC_INDIRECT
11833 && !machopic_operand_p (disp)
11839 if (TARGET_64BIT && (index || base))
11841 /* foo@dtpoff(%rX) is ok. */
11842 if (GET_CODE (disp) != CONST
11843 || GET_CODE (XEXP (disp, 0)) != PLUS
11844 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11845 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11846 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11847 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11848 /* Non-constant pic memory reference. */
11851 else if (! legitimate_pic_address_disp_p (disp))
11852 /* Displacement is an invalid pic construct. */
11855 /* This code used to verify that a symbolic pic displacement
11856 includes the pic_offset_table_rtx register.
11858 While this is good idea, unfortunately these constructs may
11859 be created by "adds using lea" optimization for incorrect
11868 This code is nonsensical, but results in addressing
11869 GOT table with pic_offset_table_rtx base. We can't
11870 just refuse it easily, since it gets matched by
11871 "addsi3" pattern, that later gets split to lea in the
11872 case output register differs from input. While this
11873 can be handled by separate addsi pattern for this case
11874 that never results in lea, this seems to be easier and
11875 correct fix for crash to disable this test. */
11877 else if (GET_CODE (disp) != LABEL_REF
11878 && !CONST_INT_P (disp)
11879 && (GET_CODE (disp) != CONST
11880 || !legitimate_constant_p (disp))
11881 && (GET_CODE (disp) != SYMBOL_REF
11882 || !legitimate_constant_p (disp)))
11883 /* Displacement is not constant. */
11885 else if (TARGET_64BIT
11886 && !x86_64_immediate_operand (disp, VOIDmode))
11887 /* Displacement is out of range. */
11891 /* Everything looks valid. */
11895 /* Determine if a given RTX is a valid constant address. */
11898 constant_address_p (rtx x)
11900 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11903 /* Return a unique alias set for the GOT. */
11905 static alias_set_type
11906 ix86_GOT_alias_set (void)
11908 static alias_set_type set = -1;
11910 set = new_alias_set ();
11914 /* Return a legitimate reference for ORIG (an address) using the
11915 register REG. If REG is 0, a new pseudo is generated.
11917 There are two types of references that must be handled:
11919 1. Global data references must load the address from the GOT, via
11920 the PIC reg. An insn is emitted to do this load, and the reg is
11923 2. Static data references, constant pool addresses, and code labels
11924 compute the address as an offset from the GOT, whose base is in
11925 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11926 differentiate them from global data objects. The returned
11927 address is the PIC reg + an unspec constant.
11929 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11930 reg also appears in the address. */
11933 legitimize_pic_address (rtx orig, rtx reg)
11936 rtx new_rtx = orig;
11940 if (TARGET_MACHO && !TARGET_64BIT)
11943 reg = gen_reg_rtx (Pmode);
11944 /* Use the generic Mach-O PIC machinery. */
11945 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11949 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11951 else if (TARGET_64BIT
11952 && ix86_cmodel != CM_SMALL_PIC
11953 && gotoff_operand (addr, Pmode))
11956 /* This symbol may be referenced via a displacement from the PIC
11957 base address (@GOTOFF). */
11959 if (reload_in_progress)
11960 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11961 if (GET_CODE (addr) == CONST)
11962 addr = XEXP (addr, 0);
11963 if (GET_CODE (addr) == PLUS)
11965 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11967 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11970 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11971 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11973 tmpreg = gen_reg_rtx (Pmode);
11976 emit_move_insn (tmpreg, new_rtx);
11980 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11981 tmpreg, 1, OPTAB_DIRECT);
11984 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11986 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11988 /* This symbol may be referenced via a displacement from the PIC
11989 base address (@GOTOFF). */
11991 if (reload_in_progress)
11992 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11993 if (GET_CODE (addr) == CONST)
11994 addr = XEXP (addr, 0);
11995 if (GET_CODE (addr) == PLUS)
11997 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11999 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12002 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12003 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12004 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12008 emit_move_insn (reg, new_rtx);
12012 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12013 /* We can't use @GOTOFF for text labels on VxWorks;
12014 see gotoff_operand. */
12015 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12017 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12019 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12020 return legitimize_dllimport_symbol (addr, true);
12021 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12022 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12023 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12025 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12026 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12030 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12032 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12033 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12034 new_rtx = gen_const_mem (Pmode, new_rtx);
12035 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12038 reg = gen_reg_rtx (Pmode);
12039 /* Use directly gen_movsi, otherwise the address is loaded
12040 into register for CSE. We don't want to CSE this addresses,
12041 instead we CSE addresses from the GOT table, so skip this. */
12042 emit_insn (gen_movsi (reg, new_rtx));
12047 /* This symbol must be referenced via a load from the
12048 Global Offset Table (@GOT). */
12050 if (reload_in_progress)
12051 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12052 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12053 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12055 new_rtx = force_reg (Pmode, new_rtx);
12056 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12057 new_rtx = gen_const_mem (Pmode, new_rtx);
12058 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12061 reg = gen_reg_rtx (Pmode);
12062 emit_move_insn (reg, new_rtx);
12068 if (CONST_INT_P (addr)
12069 && !x86_64_immediate_operand (addr, VOIDmode))
12073 emit_move_insn (reg, addr);
12077 new_rtx = force_reg (Pmode, addr);
12079 else if (GET_CODE (addr) == CONST)
12081 addr = XEXP (addr, 0);
12083 /* We must match stuff we generate before. Assume the only
12084 unspecs that can get here are ours. Not that we could do
12085 anything with them anyway.... */
12086 if (GET_CODE (addr) == UNSPEC
12087 || (GET_CODE (addr) == PLUS
12088 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12090 gcc_assert (GET_CODE (addr) == PLUS);
12092 if (GET_CODE (addr) == PLUS)
12094 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12096 /* Check first to see if this is a constant offset from a @GOTOFF
12097 symbol reference. */
12098 if (gotoff_operand (op0, Pmode)
12099 && CONST_INT_P (op1))
12103 if (reload_in_progress)
12104 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12105 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12107 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12108 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12109 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12113 emit_move_insn (reg, new_rtx);
12119 if (INTVAL (op1) < -16*1024*1024
12120 || INTVAL (op1) >= 16*1024*1024)
12122 if (!x86_64_immediate_operand (op1, Pmode))
12123 op1 = force_reg (Pmode, op1);
12124 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12130 base = legitimize_pic_address (XEXP (addr, 0), reg);
12131 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12132 base == reg ? NULL_RTX : reg);
12134 if (CONST_INT_P (new_rtx))
12135 new_rtx = plus_constant (base, INTVAL (new_rtx));
12138 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12140 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12141 new_rtx = XEXP (new_rtx, 1);
12143 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12151 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12154 get_thread_pointer (int to_reg)
12158 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12162 reg = gen_reg_rtx (Pmode);
12163 insn = gen_rtx_SET (VOIDmode, reg, tp);
12164 insn = emit_insn (insn);
12169 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12170 false if we expect this to be used for a memory address and true if
12171 we expect to load the address into a register. */
12174 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12176 rtx dest, base, off, pic, tp;
12181 case TLS_MODEL_GLOBAL_DYNAMIC:
12182 dest = gen_reg_rtx (Pmode);
12183 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12185 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12187 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12190 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12191 insns = get_insns ();
12194 RTL_CONST_CALL_P (insns) = 1;
12195 emit_libcall_block (insns, dest, rax, x);
12197 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12198 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12200 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12202 if (TARGET_GNU2_TLS)
12204 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12206 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12210 case TLS_MODEL_LOCAL_DYNAMIC:
12211 base = gen_reg_rtx (Pmode);
12212 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12214 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12216 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12219 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12220 insns = get_insns ();
12223 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12224 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12225 RTL_CONST_CALL_P (insns) = 1;
12226 emit_libcall_block (insns, base, rax, note);
12228 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12229 emit_insn (gen_tls_local_dynamic_base_64 (base));
12231 emit_insn (gen_tls_local_dynamic_base_32 (base));
12233 if (TARGET_GNU2_TLS)
12235 rtx x = ix86_tls_module_base ();
12237 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12238 gen_rtx_MINUS (Pmode, x, tp));
12241 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12242 off = gen_rtx_CONST (Pmode, off);
12244 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12246 if (TARGET_GNU2_TLS)
12248 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12250 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12255 case TLS_MODEL_INITIAL_EXEC:
12259 type = UNSPEC_GOTNTPOFF;
12263 if (reload_in_progress)
12264 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12265 pic = pic_offset_table_rtx;
12266 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12268 else if (!TARGET_ANY_GNU_TLS)
12270 pic = gen_reg_rtx (Pmode);
12271 emit_insn (gen_set_got (pic));
12272 type = UNSPEC_GOTTPOFF;
12277 type = UNSPEC_INDNTPOFF;
12280 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12281 off = gen_rtx_CONST (Pmode, off);
12283 off = gen_rtx_PLUS (Pmode, pic, off);
12284 off = gen_const_mem (Pmode, off);
12285 set_mem_alias_set (off, ix86_GOT_alias_set ());
12287 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12289 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12290 off = force_reg (Pmode, off);
12291 return gen_rtx_PLUS (Pmode, base, off);
12295 base = get_thread_pointer (true);
12296 dest = gen_reg_rtx (Pmode);
12297 emit_insn (gen_subsi3 (dest, base, off));
12301 case TLS_MODEL_LOCAL_EXEC:
12302 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12303 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12304 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12305 off = gen_rtx_CONST (Pmode, off);
12307 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12309 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12310 return gen_rtx_PLUS (Pmode, base, off);
12314 base = get_thread_pointer (true);
12315 dest = gen_reg_rtx (Pmode);
12316 emit_insn (gen_subsi3 (dest, base, off));
12321 gcc_unreachable ();
12327 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12330 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12331 htab_t dllimport_map;
12334 get_dllimport_decl (tree decl)
12336 struct tree_map *h, in;
12339 const char *prefix;
12340 size_t namelen, prefixlen;
12345 if (!dllimport_map)
12346 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12348 in.hash = htab_hash_pointer (decl);
12349 in.base.from = decl;
12350 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12351 h = (struct tree_map *) *loc;
12355 *loc = h = ggc_alloc_tree_map ();
12357 h->base.from = decl;
12358 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12359 VAR_DECL, NULL, ptr_type_node);
12360 DECL_ARTIFICIAL (to) = 1;
12361 DECL_IGNORED_P (to) = 1;
12362 DECL_EXTERNAL (to) = 1;
12363 TREE_READONLY (to) = 1;
12365 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12366 name = targetm.strip_name_encoding (name);
12367 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12368 ? "*__imp_" : "*__imp__";
12369 namelen = strlen (name);
12370 prefixlen = strlen (prefix);
12371 imp_name = (char *) alloca (namelen + prefixlen + 1);
12372 memcpy (imp_name, prefix, prefixlen);
12373 memcpy (imp_name + prefixlen, name, namelen + 1);
12375 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12376 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12377 SET_SYMBOL_REF_DECL (rtl, to);
12378 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12380 rtl = gen_const_mem (Pmode, rtl);
12381 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12383 SET_DECL_RTL (to, rtl);
12384 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
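/* Thus (illustrative) a dllimported function "foo" gets a synthetic
   pointer variable whose assembler name is "*__imp__foo" (or
   "*__imp_foo" for fastcall names and for targets with an empty user
   label prefix), and all references load through that pointer.  */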
12389 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12390 true if we require the result be a register. */
12393 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12398 gcc_assert (SYMBOL_REF_DECL (symbol));
12399 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12401 x = DECL_RTL (imp_decl);
12403 x = force_reg (Pmode, x);
12407 /* Try machine-dependent ways of modifying an illegitimate address
12408 to be legitimate. If we find one, return the new, valid address.
12409 This macro is used in only one place: `memory_address' in explow.c.
12411 OLDX is the address as it was before break_out_memory_refs was called.
12412 In some cases it is useful to look at this to decide what needs to be done.
12414 It is always safe for this macro to do nothing. It exists to recognize
12415 opportunities to optimize the output.
12417 For the 80386, we handle X+REG by loading X into a register R and
12418 using R+REG. R will go in a general reg and indexing will be used.
12419 However, if REG is a broken-out memory address or multiplication,
12420 nothing needs to be done because REG can certainly go in a general reg.
12422 When -fpic is used, special handling is needed for symbolic references.
12423 See comments by legitimize_pic_address in i386.c for details. */
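/* Example (illustrative): for an access like p[i] with 4-byte
   elements, the incoming address may be

	(plus (ashift (reg i) (const_int 2)) (reg p))

   which the code below canonicalizes to

	(plus (mult (reg i) (const_int 4)) (reg p))

   so that it matches the hardware's base + index*scale form.  */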
12426 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12427 enum machine_mode mode)
12432 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12434 return legitimize_tls_address (x, (enum tls_model) log, false);
12435 if (GET_CODE (x) == CONST
12436 && GET_CODE (XEXP (x, 0)) == PLUS
12437 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12438 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12440 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12441 (enum tls_model) log, false);
12442 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12445 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12447 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12448 return legitimize_dllimport_symbol (x, true);
12449 if (GET_CODE (x) == CONST
12450 && GET_CODE (XEXP (x, 0)) == PLUS
12451 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12452 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12454 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12455 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12459 if (flag_pic && SYMBOLIC_CONST (x))
12460 return legitimize_pic_address (x, 0);
12462 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
12463 if (GET_CODE (x) == ASHIFT
12464 && CONST_INT_P (XEXP (x, 1))
12465 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12468 log = INTVAL (XEXP (x, 1));
12469 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12470 GEN_INT (1 << log));
12473 if (GET_CODE (x) == PLUS)
12475 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12477 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12478 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12479 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12482 log = INTVAL (XEXP (XEXP (x, 0), 1));
12483 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12484 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12485 GEN_INT (1 << log));
12488 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12489 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12490 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12493 log = INTVAL (XEXP (XEXP (x, 1), 1));
12494 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12495 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12496 GEN_INT (1 << log));
12499 /* Put multiply first if it isn't already. */
12500 if (GET_CODE (XEXP (x, 1)) == MULT)
12502 rtx tmp = XEXP (x, 0);
12503 XEXP (x, 0) = XEXP (x, 1);
12508 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12509 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12510 created by virtual register instantiation, register elimination, and
12511 similar optimizations. */
12512 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12515 x = gen_rtx_PLUS (Pmode,
12516 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12517 XEXP (XEXP (x, 1), 0)),
12518 XEXP (XEXP (x, 1), 1));
12522 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12523 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12524 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12525 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12526 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12527 && CONSTANT_P (XEXP (x, 1)))
12530 rtx other = NULL_RTX;
12532 if (CONST_INT_P (XEXP (x, 1)))
12534 constant = XEXP (x, 1);
12535 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12537 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12539 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12540 other = XEXP (x, 1);
12548 x = gen_rtx_PLUS (Pmode,
12549 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12550 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12551 plus_constant (other, INTVAL (constant)));
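/* Concretely (illustrative), after register elimination an address
   such as

	(plus (mult (reg i) (const_int 4))
	      (plus (reg fp) (const_int -16)))

   becomes

	(plus (plus (mult (reg i) (const_int 4)) (reg fp))
	      (const_int -16))

   which decomposes cleanly into index*scale + base + displacement.  */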
12555 if (changed && ix86_legitimate_address_p (mode, x, false))
12558 if (GET_CODE (XEXP (x, 0)) == MULT)
12561 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12564 if (GET_CODE (XEXP (x, 1)) == MULT)
12567 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12571 && REG_P (XEXP (x, 1))
12572 && REG_P (XEXP (x, 0)))
12575 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12578 x = legitimize_pic_address (x, 0);
12581 if (changed && ix86_legitimate_address_p (mode, x, false))
12584 if (REG_P (XEXP (x, 0)))
12586 rtx temp = gen_reg_rtx (Pmode);
12587 rtx val = force_operand (XEXP (x, 1), temp);
12589 emit_move_insn (temp, val);
12591 XEXP (x, 1) = temp;
12595 else if (REG_P (XEXP (x, 1)))
12597 rtx temp = gen_reg_rtx (Pmode);
12598 rtx val = force_operand (XEXP (x, 0), temp);
12600 emit_move_insn (temp, val);
12602 XEXP (x, 0) = temp;
12610 /* Print an integer constant expression in assembler syntax. Addition
12611 and subtraction are the only arithmetic that may appear in these
12612 expressions. FILE is the stdio stream to write to, X is the rtx, and
12613 CODE is the operand print code from the output string. */
12616 output_pic_addr_const (FILE *file, rtx x, int code)
12620 switch (GET_CODE (x))
12623 gcc_assert (flag_pic);
12628 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12629 output_addr_const (file, x);
12632 const char *name = XSTR (x, 0);
12634 /* Mark the decl as referenced so that cgraph will
12635 output the function. */
12636 if (SYMBOL_REF_DECL (x))
12637 mark_decl_referenced (SYMBOL_REF_DECL (x));
12640 if (MACHOPIC_INDIRECT
12641 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12642 name = machopic_indirection_name (x, /*stub_p=*/true);
12644 assemble_name (file, name);
12646 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12647 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12648 fputs ("@PLT", file);
12655 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12656 assemble_name (asm_out_file, buf);
12660 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12664 /* This used to output parentheses around the expression,
12665 but that does not work on the 386 (either ATT or BSD assembler). */
12666 output_pic_addr_const (file, XEXP (x, 0), code);
12670 if (GET_MODE (x) == VOIDmode)
12672 /* We can use %d if the number is <32 bits and positive. */
12673 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12674 fprintf (file, "0x%lx%08lx",
12675 (unsigned long) CONST_DOUBLE_HIGH (x),
12676 (unsigned long) CONST_DOUBLE_LOW (x));
12678 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12681 /* We can't handle floating point constants;
12682 TARGET_PRINT_OPERAND must handle them. */
12683 output_operand_lossage ("floating constant misused");
12687 /* Some assemblers need integer constants to appear first. */
12688 if (CONST_INT_P (XEXP (x, 0)))
12690 output_pic_addr_const (file, XEXP (x, 0), code);
12692 output_pic_addr_const (file, XEXP (x, 1), code);
12696 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12697 output_pic_addr_const (file, XEXP (x, 1), code);
12699 output_pic_addr_const (file, XEXP (x, 0), code);
12705 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12706 output_pic_addr_const (file, XEXP (x, 0), code);
12708 output_pic_addr_const (file, XEXP (x, 1), code);
12710 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12714 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12716 bool f = i386_asm_output_addr_const_extra (file, x);
12721 gcc_assert (XVECLEN (x, 0) == 1);
12722 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12723 switch (XINT (x, 1))
12726 fputs ("@GOT", file);
12728 case UNSPEC_GOTOFF:
12729 fputs ("@GOTOFF", file);
12731 case UNSPEC_PLTOFF:
12732 fputs ("@PLTOFF", file);
12734 case UNSPEC_GOTPCREL:
12735 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12736 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12738 case UNSPEC_GOTTPOFF:
12739 /* FIXME: This might be @TPOFF in Sun ld too. */
12740 fputs ("@gottpoff", file);
12743 fputs ("@tpoff", file);
12745 case UNSPEC_NTPOFF:
12747 fputs ("@tpoff", file);
12749 fputs ("@ntpoff", file);
12751 case UNSPEC_DTPOFF:
12752 fputs ("@dtpoff", file);
12754 case UNSPEC_GOTNTPOFF:
12756 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12757 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12759 fputs ("@gotntpoff", file);
12761 case UNSPEC_INDNTPOFF:
12762 fputs ("@indntpoff", file);
12765 case UNSPEC_MACHOPIC_OFFSET:
12767 machopic_output_function_base_name (file);
12771 output_operand_lossage ("invalid UNSPEC as operand");
12777 output_operand_lossage ("invalid expression as operand");
12781 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12782 We need to emit DTP-relative relocations. */
12784 static void ATTRIBUTE_UNUSED
12785 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12787 fputs (ASM_LONG, file);
12788 output_addr_const (file, x);
12789 fputs ("@dtpoff", file);
12795 fputs (", 0", file);
12798 gcc_unreachable ();
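/* E.g. (illustrative) for "__thread int x" this emits

	.long	x@dtpoff

   and, when an 8-byte value is requested, pads the high half with
   the trailing ", 0".  */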
12802 /* Return true if X is a representation of the PIC register. This copes
12803 with calls from ix86_find_base_term, where the register might have
12804 been replaced by a cselib value. */
12807 ix86_pic_register_p (rtx x)
12809 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12810 return (pic_offset_table_rtx
12811 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12813 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12816 /* Helper function for ix86_delegitimize_address.
12817 Attempt to delegitimize TLS local-exec accesses. */
12820 ix86_delegitimize_tls_address (rtx orig_x)
12822 rtx x = orig_x, unspec;
12823 struct ix86_address addr;
12825 if (!TARGET_TLS_DIRECT_SEG_REFS)
12829 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12831 if (ix86_decompose_address (x, &addr) == 0
12832 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12833 || addr.disp == NULL_RTX
12834 || GET_CODE (addr.disp) != CONST)
12836 unspec = XEXP (addr.disp, 0);
12837 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12838 unspec = XEXP (unspec, 0);
12839 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12841 x = XVECEXP (unspec, 0, 0);
12842 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12843 if (unspec != XEXP (addr.disp, 0))
12844 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12847 rtx idx = addr.index;
12848 if (addr.scale != 1)
12849 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12850 x = gen_rtx_PLUS (Pmode, idx, x);
12853 x = gen_rtx_PLUS (Pmode, addr.base, x);
12854 if (MEM_P (orig_x))
12855 x = replace_equiv_address_nv (orig_x, x);
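/* E.g. (illustrative) a local-exec access such as

	%fs:x@tpoff(%rbx,%rcx,4)

   is rebuilt above as

	(plus (reg rbx) (plus (mult (reg rcx) (const_int 4))
			      (symbol_ref x)))

   so debug output can refer to the symbol again.  */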
12859 /* In the name of slightly smaller debug output, and to cater to
12860 general assembler lossage, recognize PIC+GOTOFF and turn it back
12861 into a direct symbol reference.
12863 On Darwin, this is necessary to avoid a crash, because Darwin
12864 has a different PIC label for each routine but the DWARF debugging
12865 information is not associated with any particular routine, so it's
12866 necessary to remove references to the PIC label from RTL stored by
12867 the DWARF output code. */
12870 ix86_delegitimize_address (rtx x)
12872 rtx orig_x = delegitimize_mem_from_attrs (x);
12873 /* addend is NULL or some rtx if x is something+GOTOFF where
12874 something doesn't include the PIC register. */
12875 rtx addend = NULL_RTX;
12876 /* reg_addend is NULL or a multiple of some register. */
12877 rtx reg_addend = NULL_RTX;
12878 /* const_addend is NULL or a const_int. */
12879 rtx const_addend = NULL_RTX;
12880 /* This is the result, or NULL. */
12881 rtx result = NULL_RTX;
12890 if (GET_CODE (x) != CONST
12891 || GET_CODE (XEXP (x, 0)) != UNSPEC
12892 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12893 || !MEM_P (orig_x))
12894 return ix86_delegitimize_tls_address (orig_x);
12895 x = XVECEXP (XEXP (x, 0), 0, 0);
12896 if (GET_MODE (orig_x) != Pmode)
12897 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12901 if (GET_CODE (x) != PLUS
12902 || GET_CODE (XEXP (x, 1)) != CONST)
12903 return ix86_delegitimize_tls_address (orig_x);
12905 if (ix86_pic_register_p (XEXP (x, 0)))
12906 /* %ebx + GOT/GOTOFF */
12908 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12910 /* %ebx + %reg * scale + GOT/GOTOFF */
12911 reg_addend = XEXP (x, 0);
12912 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12913 reg_addend = XEXP (reg_addend, 1);
12914 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12915 reg_addend = XEXP (reg_addend, 0);
12918 reg_addend = NULL_RTX;
12919 addend = XEXP (x, 0);
12923 addend = XEXP (x, 0);
12925 x = XEXP (XEXP (x, 1), 0);
12926 if (GET_CODE (x) == PLUS
12927 && CONST_INT_P (XEXP (x, 1)))
12929 const_addend = XEXP (x, 1);
12933 if (GET_CODE (x) == UNSPEC
12934 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12935 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12936 result = XVECEXP (x, 0, 0);
12938 if (TARGET_MACHO && darwin_local_data_pic (x)
12939 && !MEM_P (orig_x))
12940 result = XVECEXP (x, 0, 0);
12943 return ix86_delegitimize_tls_address (orig_x);
12946 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12948 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12951 /* If the rest of original X doesn't involve the PIC register, add
12952 addend and subtract pic_offset_table_rtx. This can happen e.g.
12954 leal (%ebx, %ecx, 4), %ecx
12956 movl foo@GOTOFF(%ecx), %edx
12957 in which case we return (%ecx - %ebx) + foo. */
12958 if (pic_offset_table_rtx)
12959 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12960 pic_offset_table_rtx),
12965 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12966 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12970 /* If X is a machine specific address (i.e. a symbol or label being
12971 referenced as a displacement from the GOT implemented using an
12972 UNSPEC), then return the base term. Otherwise return X. */
12975 ix86_find_base_term (rtx x)
12981 if (GET_CODE (x) != CONST)
12983 term = XEXP (x, 0);
12984 if (GET_CODE (term) == PLUS
12985 && (CONST_INT_P (XEXP (term, 1))
12986 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12987 term = XEXP (term, 0);
12988 if (GET_CODE (term) != UNSPEC
12989 || XINT (term, 1) != UNSPEC_GOTPCREL)
12992 return XVECEXP (term, 0, 0);
12995 return ix86_delegitimize_address (x);
12999 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13000 int fp, FILE *file)
13002 const char *suffix;
13004 if (mode == CCFPmode || mode == CCFPUmode)
13006 code = ix86_fp_compare_code_to_integer (code);
13010 code = reverse_condition (code);
13061 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13065 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13066 Those same assemblers have the same but opposite lossage on cmov. */
13067 if (mode == CCmode)
13068 suffix = fp ? "nbe" : "a";
13069 else if (mode == CCCmode)
13072 gcc_unreachable ();
13088 gcc_unreachable ();
13092 gcc_assert (mode == CCmode || mode == CCCmode);
13109 gcc_unreachable ();
13113 /* ??? As above. */
13114 gcc_assert (mode == CCmode || mode == CCCmode);
13115 suffix = fp ? "nb" : "ae";
13118 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13122 /* ??? As above. */
13123 if (mode == CCmode)
13125 else if (mode == CCCmode)
13126 suffix = fp ? "nb" : "ae";
13128 gcc_unreachable ();
13131 suffix = fp ? "u" : "p";
13134 suffix = fp ? "nu" : "np";
13137 gcc_unreachable ();
13139 fputs (suffix, file);
13142 /* Print the name of register X to FILE based on its machine mode and number.
13143 If CODE is 'w', pretend the mode is HImode.
13144 If CODE is 'b', pretend the mode is QImode.
13145 If CODE is 'k', pretend the mode is SImode.
13146 If CODE is 'q', pretend the mode is DImode.
13147 If CODE is 'x', pretend the mode is V4SFmode.
13148 If CODE is 't', pretend the mode is V8SFmode.
13149 If CODE is 'h', pretend the reg is the 'high' byte register.
13150 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13151 If CODE is 'd', duplicate the operand for AVX instruction.
13155 print_reg (rtx x, int code, FILE *file)
13158 bool duplicated = code == 'd' && TARGET_AVX;
13160 gcc_assert (x == pc_rtx
13161 || (REGNO (x) != ARG_POINTER_REGNUM
13162 && REGNO (x) != FRAME_POINTER_REGNUM
13163 && REGNO (x) != FLAGS_REG
13164 && REGNO (x) != FPSR_REG
13165 && REGNO (x) != FPCR_REG));
13167 if (ASSEMBLER_DIALECT == ASM_ATT)
13172 gcc_assert (TARGET_64BIT);
13173 fputs ("rip", file);
13177 if (code == 'w' || MMX_REG_P (x))
13179 else if (code == 'b')
13181 else if (code == 'k')
13183 else if (code == 'q')
13185 else if (code == 'y')
13187 else if (code == 'h')
13189 else if (code == 'x')
13191 else if (code == 't')
13194 code = GET_MODE_SIZE (GET_MODE (x));
13196 /* Irritatingly, AMD extended registers use a different naming convention
13197 from the normal registers. */
13198 if (REX_INT_REG_P (x))
13200 gcc_assert (TARGET_64BIT);
13204 error ("extended registers have no high halves");
13207 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13210 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13213 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13216 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13219 error ("unsupported operand size for extended register");
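/* So (illustrative) the second REX register prints as r9b, r9w, r9d
   or r9, depending on whether a 1-, 2-, 4- or 8-byte operand was
   requested.  */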
13229 if (STACK_TOP_P (x))
13238 if (! ANY_FP_REG_P (x))
13239 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13244 reg = hi_reg_name[REGNO (x)];
13247 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13249 reg = qi_reg_name[REGNO (x)];
13252 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13254 reg = qi_high_reg_name[REGNO (x)];
13259 gcc_assert (!duplicated);
13261 fputs (hi_reg_name[REGNO (x)] + 1, file);
13266 gcc_unreachable ();
13272 if (ASSEMBLER_DIALECT == ASM_ATT)
13273 fprintf (file, ", %%%s", reg);
13275 fprintf (file, ", %s", reg);
13279 /* Locate some local-dynamic symbol still in use by this function
13280 so that we can print its name in some tls_local_dynamic_base pattern. */
13284 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13288 if (GET_CODE (x) == SYMBOL_REF
13289 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13291 cfun->machine->some_ld_name = XSTR (x, 0);
13298 static const char *
13299 get_some_local_dynamic_name (void)
13303 if (cfun->machine->some_ld_name)
13304 return cfun->machine->some_ld_name;
13306 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13307 if (NONDEBUG_INSN_P (insn)
13308 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13309 return cfun->machine->some_ld_name;
13314 /* Meaning of CODE:
13315 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13316 C -- print opcode suffix for set/cmov insn.
13317 c -- like C, but print reversed condition
13318 F,f -- likewise, but for floating-point.
13319 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13321 R -- print the prefix for register names.
13322 z -- print the opcode suffix for the size of the current operand.
13323 Z -- likewise, with special suffixes for x87 instructions.
13324 * -- print a star (in certain assembler syntax)
13325 A -- print an absolute memory reference.
13326 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13327 s -- print a shift double count, followed by the assembler's argument
13329 b -- print the QImode name of the register for the indicated operand.
13330 %b0 would print %al if operands[0] is reg 0.
13331 w -- likewise, print the HImode name of the register.
13332 k -- likewise, print the SImode name of the register.
13333 q -- likewise, print the DImode name of the register.
13334 x -- likewise, print the V4SFmode name of the register.
13335 t -- likewise, print the V8SFmode name of the register.
13336 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13337 y -- print "st(0)" instead of "st" as a register.
13338 d -- print duplicated register operand for AVX instruction.
13339 D -- print condition for SSE cmp instruction.
13340 P -- if PIC, print an @PLT suffix.
13341 X -- don't print any sort of PIC '@' suffix for a symbol.
13342 & -- print some in-use local-dynamic symbol name.
13343 H -- print a memory address offset by 8; used for sse high-parts
13344 Y -- print condition for XOP pcom* instruction.
13345 + -- print a branch hint as 'cs' or 'ds' prefix
13346 ; -- print a semicolon (after prefixes due to bug in older gas).
13347 @ -- print a segment register of thread base pointer load
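/* For example (illustrative): with operands[0] in %eax, an AT&T
   template prints %b0 as %al, %w0 as %ax, %k0 as %eax and %q0 as
   %rax, independently of the operand's actual mode.  */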
13351 ix86_print_operand (FILE *file, rtx x, int code)
13358 if (ASSEMBLER_DIALECT == ASM_ATT)
13364 const char *name = get_some_local_dynamic_name ();
13366 output_operand_lossage ("'%%&' used without any "
13367 "local dynamic TLS references");
13369 assemble_name (file, name);
13374 switch (ASSEMBLER_DIALECT)
13381 /* Intel syntax. For absolute addresses, registers should not
13382 be surrounded by braces. */
13386 ix86_print_operand (file, x, 0);
13393 gcc_unreachable ();
13396 ix86_print_operand (file, x, 0);
13401 if (ASSEMBLER_DIALECT == ASM_ATT)
13406 if (ASSEMBLER_DIALECT == ASM_ATT)
13411 if (ASSEMBLER_DIALECT == ASM_ATT)
13416 if (ASSEMBLER_DIALECT == ASM_ATT)
13421 if (ASSEMBLER_DIALECT == ASM_ATT)
13426 if (ASSEMBLER_DIALECT == ASM_ATT)
13431 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13433 /* Opcodes don't get size suffixes if using Intel opcodes. */
13434 if (ASSEMBLER_DIALECT == ASM_INTEL)
13437 switch (GET_MODE_SIZE (GET_MODE (x)))
13456 output_operand_lossage
13457 ("invalid operand size for operand code '%c'", code);
13462 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13464 (0, "non-integer operand used with operand code '%c'", code);
13468 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13469 if (ASSEMBLER_DIALECT == ASM_INTEL)
13472 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13474 switch (GET_MODE_SIZE (GET_MODE (x)))
13477 #ifdef HAVE_AS_IX86_FILDS
13487 #ifdef HAVE_AS_IX86_FILDQ
13490 fputs ("ll", file);
13498 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13500 /* 387 opcodes don't get size suffixes
13501 if the operands are registers. */
13502 if (STACK_REG_P (x))
13505 switch (GET_MODE_SIZE (GET_MODE (x)))
13526 output_operand_lossage
13527 ("invalid operand type used with operand code '%c'", code);
13531 output_operand_lossage
13532 ("invalid operand size for operand code '%c'", code);
13549 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13551 ix86_print_operand (file, x, 0);
13552 fputs (", ", file);
13557 /* A little bit of brain damage here: the SSE compare instructions
13558 use completely different names for the comparisons than the
13559 fp conditional moves do. */
13562 switch (GET_CODE (x))
13565 fputs ("eq", file);
13568 fputs ("eq_us", file);
13571 fputs ("lt", file);
13574 fputs ("nge", file);
13577 fputs ("le", file);
13580 fputs ("ngt", file);
13583 fputs ("unord", file);
13586 fputs ("neq", file);
13589 fputs ("neq_oq", file);
13592 fputs ("ge", file);
13595 fputs ("nlt", file);
13598 fputs ("gt", file);
13601 fputs ("nle", file);
13604 fputs ("ord", file);
13607 output_operand_lossage ("operand is not a condition code, "
13608 "invalid operand code 'D'");
13614 switch (GET_CODE (x))
13618 fputs ("eq", file);
13622 fputs ("lt", file);
13626 fputs ("le", file);
13629 fputs ("unord", file);
13633 fputs ("neq", file);
13637 fputs ("nlt", file);
13641 fputs ("nle", file);
13644 fputs ("ord", file);
13647 output_operand_lossage ("operand is not a condition code, "
13648 "invalid operand code 'D'");
13654 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13655 if (ASSEMBLER_DIALECT == ASM_ATT)
13657 switch (GET_MODE (x))
13659 case HImode: putc ('w', file); break;
13661 case SFmode: putc ('l', file); break;
13663 case DFmode: putc ('q', file); break;
13664 default: gcc_unreachable ();
13671 if (!COMPARISON_P (x))
13673 output_operand_lossage ("operand is neither a constant nor a "
13674 "condition code, invalid operand code "
13678 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13681 if (!COMPARISON_P (x))
13683 output_operand_lossage ("operand is neither a constant nor a "
13684 "condition code, invalid operand code "
13688 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13689 if (ASSEMBLER_DIALECT == ASM_ATT)
13692 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13695 /* Like above, but reverse condition */
13697 /* Check to see if argument to %c is really a constant
13698 and not a condition code which needs to be reversed. */
13699 if (!COMPARISON_P (x))
13701 output_operand_lossage ("operand is neither a constant nor a "
13702 "condition code, invalid operand "
13706 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13709 if (!COMPARISON_P (x))
13711 output_operand_lossage ("operand is neither a constant nor a "
13712 "condition code, invalid operand "
13716 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13717 if (ASSEMBLER_DIALECT == ASM_ATT)
13720 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13724 /* It doesn't actually matter what mode we use here, as we're
13725 only going to use this for printing. */
13726 x = adjust_address_nv (x, DImode, 8);
13734 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13737 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13740 int pred_val = INTVAL (XEXP (x, 0));
13742 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13743 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13745 int taken = pred_val > REG_BR_PROB_BASE / 2;
13746 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13748 /* Emit hints only in the case default branch prediction
13749 heuristics would fail. */
13750 if (taken != cputaken)
13752 /* We use 3e (DS) prefix for taken branches and
13753 2e (CS) prefix for not taken branches. */
13755 fputs ("ds ; ", file);
13757 fputs ("cs ; ", file);
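/* So (illustrative) a branch the profile predicts taken, where the
   CPU's static rule would fall through, is printed as

	ds ; jne .L5

   i.e. a 0x3e prefix byte ahead of the opcode, while "cs ; " (0x2e)
   marks a not-taken hint.  */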
13765 switch (GET_CODE (x))
13768 fputs ("neq", file);
13771 fputs ("eq", file);
13775 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13779 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13783 fputs ("le", file);
13787 fputs ("lt", file);
13790 fputs ("unord", file);
13793 fputs ("ord", file);
13796 fputs ("ueq", file);
13799 fputs ("nlt", file);
13802 fputs ("nle", file);
13805 fputs ("ule", file);
13808 fputs ("ult", file);
13811 fputs ("une", file);
13814 output_operand_lossage ("operand is not a condition code, "
13815 "invalid operand code 'Y'");
13821 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13827 if (ASSEMBLER_DIALECT == ASM_ATT)
13830 /* The kernel uses a different segment register for performance
13831 reasons; a system call would not have to trash the userspace
13832 segment register, which would be expensive. */
13833 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13834 fputs ("fs", file);
13836 fputs ("gs", file);
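/* Hence (illustrative) a thread-pointer load written with %@ in its
   template comes out using %fs for ordinary 64-bit code, but %gs for
   kernel code and for 32-bit code.  */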
13840 output_operand_lossage ("invalid operand code '%c'", code);
13845 print_reg (x, code, file);
13847 else if (MEM_P (x))
13849 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13850 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13851 && GET_MODE (x) != BLKmode)
13854 switch (GET_MODE_SIZE (GET_MODE (x)))
13856 case 1: size = "BYTE"; break;
13857 case 2: size = "WORD"; break;
13858 case 4: size = "DWORD"; break;
13859 case 8: size = "QWORD"; break;
13860 case 12: size = "TBYTE"; break;
13862 if (GET_MODE (x) == XFmode)
13867 case 32: size = "YMMWORD"; break;
13869 gcc_unreachable ();
13872 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13875 else if (code == 'w')
13877 else if (code == 'k')
13880 fputs (size, file);
13881 fputs (" PTR ", file);
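/* E.g. (illustrative) a 4-byte memory operand prints as

	DWORD PTR [esp+8]

   in Intel syntax, where AT&T syntax would simply say 8(%esp).  */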
13885 /* Avoid (%rip) for call operands. */
13886 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13887 && !CONST_INT_P (x))
13888 output_addr_const (file, x);
13889 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13890 output_operand_lossage ("invalid constraints for operand");
13892 output_address (x);
13895 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13900 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13901 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13903 if (ASSEMBLER_DIALECT == ASM_ATT)
13905 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13907 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13909 fprintf (file, "0x%08x", (unsigned int) l);
13912 /* These float cases don't actually occur as immediate operands. */
13913 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13917 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13918 fputs (dstr, file);
13921 else if (GET_CODE (x) == CONST_DOUBLE
13922 && GET_MODE (x) == XFmode)
13926 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13927 fputs (dstr, file);
13932 /* We have patterns that allow zero sets of memory, for instance.
13933 In 64-bit mode, we should probably support all 8-byte vectors,
13934 since we can in fact encode that into an immediate. */
13935 if (GET_CODE (x) == CONST_VECTOR)
13937 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13943 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13945 if (ASSEMBLER_DIALECT == ASM_ATT)
13948 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13949 || GET_CODE (x) == LABEL_REF)
13951 if (ASSEMBLER_DIALECT == ASM_ATT)
13954 fputs ("OFFSET FLAT:", file);
13957 if (CONST_INT_P (x))
13958 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13960 output_pic_addr_const (file, x, code);
13962 output_addr_const (file, x);
13967 ix86_print_operand_punct_valid_p (unsigned char code)
13969 return (code == '@' || code == '*' || code == '+'
13970 || code == '&' || code == ';');
13973 /* Print a memory operand whose address is ADDR. */
13976 ix86_print_operand_address (FILE *file, rtx addr)
13978 struct ix86_address parts;
13979 rtx base, index, disp;
13981 int ok = ix86_decompose_address (addr, &parts);
13986 index = parts.index;
13988 scale = parts.scale;
13996 if (ASSEMBLER_DIALECT == ASM_ATT)
13998 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14001 gcc_unreachable ();
14004 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
14005 if (TARGET_64BIT && !base && !index)
14009 if (GET_CODE (disp) == CONST
14010 && GET_CODE (XEXP (disp, 0)) == PLUS
14011 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14012 symbol = XEXP (XEXP (disp, 0), 0);
14014 if (GET_CODE (symbol) == LABEL_REF
14015 || (GET_CODE (symbol) == SYMBOL_REF
14016 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14019 if (!base && !index)
14021 /* Displacement only requires special attention. */
14023 if (CONST_INT_P (disp))
14025 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14026 fputs ("ds:", file);
14027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14030 output_pic_addr_const (file, disp, 0);
14032 output_addr_const (file, disp);
14036 if (ASSEMBLER_DIALECT == ASM_ATT)
14041 output_pic_addr_const (file, disp, 0);
14042 else if (GET_CODE (disp) == LABEL_REF)
14043 output_asm_label (disp);
14045 output_addr_const (file, disp);
14050 print_reg (base, 0, file);
14054 print_reg (index, 0, file);
14056 fprintf (file, ",%d", scale);
14062 rtx offset = NULL_RTX;
14066 /* Pull out the offset of a symbol; print any symbol itself. */
14067 if (GET_CODE (disp) == CONST
14068 && GET_CODE (XEXP (disp, 0)) == PLUS
14069 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14071 offset = XEXP (XEXP (disp, 0), 1);
14072 disp = gen_rtx_CONST (VOIDmode,
14073 XEXP (XEXP (disp, 0), 0));
14077 output_pic_addr_const (file, disp, 0);
14078 else if (GET_CODE (disp) == LABEL_REF)
14079 output_asm_label (disp);
14080 else if (CONST_INT_P (disp))
14083 output_addr_const (file, disp);
14089 print_reg (base, 0, file);
14092 if (INTVAL (offset) >= 0)
14094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14098 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14105 print_reg (index, 0, file);
14107 fprintf (file, "*%d", scale);
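/* Putting the pieces together (illustrative): base %ebx, index %ecx,
   scale 4 and displacement 12 print as

	12(%ebx,%ecx,4)		AT&T syntax
	[ebx+ecx*4+12]		Intel syntax  */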
14114 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14117 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14121 if (GET_CODE (x) != UNSPEC)
14124 op = XVECEXP (x, 0, 0);
14125 switch (XINT (x, 1))
14127 case UNSPEC_GOTTPOFF:
14128 output_addr_const (file, op);
14129 /* FIXME: This might be @TPOFF in Sun ld. */
14130 fputs ("@gottpoff", file);
14133 output_addr_const (file, op);
14134 fputs ("@tpoff", file);
14136 case UNSPEC_NTPOFF:
14137 output_addr_const (file, op);
14139 fputs ("@tpoff", file);
14141 fputs ("@ntpoff", file);
14143 case UNSPEC_DTPOFF:
14144 output_addr_const (file, op);
14145 fputs ("@dtpoff", file);
14147 case UNSPEC_GOTNTPOFF:
14148 output_addr_const (file, op);
14150 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14151 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14153 fputs ("@gotntpoff", file);
14155 case UNSPEC_INDNTPOFF:
14156 output_addr_const (file, op);
14157 fputs ("@indntpoff", file);
14160 case UNSPEC_MACHOPIC_OFFSET:
14161 output_addr_const (file, op);
14163 machopic_output_function_base_name (file);
14167 case UNSPEC_STACK_CHECK:
14171 gcc_assert (flag_split_stack);
14173 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14174 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14176 gcc_unreachable ();
14179 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14190 /* Split one or more double-mode RTL references into pairs of half-mode
14191 references. The RTL can be REG, offsettable MEM, integer constant, or
14192 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14193 split and "num" is its length. lo_half and hi_half are output arrays
14194 that parallel "operands". */
14197 split_double_mode (enum machine_mode mode, rtx operands[],
14198 int num, rtx lo_half[], rtx hi_half[])
14200 enum machine_mode half_mode;
14206 half_mode = DImode;
14209 half_mode = SImode;
14212 gcc_unreachable ();
14215 byte = GET_MODE_SIZE (half_mode);
14219 rtx op = operands[num];
14221 /* simplify_subreg refuses to split volatile memory addresses,
14222 but we still have to handle them. */
14225 lo_half[num] = adjust_address (op, half_mode, 0);
14226 hi_half[num] = adjust_address (op, half_mode, byte);
14230 lo_half[num] = simplify_gen_subreg (half_mode, op,
14231 GET_MODE (op) == VOIDmode
14232 ? mode : GET_MODE (op), 0);
14233 hi_half[num] = simplify_gen_subreg (half_mode, op,
14234 GET_MODE (op) == VOIDmode
14235 ? mode : GET_MODE (op), byte);
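/* For instance (illustrative), splitting the DImode constant
   0x1122334455667788 on ia32 yields lo_half 0x55667788 and hi_half
   0x11223344, while a DImode MEM splits into two SImode MEMs at
   offsets 0 and 4.  */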
14240 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14241 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14242 is the expression of the binary operation. The output may either be
14243 emitted here, or returned to the caller, like all output_* functions.
14245 There is no guarantee that the operands are the same mode, as they
14246 might be within FLOAT or FLOAT_EXTEND expressions. */
14248 #ifndef SYSV386_COMPAT
14249 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14250 wants to fix the assemblers because that causes incompatibility
14251 with gcc. No-one wants to fix gcc because that causes
14252 incompatibility with assemblers... You can use the option of
14253 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14254 #define SYSV386_COMPAT 1
14258 output_387_binary_op (rtx insn, rtx *operands)
14260 static char buf[40];
14263 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14265 #ifdef ENABLE_CHECKING
14266 /* Even if we do not want to check the inputs, this documents input
14267 constraints, which helps in understanding the following code. */
14268 if (STACK_REG_P (operands[0])
14269 && ((REG_P (operands[1])
14270 && REGNO (operands[0]) == REGNO (operands[1])
14271 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14272 || (REG_P (operands[2])
14273 && REGNO (operands[0]) == REGNO (operands[2])
14274 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14275 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14278 gcc_assert (is_sse);
14281 switch (GET_CODE (operands[3]))
14284 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14285 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14293 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14294 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14302 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14303 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14311 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14312 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14320 gcc_unreachable ();
14327 strcpy (buf, ssep);
14328 if (GET_MODE (operands[0]) == SFmode)
14329 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14331 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14335 strcpy (buf, ssep + 1);
14336 if (GET_MODE (operands[0]) == SFmode)
14337 strcat (buf, "ss\t{%2, %0|%0, %2}");
14339 strcat (buf, "sd\t{%2, %0|%0, %2}");
14345 switch (GET_CODE (operands[3]))
14349 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14351 rtx temp = operands[2];
14352 operands[2] = operands[1];
14353 operands[1] = temp;
14356 /* know operands[0] == operands[1]. */
14358 if (MEM_P (operands[2]))
14364 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14366 if (STACK_TOP_P (operands[0]))
14367 /* How is it that we are storing to a dead operand[2]?
14368 Well, presumably operands[1] is dead too. We can't
14369 store the result to st(0) as st(0) gets popped on this
14370 instruction. Instead store to operands[2] (which I
14371 think has to be st(1)). st(1) will be popped later.
14372 gcc <= 2.8.1 didn't have this check and generated
14373 assembly code that the Unixware assembler rejected. */
14374 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14376 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14380 if (STACK_TOP_P (operands[0]))
14381 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14383 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14388 if (MEM_P (operands[1]))
14394 if (MEM_P (operands[2]))
14400 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14403 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14404 derived assemblers, confusingly reverse the direction of
14405 the operation for fsub{r} and fdiv{r} when the
14406 destination register is not st(0). The Intel assembler
14407 doesn't have this brain damage. Read !SYSV386_COMPAT to
14408 figure out what the hardware really does. */
14409 if (STACK_TOP_P (operands[0]))
14410 p = "{p\t%0, %2|rp\t%2, %0}";
14412 p = "{rp\t%2, %0|p\t%0, %2}";
14414 if (STACK_TOP_P (operands[0]))
14415 /* As above for fmul/fadd, we can't store to st(0). */
14416 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14418 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14423 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14426 if (STACK_TOP_P (operands[0]))
14427 p = "{rp\t%0, %1|p\t%1, %0}";
14429 p = "{p\t%1, %0|rp\t%0, %1}";
14431 if (STACK_TOP_P (operands[0]))
14432 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14434 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14439 if (STACK_TOP_P (operands[0]))
14441 if (STACK_TOP_P (operands[1]))
14442 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14444 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14447 else if (STACK_TOP_P (operands[1]))
14450 p = "{\t%1, %0|r\t%0, %1}";
14452 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14458 p = "{r\t%2, %0|\t%0, %2}";
14460 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14466 gcc_unreachable ();
14473 /* Return needed mode for entity in optimize_mode_switching pass. */
14476 ix86_mode_needed (int entity, rtx insn)
14478 enum attr_i387_cw mode;
14480 /* The mode UNINITIALIZED is used to store the control word after a
14481 function call or ASM pattern. The mode ANY specifies that the function
14482 has no requirements on the control word and makes no changes in the
14483 bits we are interested in. */
14486 || (NONJUMP_INSN_P (insn)
14487 && (asm_noperands (PATTERN (insn)) >= 0
14488 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14489 return I387_CW_UNINITIALIZED;
14491 if (recog_memoized (insn) < 0)
14492 return I387_CW_ANY;
14494 mode = get_attr_i387_cw (insn);
14499 if (mode == I387_CW_TRUNC)
14504 if (mode == I387_CW_FLOOR)
14509 if (mode == I387_CW_CEIL)
14514 if (mode == I387_CW_MASK_PM)
14519 gcc_unreachable ();
14522 return I387_CW_ANY;
14525 /* Output code to initialize control word copies used by trunc?f?i and
14526 rounding patterns. CURRENT_MODE is set to the current control word,
14527 while NEW_MODE is set to the new control word. */
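/* The bits manipulated below live in the x87 control word
   (illustrative summary): bits 10-11 are the rounding control
   (00 nearest, 01 down = 0x0400, 10 up = 0x0800, 11 truncate
   = 0x0c00) and bit 5 (0x0020) masks the precision exception.  */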
14530 emit_i387_cw_initialization (int mode)
14532 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14535 enum ix86_stack_slot slot;
14537 rtx reg = gen_reg_rtx (HImode);
14539 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14540 emit_move_insn (reg, copy_rtx (stored_mode));
14542 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14543 || optimize_function_for_size_p (cfun))
14547 case I387_CW_TRUNC:
14548 /* round toward zero (truncate) */
14549 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14550 slot = SLOT_CW_TRUNC;
14553 case I387_CW_FLOOR:
14554 /* round down toward -oo */
14555 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14556 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14557 slot = SLOT_CW_FLOOR;
14561 /* round up toward +oo */
14562 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14563 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14564 slot = SLOT_CW_CEIL;
14567 case I387_CW_MASK_PM:
14568 /* mask precision exception for nearbyint() */
14569 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14570 slot = SLOT_CW_MASK_PM;
14574 gcc_unreachable ();
14581 case I387_CW_TRUNC:
14582 /* round toward zero (truncate) */
14583 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14584 slot = SLOT_CW_TRUNC;
14587 case I387_CW_FLOOR:
14588 /* round down toward -oo */
14589 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14590 slot = SLOT_CW_FLOOR;
14594 /* round up toward +oo */
14595 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14596 slot = SLOT_CW_CEIL;
14599 case I387_CW_MASK_PM:
14600 /* mask precision exception for nearbyint() */
14601 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14602 slot = SLOT_CW_MASK_PM;
14606 gcc_unreachable ();
14610 gcc_assert (slot < MAX_386_STACK_LOCALS);
14612 new_mode = assign_386_stack_local (HImode, slot);
14613 emit_move_insn (new_mode, reg);
14616 /* Output code for INSN to convert a float to a signed int. OPERANDS
14617 are the insn operands. The output may be [HSD]Imode and the input
14618 operand may be [SDX]Fmode. */
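/* Absent SSE3's fisttp, a typical DImode sequence emitted here is
   (illustrative):

	fld	%st(0)		# copy: fistpll pops the stack
	fldcw	%3		# switch to round-toward-zero
	fistpll	%0
	fldcw	%2		# restore the original control word  */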
14621 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14623 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14624 int dimode_p = GET_MODE (operands[0]) == DImode;
14625 int round_mode = get_attr_i387_cw (insn);
14627 /* Jump through a hoop or two for DImode, since the hardware has no
14628 non-popping instruction. We used to do this a different way, but
14629 that was somewhat fragile and broke with post-reload splitters. */
14630 if ((dimode_p || fisttp) && !stack_top_dies)
14631 output_asm_insn ("fld\t%y1", operands);
14633 gcc_assert (STACK_TOP_P (operands[1]));
14634 gcc_assert (MEM_P (operands[0]));
14635 gcc_assert (GET_MODE (operands[1]) != TFmode);
14638 output_asm_insn ("fisttp%Z0\t%0", operands);
14641 if (round_mode != I387_CW_ANY)
14642 output_asm_insn ("fldcw\t%3", operands);
14643 if (stack_top_dies || dimode_p)
14644 output_asm_insn ("fistp%Z0\t%0", operands);
14646 output_asm_insn ("fist%Z0\t%0", operands);
14647 if (round_mode != I387_CW_ANY)
14648 output_asm_insn ("fldcw\t%2", operands);
14654 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14655 have the values zero or one, indicates the ffreep insn's operand
14656 from the OPERANDS array. */
14658 static const char *
14659 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14661 if (TARGET_USE_FFREEP)
14662 #ifdef HAVE_AS_IX86_FFREEP
14663 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14666 static char retval[32];
14667 int regno = REGNO (operands[opno]);
14669 gcc_assert (FP_REGNO_P (regno));
14671 regno -= FIRST_STACK_REG;
14673 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
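/* ffreep %st(N) encodes as DF C0+N; without assembler support we
   emit the opcode bytes directly, e.g. (illustrative, with ASM_SHORT
   being .word)

	.word	0xc1df

   whose little-endian bytes DF C1 are ffreep %st(1).  */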
14678 return opno ? "fstp\t%y1" : "fstp\t%y0";
14682 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14683 should be used. UNORDERED_P is true when fucom should be used. */
14686 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14688 int stack_top_dies;
14689 rtx cmp_op0, cmp_op1;
14690 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14694 cmp_op0 = operands[0];
14695 cmp_op1 = operands[1];
14699 cmp_op0 = operands[1];
14700 cmp_op1 = operands[2];
14705 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14706 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14707 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14708 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14710 if (GET_MODE (operands[0]) == SFmode)
14712 return &ucomiss[TARGET_AVX ? 0 : 1];
14714 return &comiss[TARGET_AVX ? 0 : 1];
14717 return &ucomisd[TARGET_AVX ? 0 : 1];
14719 return &comisd[TARGET_AVX ? 0 : 1];
14722 gcc_assert (STACK_TOP_P (cmp_op0));
14724 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14726 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14728 if (stack_top_dies)
14730 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14731 return output_387_ffreep (operands, 1);
14734 return "ftst\n\tfnstsw\t%0";
14737 if (STACK_REG_P (cmp_op1)
14739 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14740 && REGNO (cmp_op1) != FIRST_STACK_REG)
14742 /* If both the top of the 387 stack and the other operand (itself
14743 a stack register) die, then this must be a
14744 `fcompp' float compare. */
14748 /* There is no double popping fcomi variant. Fortunately,
14749 eflags is immune from the fstp's cc clobbering. */
14751 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14753 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14754 return output_387_ffreep (operands, 0);
14759 return "fucompp\n\tfnstsw\t%0";
14761 return "fcompp\n\tfnstsw\t%0";
14766 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14768 static const char * const alt[16] =
14770 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14771 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14772 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14773 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14775 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14776 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14780 "fcomi\t{%y1, %0|%0, %y1}",
14781 "fcomip\t{%y1, %0|%0, %y1}",
14782 "fucomi\t{%y1, %0|%0, %y1}",
14783 "fucomip\t{%y1, %0|%0, %y1}",
14794 mask = eflags_p << 3;
14795 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14796 mask |= unordered_p << 1;
14797 mask |= stack_top_dies;
14799 gcc_assert (mask < 16);
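/* E.g. (illustrative) eflags_p = 1 with an FP operand, an ordered
   compare and a dying stack top gives mask 0b1001 = 9, selecting
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */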
14808 ix86_output_addr_vec_elt (FILE *file, int value)
14810 const char *directive = ASM_LONG;
14814 directive = ASM_QUAD;
14816 gcc_assert (!TARGET_64BIT);
14819 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14823 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14825 const char *directive = ASM_LONG;
14828 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14829 directive = ASM_QUAD;
14831 gcc_assert (!TARGET_64BIT);
14833 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14834 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14835 fprintf (file, "%s%s%d-%s%d\n",
14836 directive, LPREFIX, value, LPREFIX, rel);
14837 else if (HAVE_AS_GOTOFF_IN_DATA)
14838 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14840 else if (TARGET_MACHO)
14842 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14843 machopic_output_function_base_name (file);
14848 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14849 GOT_SYMBOL_NAME, LPREFIX, value);
14852 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14856 ix86_expand_clear (rtx dest)
14860 /* We play register width games, which are only valid after reload. */
14861 gcc_assert (reload_completed);
14863 /* Avoid HImode and its attendant prefix byte. */
14864 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14865 dest = gen_rtx_REG (SImode, REGNO (dest));
14866 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14868 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14869 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14871 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14872 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
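/* I.e. (illustrative) clearing %eax normally emits the two-byte

	xorl	%eax, %eax	# clobbers the flags

   and only targets flagged TARGET_USE_MOV0 fall back to the
   five-byte, flags-preserving "movl $0, %eax".  */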
14878 /* X is an unchanging MEM. If it is a constant pool reference, return
14879 the constant pool rtx, else NULL. */
14882 maybe_get_pool_constant (rtx x)
14884 x = ix86_delegitimize_address (XEXP (x, 0));
14886 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14887 return get_pool_constant (x);
14893 ix86_expand_move (enum machine_mode mode, rtx operands[])
14896 enum tls_model model;
14901 if (GET_CODE (op1) == SYMBOL_REF)
14903 model = SYMBOL_REF_TLS_MODEL (op1);
14906 op1 = legitimize_tls_address (op1, model, true);
14907 op1 = force_operand (op1, op0);
14911 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14912 && SYMBOL_REF_DLLIMPORT_P (op1))
14913 op1 = legitimize_dllimport_symbol (op1, false);
14915 else if (GET_CODE (op1) == CONST
14916 && GET_CODE (XEXP (op1, 0)) == PLUS
14917 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14919 rtx addend = XEXP (XEXP (op1, 0), 1);
14920 rtx symbol = XEXP (XEXP (op1, 0), 0);
14923 model = SYMBOL_REF_TLS_MODEL (symbol);
14925 tmp = legitimize_tls_address (symbol, model, true);
14926 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14927 && SYMBOL_REF_DLLIMPORT_P (symbol))
14928 tmp = legitimize_dllimport_symbol (symbol, true);
14932 tmp = force_operand (tmp, NULL);
14933 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14934 op0, 1, OPTAB_DIRECT);
14940 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14942 if (TARGET_MACHO && !TARGET_64BIT)
14947 rtx temp = ((reload_in_progress
14948 || ((op0 && REG_P (op0))
14950 ? op0 : gen_reg_rtx (Pmode));
14951 op1 = machopic_indirect_data_reference (op1, temp);
14952 op1 = machopic_legitimize_pic_address (op1, mode,
14953 temp == op1 ? 0 : temp);
14955 else if (MACHOPIC_INDIRECT)
14956 op1 = machopic_indirect_data_reference (op1, 0);
14964 op1 = force_reg (Pmode, op1);
14965 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14967 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14968 op1 = legitimize_pic_address (op1, reg);
14977 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14978 || !push_operand (op0, mode))
14980 op1 = force_reg (mode, op1);
14982 if (push_operand (op0, mode)
14983 && ! general_no_elim_operand (op1, mode))
14984 op1 = copy_to_mode_reg (mode, op1);
14986 /* Force large constants in 64-bit compilation into a register
14987 to get them CSEed. */
14988 if (can_create_pseudo_p ()
14989 && (mode == DImode) && TARGET_64BIT
14990 && immediate_operand (op1, mode)
14991 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14992 && !register_operand (op0, mode)
14994 op1 = copy_to_mode_reg (mode, op1);
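/* E.g. (illustrative) x86-64 has no 64-bit immediate stores, so
   writing 0x112233445566 to memory goes through

	movabsq	$0x112233445566, %rax
	movq	%rax, mem

   and CSE can then reuse the register copy of the constant.  */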
14996 if (can_create_pseudo_p ()
14997 && FLOAT_MODE_P (mode)
14998 && GET_CODE (op1) == CONST_DOUBLE)
15000 /* If we are loading a floating point constant to a register,
15001 force the value to memory now, since we'll get better code
15002 out of the back end. */
15004 op1 = validize_mem (force_const_mem (mode, op1));
15005 if (!register_operand (op0, mode))
15007 rtx temp = gen_reg_rtx (mode);
15008 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15009 emit_move_insn (op0, temp);
15015 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15019 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15021 rtx op0 = operands[0], op1 = operands[1];
15022 unsigned int align = GET_MODE_ALIGNMENT (mode);
15024 /* Force constants other than zero into memory. We do not know how
15025 the instructions used to build constants modify the upper 64 bits
15026 of the register; once we have that information we may be able
15027 to handle some of them more efficiently. */
15028 if (can_create_pseudo_p ()
15029 && register_operand (op0, mode)
15030 && (CONSTANT_P (op1)
15031 || (GET_CODE (op1) == SUBREG
15032 && CONSTANT_P (SUBREG_REG (op1))))
15033 && !standard_sse_constant_p (op1))
15034 op1 = validize_mem (force_const_mem (mode, op1));
15036 /* We need to check memory alignment for SSE mode since attributes
15037 can make operands unaligned. */
15038 if (can_create_pseudo_p ()
15039 && SSE_REG_MODE_P (mode)
15040 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15041 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15045 /* ix86_expand_vector_move_misalign() does not like constants ... */
15046 if (CONSTANT_P (op1)
15047 || (GET_CODE (op1) == SUBREG
15048 && CONSTANT_P (SUBREG_REG (op1))))
15049 op1 = validize_mem (force_const_mem (mode, op1));
15051 /* ... nor both arguments in memory. */
15052 if (!register_operand (op0, mode)
15053 && !register_operand (op1, mode))
15054 op1 = force_reg (mode, op1);
15056 tmp[0] = op0; tmp[1] = op1;
15057 ix86_expand_vector_move_misalign (mode, tmp);
15061 /* Make operand1 a register if it isn't already. */
15062 if (can_create_pseudo_p ()
15063 && !register_operand (op0, mode)
15064 && !register_operand (op1, mode))
15066 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15070 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15073 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15074 straight to ix86_expand_vector_move. */
15075 /* Code generation for scalar reg-reg moves of single and double precision data:
15076 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
15080 if (x86_sse_partial_reg_dependency == true)
15085 Code generation for scalar loads of double precision data:
15086 if (x86_sse_split_regs == true)
15087 movlpd mem, reg (gas syntax)
15091 Code generation for unaligned packed loads of single precision data
15092 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15093 if (x86_sse_unaligned_move_optimal)
15096 if (x86_sse_partial_reg_dependency == true)
15108 Code generation for unaligned packed loads of double precision data
15109 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15110 if (x86_sse_unaligned_move_optimal)
15113 if (x86_sse_split_regs == true)
15126 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15135 switch (GET_MODE_CLASS (mode))
15137 case MODE_VECTOR_INT:
15139 switch (GET_MODE_SIZE (mode))
15142 /* If we're optimizing for size, movups is the smallest. */
15143 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15145 op0 = gen_lowpart (V4SFmode, op0);
15146 op1 = gen_lowpart (V4SFmode, op1);
15147 emit_insn (gen_avx_movups (op0, op1));
15150 op0 = gen_lowpart (V16QImode, op0);
15151 op1 = gen_lowpart (V16QImode, op1);
15152 emit_insn (gen_avx_movdqu (op0, op1));
15155 op0 = gen_lowpart (V32QImode, op0);
15156 op1 = gen_lowpart (V32QImode, op1);
15157 emit_insn (gen_avx_movdqu256 (op0, op1));
15160 gcc_unreachable ();
15163 case MODE_VECTOR_FLOAT:
15164 op0 = gen_lowpart (mode, op0);
15165 op1 = gen_lowpart (mode, op1);
15170 emit_insn (gen_avx_movups (op0, op1));
15173 emit_insn (gen_avx_movups256 (op0, op1));
15176 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15178 op0 = gen_lowpart (V4SFmode, op0);
15179 op1 = gen_lowpart (V4SFmode, op1);
15180 emit_insn (gen_avx_movups (op0, op1));
15183 emit_insn (gen_avx_movupd (op0, op1));
15186 emit_insn (gen_avx_movupd256 (op0, op1));
15189 gcc_unreachable ();
15194 gcc_unreachable ();
15202 /* If we're optimizing for size, movups is the smallest. */
15203 if (optimize_insn_for_size_p ()
15204 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15206 op0 = gen_lowpart (V4SFmode, op0);
15207 op1 = gen_lowpart (V4SFmode, op1);
15208 emit_insn (gen_sse_movups (op0, op1));
15212 /* ??? If we have typed data, then it would appear that using
15213 movdqu is the only way to get unaligned data loaded with integer type. */
15215 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15217 op0 = gen_lowpart (V16QImode, op0);
15218 op1 = gen_lowpart (V16QImode, op1);
15219 emit_insn (gen_sse2_movdqu (op0, op1));
15223 if (TARGET_SSE2 && mode == V2DFmode)
15227 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15229 op0 = gen_lowpart (V2DFmode, op0);
15230 op1 = gen_lowpart (V2DFmode, op1);
15231 emit_insn (gen_sse2_movupd (op0, op1));
15235 /* When SSE registers are split into halves, we can avoid
15236 writing to the top half twice. */
15237 if (TARGET_SSE_SPLIT_REGS)
15239 emit_clobber (op0);
15244 /* ??? Not sure about the best option for the Intel chips.
15245 The following would seem to satisfy; the register is
15246 entirely cleared, breaking the dependency chain. We
15247 then store to the upper half, with a dependency depth
15248 of one. A rumor has it that Intel recommends two movsd
15249 followed by an unpacklpd, but this is unconfirmed. And
15250 given that the dependency depth of the unpacklpd would
15251 still be one, I'm not sure why this would be better. */
15252 zero = CONST0_RTX (V2DFmode);
15255 m = adjust_address (op1, DFmode, 0);
15256 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15257 m = adjust_address (op1, DFmode, 8);
15258 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15262 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15264 op0 = gen_lowpart (V4SFmode, op0);
15265 op1 = gen_lowpart (V4SFmode, op1);
15266 emit_insn (gen_sse_movups (op0, op1));
15270 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15271 emit_move_insn (op0, CONST0_RTX (mode));
15273 emit_clobber (op0);
15275 if (mode != V4SFmode)
15276 op0 = gen_lowpart (V4SFmode, op0);
15277 m = adjust_address (op1, V2SFmode, 0);
15278 emit_insn (gen_sse_loadlps (op0, op0, m));
15279 m = adjust_address (op1, V2SFmode, 8);
15280 emit_insn (gen_sse_loadhps (op0, op0, m));
15283 else if (MEM_P (op0))
15285 /* If we're optimizing for size, movups is the smallest. */
15286 if (optimize_insn_for_size_p ()
15287 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15289 op0 = gen_lowpart (V4SFmode, op0);
15290 op1 = gen_lowpart (V4SFmode, op1);
15291 emit_insn (gen_sse_movups (op0, op1));
15295 /* ??? Similar to above, only less clear because of
15296 "typeless stores". */
15297 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15298 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15300 op0 = gen_lowpart (V16QImode, op0);
15301 op1 = gen_lowpart (V16QImode, op1);
15302 emit_insn (gen_sse2_movdqu (op0, op1));
15306 if (TARGET_SSE2 && mode == V2DFmode)
15308 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15310 op0 = gen_lowpart (V2DFmode, op0);
15311 op1 = gen_lowpart (V2DFmode, op1);
15312 emit_insn (gen_sse2_movupd (op0, op1));
15316 m = adjust_address (op0, DFmode, 0);
15317 emit_insn (gen_sse2_storelpd (m, op1));
15318 m = adjust_address (op0, DFmode, 8);
15319 emit_insn (gen_sse2_storehpd (m, op1));
15324 if (mode != V4SFmode)
15325 op1 = gen_lowpart (V4SFmode, op1);
15327 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15329 op0 = gen_lowpart (V4SFmode, op0);
15330 emit_insn (gen_sse_movups (op0, op1));
15334 m = adjust_address (op0, V2SFmode, 0);
15335 emit_insn (gen_sse_storelps (m, op1));
15336 m = adjust_address (op0, V2SFmode, 8);
15337 emit_insn (gen_sse_storehps (m, op1));
15342 gcc_unreachable ();
15345 /* Expand a push in MODE. This is some mode for which we do not support
15346 proper push instructions, at least from the registers that we expect
15347 the value to live in. */
15350 ix86_expand_push (enum machine_mode mode, rtx x)
15354 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15355 GEN_INT (-GET_MODE_SIZE (mode)),
15356 stack_pointer_rtx, 1, OPTAB_DIRECT);
15357 if (tmp != stack_pointer_rtx)
15358 emit_move_insn (stack_pointer_rtx, tmp);
15360 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15362 /* When we push an operand onto the stack, it has to be aligned at least
15363 at the function argument boundary. However, since we don't have
15364 the argument type, we can't determine the actual argument boundary. */
15366 emit_move_insn (tmp, x);
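/* Illustrative only: for a 16-byte vector argument X the expansion
   above amounts to

	sub	$16, %esp
	<move X into (%esp)>

   i.e. an explicit stack-pointer adjustment followed by an ordinary
   store, since no real push instruction covers such modes.  */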
15369 /* Helper function of ix86_fixup_binary_operands to canonicalize
15370 operand order. Returns true if the operands should be swapped. */
15373 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15376 rtx dst = operands[0];
15377 rtx src1 = operands[1];
15378 rtx src2 = operands[2];
15380 /* If the operation is not commutative, we can't do anything. */
15381 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15384 /* Highest priority is that src1 should match dst. */
15385 if (rtx_equal_p (dst, src1))
15387 if (rtx_equal_p (dst, src2))
15390 /* Next highest priority is that immediate constants come second. */
15391 if (immediate_operand (src2, mode))
15393 if (immediate_operand (src1, mode))
15396 /* Lowest priority is that memory references should come second. */
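/* For example, given a commutative (set r1 (plus r2 r1)), returning
   true here lets the caller rewrite it as (set r1 (plus r1 r2)),
   which matches the two-address form of the x86 add.  */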
15406 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15407 destination to use for the operation. If different from the true
15408 destination in operands[0], a copy operation will be required. */
15411 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15414 rtx dst = operands[0];
15415 rtx src1 = operands[1];
15416 rtx src2 = operands[2];
15418 /* Canonicalize operand order. */
15419 if (ix86_swap_binary_operands_p (code, mode, operands))
15423 /* It is invalid to swap operands of different modes. */
15424 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15431 /* Both source operands cannot be in memory. */
15432 if (MEM_P (src1) && MEM_P (src2))
15434 /* Optimization: Only read from memory once. */
15435 if (rtx_equal_p (src1, src2))
15437 src2 = force_reg (mode, src2);
15441 src2 = force_reg (mode, src2);
15444 /* If the destination is memory, and we do not have matching source
15445 operands, do things in registers. */
15446 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15447 dst = gen_reg_rtx (mode);
15449 /* Source 1 cannot be a constant. */
15450 if (CONSTANT_P (src1))
15451 src1 = force_reg (mode, src1);
15453 /* Source 1 cannot be a non-matching memory. */
15454 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15455 src1 = force_reg (mode, src1);
15457 operands[1] = src1;
15458 operands[2] = src2;
15462 /* Similarly, but assume that the destination has already been
15463 set up properly. */
15466 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15467 enum machine_mode mode, rtx operands[])
15469 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15470 gcc_assert (dst == operands[0]);
15473 /* Attempt to expand a binary operator. Make the expansion closer to the
15474 actual machine than just general_operand, which would allow 3 separate
15475 memory references (one output, two inputs) in a single insn. */
15478 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15481 rtx src1, src2, dst, op, clob;
15483 dst = ix86_fixup_binary_operands (code, mode, operands);
15484 src1 = operands[1];
15485 src2 = operands[2];
15487 /* Emit the instruction. */
15489 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15490 if (reload_in_progress)
15492 /* Reload doesn't know about the flags register, and doesn't know that
15493 it doesn't want to clobber it. We can only do this with PLUS. */
15494 gcc_assert (code == PLUS);
15499 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15500 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15503 /* Fix up the destination if needed. */
15504 if (dst != operands[0])
15505 emit_move_insn (operands[0], dst);
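/* For example, expanding an SImode PLUS typically yields

	(parallel [(set (reg:SI 60) (plus:SI (reg:SI 61) (reg:SI 62)))
		   (clobber (reg:CC FLAGS_REG))])

   (register numbers illustrative), plus a trailing copy back to
   operands[0] only when the fixup above substituted a fresh
   destination register.  */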
15508 /* Return TRUE or FALSE depending on whether the binary operator meets the
15509 appropriate constraints. */
15512 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15515 rtx dst = operands[0];
15516 rtx src1 = operands[1];
15517 rtx src2 = operands[2];
15519 /* Both source operands cannot be in memory. */
15520 if (MEM_P (src1) && MEM_P (src2))
15523 /* Canonicalize operand order for commutative operators. */
15524 if (ix86_swap_binary_operands_p (code, mode, operands))
15531 /* If the destination is memory, we must have a matching source operand. */
15532 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15535 /* Source 1 cannot be a constant. */
15536 if (CONSTANT_P (src1))
15539 /* Source 1 cannot be a non-matching memory. */
15540 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15542 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15543 return (code == AND
15546 || (TARGET_64BIT && mode == DImode))
15547 && CONST_INT_P (src2)
15548 && (INTVAL (src2) == 0xff
15549 || INTVAL (src2) == 0xffff));
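/* i.e. something like (set (reg:SI 60) (and:SI (mem:SI M) (const_int 255)))
   is accepted despite the non-matching memory source, because it can
   be emitted as the zero-extending load "movzbl M, reg".  */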
15555 /* Attempt to expand a unary operator. Make the expansion closer to the
15556 actual machine than just general_operand, which would allow 2 separate
15557 memory references (one output, one input) in a single insn. */
15560 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15563 int matching_memory;
15564 rtx src, dst, op, clob;
15569 /* If the destination is memory, and we do not have matching source
15570 operands, do things in registers. */
15571 matching_memory = 0;
15574 if (rtx_equal_p (dst, src))
15575 matching_memory = 1;
15577 dst = gen_reg_rtx (mode);
15580 /* When source operand is memory, destination must match. */
15581 if (MEM_P (src) && !matching_memory)
15582 src = force_reg (mode, src);
15584 /* Emit the instruction. */
15586 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15587 if (reload_in_progress || code == NOT)
15589 /* Reload doesn't know about the flags register, and doesn't know that
15590 it doesn't want to clobber it. */
15591 gcc_assert (code == NOT);
15596 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15597 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15600 /* Fix up the destination if needed. */
15601 if (dst != operands[0])
15602 emit_move_insn (operands[0], dst);
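/* e.g. a NEG is emitted as

	(parallel [(set (reg:SI 60) (neg:SI (reg:SI 61)))
		   (clobber (reg:CC FLAGS_REG))])

   while NOT, which never touches the flags, is emitted as a bare SET
   without the clobber.  */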
15605 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15606 divisor are within the range [0-255]. */
15609 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15612 rtx end_label, qimode_label;
15613 rtx insn, div, mod;
15614 rtx scratch, tmp0, tmp1, tmp2;
15615 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15616 rtx (*gen_zero_extend) (rtx, rtx);
15617 rtx (*gen_test_ccno_1) (rtx, rtx);
15622 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15623 gen_test_ccno_1 = gen_testsi_ccno_1;
15624 gen_zero_extend = gen_zero_extendqisi2;
15627 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15628 gen_test_ccno_1 = gen_testdi_ccno_1;
15629 gen_zero_extend = gen_zero_extendqidi2;
15632 gcc_unreachable ();
15635 end_label = gen_label_rtx ();
15636 qimode_label = gen_label_rtx ();
15638 scratch = gen_reg_rtx (mode);
15640 /* Use 8bit unsigned divmod if dividend and divisor are within
15641 the range [0-255]. */
15642 emit_move_insn (scratch, operands[2]);
15643 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15644 scratch, 1, OPTAB_DIRECT);
15645 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15646 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15647 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15648 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15649 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15651 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15652 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15653 JUMP_LABEL (insn) = qimode_label;
15655 /* Generate the original signed/unsigned divmod. */
15656 div = gen_divmod4_1 (operands[0], operands[1],
15657 operands[2], operands[3]);
15660 /* Branch to the end. */
15661 emit_jump_insn (gen_jump (end_label));
15664 /* Generate 8bit unsigned divide. */
15665 emit_label (qimode_label);
15666 /* Don't use operands[0] for result of 8bit divide since not all
15667 registers support QImode ZERO_EXTRACT. */
15668 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15669 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15670 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15671 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15675 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15676 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15680 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15681 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15684 /* Extract remainder from AH. */
15685 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15686 if (REG_P (operands[1]))
15687 insn = emit_move_insn (operands[1], tmp1);
15690 /* Need a new scratch register since the old one holds the result of the 8bit divide. */
15692 scratch = gen_reg_rtx (mode);
15693 emit_move_insn (scratch, tmp1);
15694 insn = emit_move_insn (operands[1], scratch);
15696 set_unique_reg_note (insn, REG_EQUAL, mod);
15698 /* Zero extend quotient from AL. */
15699 tmp1 = gen_lowpart (QImode, tmp0);
15700 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15701 set_unique_reg_note (insn, REG_EQUAL, div);
15703 emit_label (end_label);
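/* A rough sketch of what this splitter produces for SImode
   (illustrative gas syntax; register names made up):

	mov	dividend, scratch
	or	divisor, scratch
	test	$-0x100, scratch	-- both values in [0-255]?
	je	.Lqimode
	<ordinary 32-bit (u)div>	-- quotient/remainder as usual
	jmp	.Lend
   .Lqimode:
	div	divisor_byte		-- AL = quotient, AH = remainder
	<zero-extend AL, extract AH>
   .Lend:  */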
15706 #define LEA_SEARCH_THRESHOLD 12
15708 /* Search backward for a non-agu definition of register number REGNO1
15709 or register number REGNO2 in INSN's basic block until we either
15710 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15711 2. Reach the BB boundary, or
15712 3. Reach an agu definition.
15713 Returns the distance between the non-agu definition point and INSN.
15714 If there is no definition point, returns -1. */
15717 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15720 basic_block bb = BLOCK_FOR_INSN (insn);
15723 enum attr_type insn_type;
15725 if (insn != BB_HEAD (bb))
15727 rtx prev = PREV_INSN (insn);
15728 while (prev && distance < LEA_SEARCH_THRESHOLD)
15730 if (NONDEBUG_INSN_P (prev))
15733 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15734 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15735 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15736 && (regno1 == DF_REF_REGNO (*def_rec)
15737 || regno2 == DF_REF_REGNO (*def_rec)))
15739 insn_type = get_attr_type (prev);
15740 if (insn_type != TYPE_LEA)
15744 if (prev == BB_HEAD (bb))
15746 prev = PREV_INSN (prev);
15750 if (distance < LEA_SEARCH_THRESHOLD)
15754 bool simple_loop = false;
15756 FOR_EACH_EDGE (e, ei, bb->preds)
15759 simple_loop = true;
15765 rtx prev = BB_END (bb);
15768 && distance < LEA_SEARCH_THRESHOLD)
15770 if (NONDEBUG_INSN_P (prev))
15773 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15774 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15775 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15776 && (regno1 == DF_REF_REGNO (*def_rec)
15777 || regno2 == DF_REF_REGNO (*def_rec)))
15779 insn_type = get_attr_type (prev);
15780 if (insn_type != TYPE_LEA)
15784 prev = PREV_INSN (prev);
15792 /* get_attr_type may modify recog data. We want to make sure
15793 that recog data is valid for instruction INSN, on which
15794 distance_non_agu_define is called. INSN is unchanged here. */
15795 extract_insn_cached (insn);
15799 /* Return the distance between INSN and the next insn that uses
15800 register number REGNO0 in a memory address. Return -1 if no such
15801 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
15804 distance_agu_use (unsigned int regno0, rtx insn)
15806 basic_block bb = BLOCK_FOR_INSN (insn);
15811 if (insn != BB_END (bb))
15813 rtx next = NEXT_INSN (insn);
15814 while (next && distance < LEA_SEARCH_THRESHOLD)
15816 if (NONDEBUG_INSN_P (next))
15820 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15821 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15822 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15823 && regno0 == DF_REF_REGNO (*use_rec))
15825 /* Return DISTANCE if OP0 is used in memory
15826 address in NEXT. */
15830 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15831 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15832 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15833 && regno0 == DF_REF_REGNO (*def_rec))
15835 /* Return -1 if OP0 is set in NEXT. */
15839 if (next == BB_END (bb))
15841 next = NEXT_INSN (next);
15845 if (distance < LEA_SEARCH_THRESHOLD)
15849 bool simple_loop = false;
15851 FOR_EACH_EDGE (e, ei, bb->succs)
15854 simple_loop = true;
15860 rtx next = BB_HEAD (bb);
15863 && distance < LEA_SEARCH_THRESHOLD)
15865 if (NONDEBUG_INSN_P (next))
15869 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15870 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15871 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15872 && regno0 == DF_REF_REGNO (*use_rec))
15874 /* Return DISTANCE if OP0 is used in memory
15875 address in NEXT. */
15879 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15880 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15881 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15882 && regno0 == DF_REF_REGNO (*def_rec))
15884 /* Return -1 if OP0 is set in NEXT. */
15889 next = NEXT_INSN (next);
15897 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15898 there is a choice between LEA and ADD.
15899 Negative value: ADD is preferred over LEA.
15900 Zero: neutral.
15901 Positive value: LEA is preferred over ADD. */
15902 #define IX86_LEA_PRIORITY 2
15904 /* Return true if it is OK to optimize an ADD operation into an LEA
15905 operation to avoid flag register consumption. For most processors,
15906 ADD is faster than LEA. For processors like ATOM, if the
15907 destination register of the LEA holds an actual address which will
15908 be used soon, LEA is better; otherwise ADD is better. */
15911 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15913 unsigned int regno0 = true_regnum (operands[0]);
15914 unsigned int regno1 = true_regnum (operands[1]);
15915 unsigned int regno2 = true_regnum (operands[2]);
15917 /* If a = b + c, (a!=b && a!=c), must use lea form. */
15918 if (regno0 != regno1 && regno0 != regno2)
15921 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15925 int dist_define, dist_use;
15927 /* Return false if REGNO0 isn't used in a memory address. */
15928 dist_use = distance_agu_use (regno0, insn);
15932 dist_define = distance_non_agu_define (regno1, regno2, insn);
15933 if (dist_define <= 0)
15936 /* If this insn has both a backward non-agu dependence and a forward
15937 agu dependence, the one with the shorter distance takes effect. */
15938 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15945 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY. */
15949 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15955 /* Retrieve destination of SET_BODY. */
15956 switch (GET_CODE (set_body))
15959 set_dest = SET_DEST (set_body);
15960 if (!set_dest || !REG_P (set_dest))
15964 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15965 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15973 /* Retrieve shift count of USE_BODY. */
15974 switch (GET_CODE (use_body))
15977 shift_rtx = XEXP (use_body, 1);
15980 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15981 if (ix86_dep_by_shift_count_body (set_body,
15982 XVECEXP (use_body, 0, i)))
15990 && (GET_CODE (shift_rtx) == ASHIFT
15991 || GET_CODE (shift_rtx) == LSHIFTRT
15992 || GET_CODE (shift_rtx) == ASHIFTRT
15993 || GET_CODE (shift_rtx) == ROTATE
15994 || GET_CODE (shift_rtx) == ROTATERT))
15996 rtx shift_count = XEXP (shift_rtx, 1);
15998 /* Return true if shift count is dest of SET_BODY. */
15999 if (REG_P (shift_count)
16000 && true_regnum (set_dest) == true_regnum (shift_count))
16007 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN. */
16011 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16013 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16014 PATTERN (use_insn));
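/* For example, with SET_INSN "movl $3, %ecx" and USE_INSN
   "sall %cl, %eax", the register set by the first insn is the shift
   count of the second, so this returns true.  */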
16017 /* Return TRUE or FALSE depending on whether the unary operator meets the
16018 appropriate constraints. */
16021 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16022 enum machine_mode mode ATTRIBUTE_UNUSED,
16023 rtx operands[2] ATTRIBUTE_UNUSED)
16025 /* If one of the operands is memory, source and destination must match. */
16026 if ((MEM_P (operands[0])
16027 || MEM_P (operands[1]))
16028 && ! rtx_equal_p (operands[0], operands[1]))
16033 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16034 are ok, keeping in mind the possible movddup alternative. */
16037 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16039 if (MEM_P (operands[0]))
16040 return rtx_equal_p (operands[0], operands[1 + high]);
16041 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16042 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16046 /* Post-reload splitter for converting an SF or DFmode value in an
16047 SSE register into an unsigned SImode. */
16050 ix86_split_convert_uns_si_sse (rtx operands[])
16052 enum machine_mode vecmode;
16053 rtx value, large, zero_or_two31, input, two31, x;
16055 large = operands[1];
16056 zero_or_two31 = operands[2];
16057 input = operands[3];
16058 two31 = operands[4];
16059 vecmode = GET_MODE (large);
16060 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16062 /* Load up the value into the low element. We must ensure that the other
16063 elements are valid floats -- zero is the easiest such value. */
16066 if (vecmode == V4SFmode)
16067 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16069 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16073 input = gen_rtx_REG (vecmode, REGNO (input));
16074 emit_move_insn (value, CONST0_RTX (vecmode));
16075 if (vecmode == V4SFmode)
16076 emit_insn (gen_sse_movss (value, value, input));
16078 emit_insn (gen_sse2_movsd (value, value, input));
16081 emit_move_insn (large, two31);
16082 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16084 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16085 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16087 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16088 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16090 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16091 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16093 large = gen_rtx_REG (V4SImode, REGNO (large));
16094 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16096 x = gen_rtx_REG (V4SImode, REGNO (value));
16097 if (vecmode == V4SFmode)
16098 emit_insn (gen_sse2_cvttps2dq (x, value));
16100 emit_insn (gen_sse2_cvttpd2dq (x, value));
16103 emit_insn (gen_xorv4si3 (value, value, large));
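/* Per element, the sequence above computes roughly:

	large = (value >= 0x1p31)		-- all-ones compare mask
	value -= large ? 0x1p31 : 0.0		-- shift into signed range
	result = (int) value			-- cvttps2dq / cvttpd2dq
	result ^= large ? 0x80000000 : 0	-- add the 2^31 back in

   giving an unsigned conversion without a native unsigned cvtt
   instruction.  */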
16106 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16107 Expects the 64-bit DImode to be supplied in a pair of integral
16108 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16109 -mfpmath=sse, !optimize_size only. */
16112 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16114 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16115 rtx int_xmm, fp_xmm;
16116 rtx biases, exponents;
16119 int_xmm = gen_reg_rtx (V4SImode);
16120 if (TARGET_INTER_UNIT_MOVES)
16121 emit_insn (gen_movdi_to_sse (int_xmm, input));
16122 else if (TARGET_SSE_SPLIT_REGS)
16124 emit_clobber (int_xmm);
16125 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16129 x = gen_reg_rtx (V2DImode);
16130 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16131 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16134 x = gen_rtx_CONST_VECTOR (V4SImode,
16135 gen_rtvec (4, GEN_INT (0x43300000UL),
16136 GEN_INT (0x45300000UL),
16137 const0_rtx, const0_rtx));
16138 exponents = validize_mem (force_const_mem (V4SImode, x));
16140 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16141 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16143 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16144 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16145 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16146 (0x1.0p84 + double(fp_value_hi_xmm)).
16147 Note these exponents differ by 32. */
16149 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16151 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16152 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16153 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16154 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16155 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16156 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16157 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16158 biases = validize_mem (force_const_mem (V2DFmode, biases));
16159 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16161 /* Add the upper and lower DFmode values together. */
16163 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16166 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16167 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16168 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16171 ix86_expand_vector_extract (false, target, fp_xmm, 0);
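/* Worked example of the bias trick: for input hi = 2, lo = 3 the two
   doubles built above are 0x1.0p52 + 3 and 0x1.0p84 + 2*2^32; after
   the biases 0x1.0p52 and 0x1.0p84 are subtracted and the halves
   added, the result is 2*2^32 + 3, exactly the 64-bit input.  */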
16174 /* Not used, but eases macroization of patterns. */
16176 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16177 rtx input ATTRIBUTE_UNUSED)
16179 gcc_unreachable ();
16182 /* Convert an unsigned SImode value into a DFmode. Only currently used
16183 for SSE, but applicable anywhere. */
16186 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16188 REAL_VALUE_TYPE TWO31r;
16191 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16192 NULL, 1, OPTAB_DIRECT);
16194 fp = gen_reg_rtx (DFmode);
16195 emit_insn (gen_floatsidf2 (fp, x));
16197 real_ldexp (&TWO31r, &dconst1, 31);
16198 x = const_double_from_real_value (TWO31r, DFmode);
16200 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16202 emit_move_insn (target, x);
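/* i.e. target = (double) (int) (input - 0x80000000) + 0x1p31: the
   subtraction wraps the unsigned input into the signed range, the
   signed SImode-to-DFmode conversion is exact, and adding 2^31 back
   restores the intended value.  */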
16205 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16206 32-bit mode; otherwise we have a direct convert instruction. */
16209 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16211 REAL_VALUE_TYPE TWO32r;
16212 rtx fp_lo, fp_hi, x;
16214 fp_lo = gen_reg_rtx (DFmode);
16215 fp_hi = gen_reg_rtx (DFmode);
16217 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16219 real_ldexp (&TWO32r, &dconst1, 32);
16220 x = const_double_from_real_value (TWO32r, DFmode);
16221 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16223 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16225 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16228 emit_move_insn (target, x);
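/* i.e. target = (double) hi * 0x1p32 + (double) (unsigned) lo, where
   the low word goes through the unsigned conversion above so that its
   top bit is not misread as a sign bit.  */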
16231 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16232 For x86_32, -mfpmath=sse, !optimize_size only. */
16234 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16236 REAL_VALUE_TYPE ONE16r;
16237 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16239 real_ldexp (&ONE16r, &dconst1, 16);
16240 x = const_double_from_real_value (ONE16r, SFmode);
16241 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16242 NULL, 0, OPTAB_DIRECT);
16243 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16244 NULL, 0, OPTAB_DIRECT);
16245 fp_hi = gen_reg_rtx (SFmode);
16246 fp_lo = gen_reg_rtx (SFmode);
16247 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16248 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16249 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16251 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16253 if (!rtx_equal_p (target, fp_hi))
16254 emit_move_insn (target, fp_hi);
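/* i.e. target = (float) (input >> 16) * 0x1p16 + (float) (input & 0xffff);
   each half fits in 16 bits, so both signed SImode-to-SFmode
   conversions are exact.  */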
16257 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16258 then replicate the value for all elements of the vector register. */
16262 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16269 v = gen_rtvec (4, value, value, value, value);
16270 return gen_rtx_CONST_VECTOR (V4SImode, v);
16274 v = gen_rtvec (2, value, value);
16275 return gen_rtx_CONST_VECTOR (V2DImode, v);
16279 v = gen_rtvec (8, value, value, value, value,
16280 value, value, value, value);
16282 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16283 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16284 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16285 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16286 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16290 v = gen_rtvec (4, value, value, value, value);
16292 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16293 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16294 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16298 v = gen_rtvec (4, value, value, value, value);
16300 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16301 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16302 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16306 v = gen_rtvec (2, value, value);
16308 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16309 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16312 gcc_unreachable ();
16316 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16317 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16318 for an SSE register. If VECT is true, then replicate the mask for
16319 all elements of the vector register. If INVERT is true, then create
16320 a mask excluding the sign bit. */
16323 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16325 enum machine_mode vec_mode, imode;
16326 HOST_WIDE_INT hi, lo;
16331 /* Find the sign bit, sign extended to 2*HWI. */
16338 mode = GET_MODE_INNER (mode);
16340 lo = 0x80000000, hi = lo < 0;
16347 mode = GET_MODE_INNER (mode);
16349 if (HOST_BITS_PER_WIDE_INT >= 64)
16350 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16352 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16357 vec_mode = VOIDmode;
16358 if (HOST_BITS_PER_WIDE_INT >= 64)
16361 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16368 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16372 lo = ~lo, hi = ~hi;
16378 mask = immed_double_const (lo, hi, imode);
16380 vec = gen_rtvec (2, v, mask);
16381 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16382 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16389 gcc_unreachable ();
16393 lo = ~lo, hi = ~hi;
16395 /* Force this value into the low part of a fp vector constant. */
16396 mask = immed_double_const (lo, hi, imode);
16397 mask = gen_lowpart (mode, mask);
16399 if (vec_mode == VOIDmode)
16400 return force_reg (mode, mask);
16402 v = ix86_build_const_vector (vec_mode, vect, mask);
16403 return force_reg (vec_mode, v);
16406 /* Generate code for floating point ABS or NEG. */
16409 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16412 rtx mask, set, dst, src;
16413 bool use_sse = false;
16414 bool vector_mode = VECTOR_MODE_P (mode);
16415 enum machine_mode vmode = mode;
16419 else if (mode == TFmode)
16421 else if (TARGET_SSE_MATH)
16423 use_sse = SSE_FLOAT_MODE_P (mode);
16424 if (mode == SFmode)
16426 else if (mode == DFmode)
16430 /* NEG and ABS performed with SSE use bitwise mask operations.
16431 Create the appropriate mask now. */
16433 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16440 set = gen_rtx_fmt_e (code, mode, src);
16441 set = gen_rtx_SET (VOIDmode, dst, set);
16448 use = gen_rtx_USE (VOIDmode, mask);
16450 par = gen_rtvec (2, set, use);
16453 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16454 par = gen_rtvec (3, set, use, clob);
16456 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
16462 /* Expand a copysign operation. Special case operand 0 being a constant. */
16465 ix86_expand_copysign (rtx operands[])
16467 enum machine_mode mode, vmode;
16468 rtx dest, op0, op1, mask, nmask;
16470 dest = operands[0];
16474 mode = GET_MODE (dest);
16476 if (mode == SFmode)
16478 else if (mode == DFmode)
16483 if (GET_CODE (op0) == CONST_DOUBLE)
16485 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16487 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16488 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16490 if (mode == SFmode || mode == DFmode)
16492 if (op0 == CONST0_RTX (mode))
16493 op0 = CONST0_RTX (vmode);
16496 rtx v = ix86_build_const_vector (vmode, false, op0);
16498 op0 = force_reg (vmode, v);
16501 else if (op0 != CONST0_RTX (mode))
16502 op0 = force_reg (mode, op0);
16504 mask = ix86_build_signbit_mask (vmode, 0, 0);
16506 if (mode == SFmode)
16507 copysign_insn = gen_copysignsf3_const;
16508 else if (mode == DFmode)
16509 copysign_insn = gen_copysigndf3_const;
16511 copysign_insn = gen_copysigntf3_const;
16513 emit_insn (copysign_insn (dest, op0, op1, mask));
16517 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16519 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16520 mask = ix86_build_signbit_mask (vmode, 0, 0);
16522 if (mode == SFmode)
16523 copysign_insn = gen_copysignsf3_var;
16524 else if (mode == DFmode)
16525 copysign_insn = gen_copysigndf3_var;
16527 copysign_insn = gen_copysigntf3_var;
16529 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16533 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16534 be a constant, and so has already been expanded into a vector constant. */
16537 ix86_split_copysign_const (rtx operands[])
16539 enum machine_mode mode, vmode;
16540 rtx dest, op0, mask, x;
16542 dest = operands[0];
16544 mask = operands[3];
16546 mode = GET_MODE (dest);
16547 vmode = GET_MODE (mask);
16549 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16550 x = gen_rtx_AND (vmode, dest, mask);
16551 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16553 if (op0 != CONST0_RTX (vmode))
16555 x = gen_rtx_IOR (vmode, dest, op0);
16556 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
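/* i.e. dest = (sign_source & sign_bit_mask) | abs_constant: the AND
   keeps only the sign bit of the value tied to DEST, and the IOR
   (skipped for a zero constant) merges in the constant magnitude.  */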
16560 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16561 so we have to do two masks. */
16564 ix86_split_copysign_var (rtx operands[])
16566 enum machine_mode mode, vmode;
16567 rtx dest, scratch, op0, op1, mask, nmask, x;
16569 dest = operands[0];
16570 scratch = operands[1];
16573 nmask = operands[4];
16574 mask = operands[5];
16576 mode = GET_MODE (dest);
16577 vmode = GET_MODE (mask);
16579 if (rtx_equal_p (op0, op1))
16581 /* Shouldn't happen often (it's useless, obviously), but when it does
16582 we'd generate incorrect code if we continue below. */
16583 emit_move_insn (dest, op0);
16587 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16589 gcc_assert (REGNO (op1) == REGNO (scratch));
16591 x = gen_rtx_AND (vmode, scratch, mask);
16592 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16595 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16596 x = gen_rtx_NOT (vmode, dest);
16597 x = gen_rtx_AND (vmode, x, op0);
16598 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16602 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16604 x = gen_rtx_AND (vmode, scratch, mask);
16606 else /* alternative 2,4 */
16608 gcc_assert (REGNO (mask) == REGNO (scratch));
16609 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16610 x = gen_rtx_AND (vmode, scratch, op1);
16612 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16614 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16616 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16617 x = gen_rtx_AND (vmode, dest, nmask);
16619 else /* alternative 3,4 */
16621 gcc_assert (REGNO (nmask) == REGNO (dest));
16623 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16624 x = gen_rtx_AND (vmode, dest, op0);
16626 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16629 x = gen_rtx_IOR (vmode, dest, scratch);
16630 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
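/* Whichever register-allocation alternative is taken, the net effect
   is dest = (op0 & ~sign_bit_mask) | (op1 & sign_bit_mask), i.e.
   OP0's magnitude combined with OP1's sign bit.  */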
16633 /* Return TRUE or FALSE depending on whether the first SET in INSN
16634 has source and destination with matching CC modes, and whether the
16635 CC mode is at least as constrained as REQ_MODE. */
16638 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16641 enum machine_mode set_mode;
16643 set = PATTERN (insn);
16644 if (GET_CODE (set) == PARALLEL)
16645 set = XVECEXP (set, 0, 0);
16646 gcc_assert (GET_CODE (set) == SET);
16647 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16649 set_mode = GET_MODE (SET_DEST (set));
16653 if (req_mode != CCNOmode
16654 && (req_mode != CCmode
16655 || XEXP (SET_SRC (set), 1) != const0_rtx))
16659 if (req_mode == CCGCmode)
16663 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16667 if (req_mode == CCZmode)
16678 gcc_unreachable ();
16681 return GET_MODE (SET_SRC (set)) == set_mode;
16684 /* Generate insn patterns to do an integer compare of OPERANDS. */
16687 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16689 enum machine_mode cmpmode;
16692 cmpmode = SELECT_CC_MODE (code, op0, op1);
16693 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16695 /* This is very simple, but making the interface the same as in the
16696 FP case makes the rest of the code easier. */
16697 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16698 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16700 /* Return the test that should be put into the flags user, i.e.
16701 the bcc, scc, or cmov instruction. */
16702 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
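/* e.g. for (GT x y) this emits (set (reg flags) (compare x y)) in the
   mode chosen by SELECT_CC_MODE and hands back
   (gt (reg flags) (const_int 0)) for the branch, setcc or cmov
   expander to consume.  */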
16705 /* Figure out whether to use ordered or unordered fp comparisons.
16706 Return the appropriate mode to use. */
16709 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16711 /* ??? In order to make all comparisons reversible, we do all comparisons
16712 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16713 all forms of trapping and nontrapping comparisons, we can make inequality
16714 comparisons trapping again, since it results in better code when using
16715 FCOM based compares. */
16716 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16720 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16722 enum machine_mode mode = GET_MODE (op0);
16724 if (SCALAR_FLOAT_MODE_P (mode))
16726 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16727 return ix86_fp_compare_mode (code);
16732 /* Only zero flag is needed. */
16733 case EQ: /* ZF=0 */
16734 case NE: /* ZF!=0 */
16736 /* Codes needing carry flag. */
16737 case GEU: /* CF=0 */
16738 case LTU: /* CF=1 */
16739 /* Detect overflow checks. They need just the carry flag. */
16740 if (GET_CODE (op0) == PLUS
16741 && rtx_equal_p (op1, XEXP (op0, 0)))
16745 case GTU: /* CF=0 & ZF=0 */
16746 case LEU: /* CF=1 | ZF=1 */
16747 /* Detect overflow checks. They need just the carry flag. */
16748 if (GET_CODE (op0) == MINUS
16749 && rtx_equal_p (op1, XEXP (op0, 0)))
16753 /* Codes possibly doable only with sign flag when
16754 comparing against zero. */
16755 case GE: /* SF=OF or SF=0 */
16756 case LT: /* SF<>OF or SF=1 */
16757 if (op1 == const0_rtx)
16760 /* For other cases the carry flag is not required. */
16762 /* Codes doable only with the sign flag when comparing
16763 against zero, but we lack a jump instruction for them,
16764 so we need to use relational tests against overflow,
16765 which thus needs to be zero. */
16766 case GT: /* ZF=0 & SF=OF */
16767 case LE: /* ZF=1 | SF<>OF */
16768 if (op1 == const0_rtx)
16772 /* The strcmp pattern does (use flags), and combine may ask us for a proper mode. */
16777 gcc_unreachable ();
16781 /* Return the fixed registers used for condition codes. */
16784 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16791 /* If two condition code modes are compatible, return a condition code
16792 mode which is compatible with both. Otherwise, return
16795 static enum machine_mode
16796 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16801 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16804 if ((m1 == CCGCmode && m2 == CCGOCmode)
16805 || (m1 == CCGOCmode && m2 == CCGCmode))
16811 gcc_unreachable ();
16841 /* These are only compatible with themselves, which we already checked above. */
16848 /* Return a comparison we can do that is equivalent to
16849 swap_condition (code), apart possibly from orderedness.
16850 But never change orderedness if TARGET_IEEE_FP, returning
16851 UNKNOWN in that case if necessary. */
16853 static enum rtx_code
16854 ix86_fp_swap_condition (enum rtx_code code)
16858 case GT: /* GTU - CF=0 & ZF=0 */
16859 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16860 case GE: /* GEU - CF=0 */
16861 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16862 case UNLT: /* LTU - CF=1 */
16863 return TARGET_IEEE_FP ? UNKNOWN : GT;
16864 case UNLE: /* LEU - CF=1 | ZF=1 */
16865 return TARGET_IEEE_FP ? UNKNOWN : GE;
16867 return swap_condition (code);
16871 /* Return the cost of comparison CODE using the best strategy for performance.
16872 All the following functions use the number of instructions as the cost metric.
16873 In the future this should be tweaked to compute bytes for optimize_size and
16874 to take into account the performance of various instructions on various CPUs. */
16877 ix86_fp_comparison_cost (enum rtx_code code)
16881 /* The cost of code using bit-twiddling on %ah. */
16898 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16902 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16905 gcc_unreachable ();
16908 switch (ix86_fp_comparison_strategy (code))
16910 case IX86_FPCMP_COMI:
16911 return arith_cost > 4 ? 3 : 2;
16912 case IX86_FPCMP_SAHF:
16913 return arith_cost > 4 ? 4 : 3;
16919 /* Return the strategy to use for floating-point comparisons. We assume that
16920 fcomi is always preferable where available, since that is also true when
16921 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16923 enum ix86_fpcmp_strategy
16924 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16926 /* Do fcomi/sahf based test when profitable. */
16929 return IX86_FPCMP_COMI;
16931 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16932 return IX86_FPCMP_SAHF;
16934 return IX86_FPCMP_ARITH;
16937 /* Swap, force into registers, or otherwise massage the two operands
16938 to a fp comparison. The operands are updated in place; the new
16939 comparison code is returned. */
16941 static enum rtx_code
16942 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16944 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16945 rtx op0 = *pop0, op1 = *pop1;
16946 enum machine_mode op_mode = GET_MODE (op0);
16947 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16949 /* All of the unordered compare instructions only work on registers.
16950 The same is true of the fcomi compare instructions. The XFmode
16951 compare instructions require registers except when comparing
16952 against zero or when converting operand 1 from fixed point to floating point. */
16956 && (fpcmp_mode == CCFPUmode
16957 || (op_mode == XFmode
16958 && ! (standard_80387_constant_p (op0) == 1
16959 || standard_80387_constant_p (op1) == 1)
16960 && GET_CODE (op1) != FLOAT)
16961 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16963 op0 = force_reg (op_mode, op0);
16964 op1 = force_reg (op_mode, op1);
16968 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16969 things around if they appear profitable, otherwise force op0
16970 into a register. */
16972 if (standard_80387_constant_p (op0) == 0
16974 && ! (standard_80387_constant_p (op1) == 0
16977 enum rtx_code new_code = ix86_fp_swap_condition (code);
16978 if (new_code != UNKNOWN)
16981 tmp = op0, op0 = op1, op1 = tmp;
16987 op0 = force_reg (op_mode, op0);
16989 if (CONSTANT_P (op1))
16991 int tmp = standard_80387_constant_p (op1);
16993 op1 = validize_mem (force_const_mem (op_mode, op1));
16997 op1 = force_reg (op_mode, op1);
17000 op1 = force_reg (op_mode, op1);
17004 /* Try to rearrange the comparison to make it cheaper. */
17005 if (ix86_fp_comparison_cost (code)
17006 > ix86_fp_comparison_cost (swap_condition (code))
17007 && (REG_P (op1) || can_create_pseudo_p ()))
17010 tmp = op0, op0 = op1, op1 = tmp;
17011 code = swap_condition (code);
17013 op0 = force_reg (op_mode, op0);
17021 /* Convert the comparison codes we use to represent FP comparisons into
17022 integer codes that will result in a proper branch. Return UNKNOWN if no such code is available. */
17026 ix86_fp_compare_code_to_integer (enum rtx_code code)
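/* e.g. GT maps to GTU and LT to LTU, because fcomi and fnstsw+sahf
   deposit the FP condition in ZF/CF exactly as an unsigned integer
   compare would.  */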
17055 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17058 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17060 enum machine_mode fpcmp_mode, intcmp_mode;
17063 fpcmp_mode = ix86_fp_compare_mode (code);
17064 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17066 /* Do fcomi/sahf based test when profitable. */
17067 switch (ix86_fp_comparison_strategy (code))
17069 case IX86_FPCMP_COMI:
17070 intcmp_mode = fpcmp_mode;
17071 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17072 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17077 case IX86_FPCMP_SAHF:
17078 intcmp_mode = fpcmp_mode;
17079 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17080 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17084 scratch = gen_reg_rtx (HImode);
17085 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17086 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17089 case IX86_FPCMP_ARITH:
17090 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17091 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17092 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17094 scratch = gen_reg_rtx (HImode);
17095 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17097 /* In the unordered case, we have to check C2 for NaN's, which
17098 doesn't happen to work out to anything nice combination-wise.
17099 So do some bit twiddling on the value we've got in AH to come
17100 up with an appropriate set of condition codes. */
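/* For reference: after fnstsw, AH holds status-word bits 8-15, so
   C0 (set on "less than") is 0x01, C2 (set on "unordered") is 0x04,
   and C3 (set on "equal") is 0x40; the masks below (0x45, 0x44, 0x05,
   0x40, 0x04, 0x01) are combinations of these three bits.  */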
17102 intcmp_mode = CCNOmode;
17107 if (code == GT || !TARGET_IEEE_FP)
17109 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17114 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17115 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17116 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17117 intcmp_mode = CCmode;
17123 if (code == LT && TARGET_IEEE_FP)
17125 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17126 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17127 intcmp_mode = CCmode;
17132 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17138 if (code == GE || !TARGET_IEEE_FP)
17140 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17145 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17146 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17152 if (code == LE && TARGET_IEEE_FP)
17154 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17155 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17156 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17157 intcmp_mode = CCmode;
17162 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17168 if (code == EQ && TARGET_IEEE_FP)
17170 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17171 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17172 intcmp_mode = CCmode;
17177 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17183 if (code == NE && TARGET_IEEE_FP)
17185 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17186 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17192 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17198 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17202 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17207 gcc_unreachable ();
17215 /* Return the test that should be put into the flags user, i.e.
17216 the bcc, scc, or cmov instruction. */
17217 return gen_rtx_fmt_ee (code, VOIDmode,
17218 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17223 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17227 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17228 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17230 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17232 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17233 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17236 ret = ix86_expand_int_compare (code, op0, op1);
17242 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17244 enum machine_mode mode = GET_MODE (op0);
17256 tmp = ix86_expand_compare (code, op0, op1);
17257 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17258 gen_rtx_LABEL_REF (VOIDmode, label),
17260 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17267 /* Expand DImode branch into multiple compare+branch. */
17269 rtx lo[2], hi[2], label2;
17270 enum rtx_code code1, code2, code3;
17271 enum machine_mode submode;
17273 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17275 tmp = op0, op0 = op1, op1 = tmp;
17276 code = swap_condition (code);
17279 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17280 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17282 submode = mode == DImode ? SImode : DImode;
17284 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17285 avoid two branches. This costs one extra insn, so disable when
17286 optimizing for size. */
17288 if ((code == EQ || code == NE)
17289 && (!optimize_insn_for_size_p ()
17290 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17295 if (hi[1] != const0_rtx)
17296 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17297 NULL_RTX, 0, OPTAB_WIDEN);
17300 if (lo[1] != const0_rtx)
17301 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17302 NULL_RTX, 0, OPTAB_WIDEN);
17304 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17305 NULL_RTX, 0, OPTAB_WIDEN);
17307 ix86_expand_branch (code, tmp, const0_rtx, label);
17311 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17312 op1 is a constant and the low word is zero, then we can just
17313 examine the high word. Similarly for low word -1 and
17314 less-or-equal-than or greater-than. */
17316 if (CONST_INT_P (hi[1]))
17319 case LT: case LTU: case GE: case GEU:
17320 if (lo[1] == const0_rtx)
17322 ix86_expand_branch (code, hi[0], hi[1], label);
17326 case LE: case LEU: case GT: case GTU:
17327 if (lo[1] == constm1_rtx)
17329 ix86_expand_branch (code, hi[0], hi[1], label);
17337 /* Otherwise, we need two or three jumps. */
17339 label2 = gen_label_rtx ();
17342 code2 = swap_condition (code);
17343 code3 = unsigned_condition (code);
17347 case LT: case GT: case LTU: case GTU:
17350 case LE: code1 = LT; code2 = GT; break;
17351 case GE: code1 = GT; code2 = LT; break;
17352 case LEU: code1 = LTU; code2 = GTU; break;
17353 case GEU: code1 = GTU; code2 = LTU; break;
17355 case EQ: code1 = UNKNOWN; code2 = NE; break;
17356 case NE: code2 = UNKNOWN; break;
17359 gcc_unreachable ();
17364 * if (hi(a) < hi(b)) goto true;
17365 * if (hi(a) > hi(b)) goto false;
17366 * if (lo(a) < lo(b)) goto true;
17370 if (code1 != UNKNOWN)
17371 ix86_expand_branch (code1, hi[0], hi[1], label);
17372 if (code2 != UNKNOWN)
17373 ix86_expand_branch (code2, hi[0], hi[1], label2);
17375 ix86_expand_branch (code3, lo[0], lo[1], label);
17377 if (code2 != UNKNOWN)
17378 emit_label (label2);
17383 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17388 /* Split branch based on floating point condition. */
17390 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17391 rtx target1, rtx target2, rtx tmp, rtx pushed)
17396 if (target2 != pc_rtx)
17399 code = reverse_condition_maybe_unordered (code);
17404 condition = ix86_expand_fp_compare (code, op1, op2,
17407 /* Remove pushed operand from stack. */
17409 ix86_free_from_memory (GET_MODE (pushed));
17411 i = emit_jump_insn (gen_rtx_SET
17413 gen_rtx_IF_THEN_ELSE (VOIDmode,
17414 condition, target1, target2)));
17415 if (split_branch_probability >= 0)
17416 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17420 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17424 gcc_assert (GET_MODE (dest) == QImode);
17426 ret = ix86_expand_compare (code, op0, op1);
17427 PUT_MODE (ret, QImode);
17428 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
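/* e.g. for (QImode dest, LTU, a, b) this emits the compare and then
   (set (reg:QI dest) (ltu:QI (reg flags) (const_int 0))), which the
   backend matches as a setb instruction.  */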
17431 /* Expand a comparison setting or clearing the carry flag. Return true when
17432 successful, and set POP to the comparison operation. */
17434 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17436 enum machine_mode mode =
17437 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17439 /* Do not handle double-mode compares, which go through a special path. */
17440 if (mode == (TARGET_64BIT ? TImode : DImode))
17443 if (SCALAR_FLOAT_MODE_P (mode))
17445 rtx compare_op, compare_seq;
17447 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17449 /* Shortcut: the following common codes never translate
17450 into carry flag compares. */
17451 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17452 || code == ORDERED || code == UNORDERED)
17455 /* These comparisons require the zero flag; swap operands so they won't. */
17456 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17457 && !TARGET_IEEE_FP)
17462 code = swap_condition (code);
17465 /* Try to expand the comparison and verify that we end up with
17466 a carry-flag-based comparison. This fails only when we decide
17467 to expand the comparison using arithmetic, which is not a
17468 common scenario. */
17470 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17471 compare_seq = get_insns ();
17474 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17475 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17476 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17478 code = GET_CODE (compare_op);
17480 if (code != LTU && code != GEU)
17483 emit_insn (compare_seq);
17488 if (!INTEGRAL_MODE_P (mode))
17497 /* Convert a==0 into (unsigned)a<1. */
17500 if (op1 != const0_rtx)
17503 code = (code == EQ ? LTU : GEU);
17506 /* Convert a>b into b<a or a>=b-1. */
17509 if (CONST_INT_P (op1))
17511 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17512 /* Bail out on overflow. We can still swap the operands, but that
17513 would force loading of the constant into a register. */
17514 if (op1 == const0_rtx
17515 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17517 code = (code == GTU ? GEU : LTU);
17524 code = (code == GTU ? LTU : GEU);
17528 /* Convert a>=0 into (unsigned)a<0x80000000. */
17531 if (mode == DImode || op1 != const0_rtx)
17533 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17534 code = (code == LT ? GEU : LTU);
17538 if (mode == DImode || op1 != constm1_rtx)
17540 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17541 code = (code == LE ? GEU : LTU);
17547 /* Swapping operands may cause a constant to appear as the first operand. */
17548 if (!nonimmediate_operand (op0, VOIDmode))
17550 if (!can_create_pseudo_p ())
17552 op0 = force_reg (mode, op0);
17554 *pop = ix86_expand_compare (code, op0, op1);
17555 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
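/* For example, "a == 0" was rewritten above into "(unsigned) a < 1",
   an LTU whose result lives entirely in the carry flag, so the caller
   can consume it with sbb or adc rather than a setcc sequence.  */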
17560 ix86_expand_int_movcc (rtx operands[])
17562 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17563 rtx compare_seq, compare_op;
17564 enum machine_mode mode = GET_MODE (operands[0]);
17565 bool sign_bit_compare_p = false;
17566 rtx op0 = XEXP (operands[1], 0);
17567 rtx op1 = XEXP (operands[1], 1);
17570 compare_op = ix86_expand_compare (code, op0, op1);
17571 compare_seq = get_insns ();
17574 compare_code = GET_CODE (compare_op);
17576 if ((op1 == const0_rtx && (code == GE || code == LT))
17577 || (op1 == constm1_rtx && (code == GT || code == LE)))
17578 sign_bit_compare_p = true;
17580 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17581 HImode insns, we'd be swallowed in word prefix ops. */
17583 if ((mode != HImode || TARGET_FAST_PREFIX)
17584 && (mode != (TARGET_64BIT ? TImode : DImode))
17585 && CONST_INT_P (operands[2])
17586 && CONST_INT_P (operands[3]))
17588 rtx out = operands[0];
17589 HOST_WIDE_INT ct = INTVAL (operands[2]);
17590 HOST_WIDE_INT cf = INTVAL (operands[3]);
17591 HOST_WIDE_INT diff;
17594 /* Sign bit compares are better done using shifts than by using sbb. */
17596 if (sign_bit_compare_p
17597 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17599 /* Detect overlap between destination and compare sources. */
17602 if (!sign_bit_compare_p)
17605 bool fpcmp = false;
17607 compare_code = GET_CODE (compare_op);
17609 flags = XEXP (compare_op, 0);
17611 if (GET_MODE (flags) == CCFPmode
17612 || GET_MODE (flags) == CCFPUmode)
17616 = ix86_fp_compare_code_to_integer (compare_code);
17619 /* To simplify the rest of the code, restrict to the GEU case. */
17620 if (compare_code == LTU)
17622 HOST_WIDE_INT tmp = ct;
17625 compare_code = reverse_condition (compare_code);
17626 code = reverse_condition (code);
17631 PUT_CODE (compare_op,
17632 reverse_condition_maybe_unordered
17633 (GET_CODE (compare_op)));
17635 PUT_CODE (compare_op,
17636 reverse_condition (GET_CODE (compare_op)));
17640 if (reg_overlap_mentioned_p (out, op0)
17641 || reg_overlap_mentioned_p (out, op1))
17642 tmp = gen_reg_rtx (mode);
17644 if (mode == DImode)
17645 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17647 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17648 flags, compare_op));
17652 if (code == GT || code == GE)
17653 code = reverse_condition (code);
17656 HOST_WIDE_INT tmp = ct;
17661 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17674 tmp = expand_simple_binop (mode, PLUS,
17676 copy_rtx (tmp), 1, OPTAB_DIRECT);
17687 tmp = expand_simple_binop (mode, IOR,
17689 copy_rtx (tmp), 1, OPTAB_DIRECT);
17691 else if (diff == -1 && ct)
17701 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17703 tmp = expand_simple_binop (mode, PLUS,
17704 copy_rtx (tmp), GEN_INT (cf),
17705 copy_rtx (tmp), 1, OPTAB_DIRECT);
/*
 * cmpl op1, op2
 * sbbl dest, dest
 * [notl dest]
 * andl cf - ct, dest
 * [addl ct, dest]
 */
17723 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17726 tmp = expand_simple_binop (mode, AND,
17728 gen_int_mode (cf - ct, mode),
17729 copy_rtx (tmp), 1, OPTAB_DIRECT);
17731 tmp = expand_simple_binop (mode, PLUS,
17732 copy_rtx (tmp), GEN_INT (ct),
17733 copy_rtx (tmp), 1, OPTAB_DIRECT);
17736 if (!rtx_equal_p (tmp, out))
17737 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
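/* Summary (an illustration, not from the original source): the
   mov*cc_0_m1 insn above is an sbb-style insn that materializes
   TMP = -1 when the carry flag is set and TMP = 0 otherwise; the
   optional NOT/IOR/AND/PLUS tail then computes e.g.
     dest = (tmp & (cf - ct)) + ct
   which yields CF for tmp == -1 and CT for tmp == 0, with no branches.  */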
17744 enum machine_mode cmp_mode = GET_MODE (op0);
17747 tmp = ct, ct = cf, cf = tmp;
17750 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17752 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare, which
   is not valid in general (we may convert a non-trapping condition
   into a trapping one); however, on i386 we currently emit all
   comparisons unordered.  */
17758 compare_code = reverse_condition_maybe_unordered (compare_code);
17759 code = reverse_condition_maybe_unordered (code);
17763 compare_code = reverse_condition (compare_code);
17764 code = reverse_condition (code);
17768 compare_code = UNKNOWN;
17769 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17770 && CONST_INT_P (op1))
17772 if (op1 == const0_rtx
17773 && (code == LT || code == GE))
17774 compare_code = code;
17775 else if (op1 == constm1_rtx)
17779 else if (code == GT)
17784 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17785 if (compare_code != UNKNOWN
17786 && GET_MODE (op0) == GET_MODE (out)
17787 && (cf == -1 || ct == -1))
/* If the lea code below could be used, only optimize if it results
   in a two-insn sequence.  */
17792 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17793 || diff == 3 || diff == 5 || diff == 9)
17794 || (compare_code == LT && ct == -1)
17795 || (compare_code == GE && cf == -1))
/*
 * notl op1     (if necessary)
 * sarl $31, op1
 * orl cf, op1
 */
17806 code = reverse_condition (code);
17809 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17811 out = expand_simple_binop (mode, IOR,
17813 out, 1, OPTAB_DIRECT);
17814 if (out != operands[0])
17815 emit_move_insn (operands[0], out);
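/* For example (illustrative): for "dest = (x < 0) ? -1 : cf" the
   store-flag call above emits a "sarl $31" style shift producing 0 or
   -1, and the IOR with CF then turns the 0 into CF while leaving the
   -1 unchanged.  */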
17822 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17823 || diff == 3 || diff == 5 || diff == 9)
17824 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
&& (mode != DImode
    || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
/*
 * xorl dest, dest
 * cmpl op1, op2
 * setcc dest
 * lea cf(dest*(ct-cf)), dest
 *
 * This also catches the degenerate setcc-only case.
 */
17842 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get the arithmetic done in the proper mode to match.  */
17848 tmp = copy_rtx (out);
17852 out1 = copy_rtx (out);
17853 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17857 tmp = gen_rtx_PLUS (mode, tmp, out1);
17863 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17866 if (!rtx_equal_p (tmp, out))
17869 out = force_operand (tmp, copy_rtx (out));
17871 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17873 if (!rtx_equal_p (out, operands[0]))
17874 emit_move_insn (operands[0], copy_rtx (out));
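/* Worked example (illustrative, not from the source): with ct = 5 and
   cf = 2 we have diff = 3, so setcc leaves DEST in {0, 1} and a single
   "lea 2(dest,dest,2), dest" maps 0 -> 2 (= cf) and 1 -> 5 (= ct).  */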
/*
 * General case:                        Jumpful:
 *   xorl dest,dest                     cmpl op1, op2
 *   cmpl op1, op2                      movl ct, dest
 *   setcc dest                         jcc 1f
 *   decl dest                          movl cf, dest
 *   andl (cf-ct),dest                  1:
 *   addl ct,dest
 *
 * Size 20.                             Size 14.
 *
 * This is reasonably steep, but branch mispredict costs are
 * high on modern cpus, so consider failing only if optimizing
 * for size.
 */
17895 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
&& BRANCH_COST (optimize_insn_for_speed_p (),
                false) >= 2)
17901 enum machine_mode cmp_mode = GET_MODE (op0);
17906 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17908 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare,
   which is not valid in general (we may convert a non-trapping
   condition into a trapping one); however, on i386 we currently
   emit all comparisons unordered.  */
17914 code = reverse_condition_maybe_unordered (code);
17918 code = reverse_condition (code);
17919 if (compare_code != UNKNOWN)
17920 compare_code = reverse_condition (compare_code);
17924 if (compare_code != UNKNOWN)
/* notl op1      (if needed)
   sarl $31, op1
   andl (cf-ct), op1
   addl ct, op1

   For x < 0 (resp. x <= -1) there will be no notl,
   so if possible swap the constants to get rid of the
   complement.
   True/false will be -1/0 while code below (store flag
   followed by decrement) is 0/-1, so the constants need
   to be exchanged once more.  */
17938 if (compare_code == GE || !cf)
17940 code = reverse_condition (code);
17945 HOST_WIDE_INT tmp = cf;
17950 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17954 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17956 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17958 copy_rtx (out), 1, OPTAB_DIRECT);
17961 out = expand_simple_binop (mode, AND, copy_rtx (out),
17962 gen_int_mode (cf - ct, mode),
17963 copy_rtx (out), 1, OPTAB_DIRECT);
17965 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17966 copy_rtx (out), 1, OPTAB_DIRECT);
17967 if (!rtx_equal_p (out, operands[0]))
17968 emit_move_insn (operands[0], copy_rtx (out));
17974 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
/* Try a few more things with specific constants and a variable.  */
17979 rtx var, orig_out, out, tmp;
17981 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17984 /* If one of the two operands is an interesting constant, load a
17985 constant with the above and mask it in with a logical operation. */
17987 if (CONST_INT_P (operands[2]))
17990 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17991 operands[3] = constm1_rtx, op = and_optab;
17992 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17993 operands[3] = const0_rtx, op = ior_optab;
17997 else if (CONST_INT_P (operands[3]))
18000 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18001 operands[2] = constm1_rtx, op = and_optab;
else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18003 operands[2] = const0_rtx, op = ior_optab;
18010 orig_out = operands[0];
18011 tmp = gen_reg_rtx (mode);
18014 /* Recurse to get the constant loaded. */
18015 if (ix86_expand_int_movcc (operands) == 0)
18018 /* Mask in the interesting variable. */
18019 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18021 if (!rtx_equal_p (out, orig_out))
18022 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
/*
 * For comparison with the code above:
 *
 * movl cf, dest
 * movl ct, tmp
 * cmpl op1, op2
 * cmovcc tmp, dest
 */
18038 if (! nonimmediate_operand (operands[2], mode))
18039 operands[2] = force_reg (mode, operands[2]);
18040 if (! nonimmediate_operand (operands[3], mode))
18041 operands[3] = force_reg (mode, operands[3]);
if (! register_operand (operands[2], VOIDmode)
    && (mode == QImode
        || ! register_operand (operands[3], VOIDmode)))
  operands[2] = force_reg (mode, operands[2]);

if (mode == QImode
    && ! register_operand (operands[3], VOIDmode))
  operands[3] = force_reg (mode, operands[3]);
18052 emit_insn (compare_seq);
18053 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18054 gen_rtx_IF_THEN_ELSE (mode,
18055 compare_op, operands[2],
18060 /* Swap, force into registers, or otherwise massage the two operands
18061 to an sse comparison with a mask result. Thus we differ a bit from
18062 ix86_prepare_fp_compare_args which expects to produce a flags result.
18064 The DEST operand exists to help determine whether to commute commutative
18065 operators. The POP0/POP1 operands are updated in place. The new
18066 comparison code is returned, or UNKNOWN if not implementable. */
18068 static enum rtx_code
18069 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18070 rtx *pop0, rtx *pop1)
18078 /* We have no LTGT as an operator. We could implement it with
18079 NE & ORDERED, but this requires an extra temporary. It's
18080 not clear that it's worth it. */
18087 /* These are supported directly. */
18094 /* For commutative operators, try to canonicalize the destination
18095 operand to be first in the comparison - this helps reload to
18096 avoid extra moves. */
18097 if (!dest || !rtx_equal_p (dest, *pop1))
18105 /* These are not supported directly. Swap the comparison operands
18106 to transform into something that is supported. */
18110 code = swap_condition (code);
18114 gcc_unreachable ();
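/* For instance (an illustration, not in the source): LT and LE map
   directly onto the cmpltps/cmpleps predicates, while GT and GE have
   no direct SSE encoding and are handled above by swapping the
   operands and using LT/LE instead.  */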
18120 /* Detect conditional moves that exactly match min/max operational
18121 semantics. Note that this is IEEE safe, as long as we don't
18122 interchange the operands.
18124 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18125 and TRUE if the operation is successful and instructions are emitted. */
18128 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18129 rtx cmp_op1, rtx if_true, rtx if_false)
18131 enum machine_mode mode;
18137 else if (code == UNGE)
18140 if_true = if_false;
18146 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18148 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18153 mode = GET_MODE (dest);
18155 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18156 but MODE may be a vector mode and thus not appropriate. */
18157 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18159 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18162 if_true = force_reg (mode, if_true);
18163 v = gen_rtvec (2, if_true, if_false);
18164 tmp = gen_rtx_UNSPEC (mode, v, u);
18168 code = is_min ? SMIN : SMAX;
18169 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18172 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
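/* E.g. (illustrative): "dest = a < b ? a : b" matches the min pattern
   and can become a single minps/minss; when NaNs or signed zeros may
   matter, the UNSPEC form is emitted instead so that later passes
   cannot commute the operands and change the IEEE behavior of the
   hardware min/max.  */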
18176 /* Expand an sse vector comparison. Return the register with the result. */
18179 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18180 rtx op_true, rtx op_false)
18182 enum machine_mode mode = GET_MODE (dest);
18185 cmp_op0 = force_reg (mode, cmp_op0);
18186 if (!nonimmediate_operand (cmp_op1, mode))
18187 cmp_op1 = force_reg (mode, cmp_op1);
18190 || reg_overlap_mentioned_p (dest, op_true)
18191 || reg_overlap_mentioned_p (dest, op_false))
18192 dest = gen_reg_rtx (mode);
18194 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18195 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18200 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18201 operations. This is used for both scalar and vector conditional moves. */
18204 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18206 enum machine_mode mode = GET_MODE (dest);
18209 if (op_false == CONST0_RTX (mode))
18211 op_true = force_reg (mode, op_true);
18212 x = gen_rtx_AND (mode, cmp, op_true);
18213 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18215 else if (op_true == CONST0_RTX (mode))
18217 op_false = force_reg (mode, op_false);
18218 x = gen_rtx_NOT (mode, cmp);
18219 x = gen_rtx_AND (mode, x, op_false);
18220 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18222 else if (TARGET_XOP)
18224 rtx pcmov = gen_rtx_SET (mode, dest,
18225 gen_rtx_IF_THEN_ELSE (mode, cmp,
18232 op_true = force_reg (mode, op_true);
18233 op_false = force_reg (mode, op_false);
18235 t2 = gen_reg_rtx (mode);
18237 t3 = gen_reg_rtx (mode);
18241 x = gen_rtx_AND (mode, op_true, cmp);
18242 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18244 x = gen_rtx_NOT (mode, cmp);
18245 x = gen_rtx_AND (mode, x, op_false);
18246 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18248 x = gen_rtx_IOR (mode, t3, t2);
18249 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
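/* Summary (an illustration, not from the source): the general case
   above computes the classic mask blend
     dest = (cmp & op_true) | (~cmp & op_false)
   where CMP is an all-ones/all-zeros per-element mask produced by the
   SSE compare; on TARGET_XOP the single vpcmov insn expresses the same
   selection directly.  */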
18253 /* Expand a floating-point conditional move. Return true if successful. */
18256 ix86_expand_fp_movcc (rtx operands[])
18258 enum machine_mode mode = GET_MODE (operands[0]);
18259 enum rtx_code code = GET_CODE (operands[1]);
18260 rtx tmp, compare_op;
18261 rtx op0 = XEXP (operands[1], 0);
18262 rtx op1 = XEXP (operands[1], 1);
18264 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18266 enum machine_mode cmode;
18268 /* Since we've no cmove for sse registers, don't force bad register
18269 allocation just to gain access to it. Deny movcc when the
18270 comparison mode doesn't match the move mode. */
18271 cmode = GET_MODE (op0);
18272 if (cmode == VOIDmode)
18273 cmode = GET_MODE (op1);
18277 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18278 if (code == UNKNOWN)
18281 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18282 operands[2], operands[3]))
18285 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18286 operands[2], operands[3]);
18287 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18291 /* The floating point conditional move instructions don't directly
18292 support conditions resulting from a signed integer comparison. */
18294 compare_op = ix86_expand_compare (code, op0, op1);
18295 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18297 tmp = gen_reg_rtx (QImode);
18298 ix86_expand_setcc (tmp, code, op0, op1);
18300 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18303 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18304 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18305 operands[2], operands[3])));
18310 /* Expand a floating-point vector conditional move; a vcond operation
18311 rather than a movcc operation. */
18314 ix86_expand_fp_vcond (rtx operands[])
18316 enum rtx_code code = GET_CODE (operands[3]);
18319 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18320 &operands[4], &operands[5]);
18321 if (code == UNKNOWN)
18324 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18325 operands[5], operands[1], operands[2]))
18328 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18329 operands[1], operands[2]);
18330 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18334 /* Expand a signed/unsigned integral vector conditional move. */
18337 ix86_expand_int_vcond (rtx operands[])
18339 enum machine_mode mode = GET_MODE (operands[0]);
18340 enum rtx_code code = GET_CODE (operands[3]);
18341 bool negate = false;
18344 cop0 = operands[4];
18345 cop1 = operands[5];
18347 /* XOP supports all of the comparisons on all vector int types. */
18350 /* Canonicalize the comparison to EQ, GT, GTU. */
18361 code = reverse_condition (code);
18367 code = reverse_condition (code);
18373 code = swap_condition (code);
18374 x = cop0, cop0 = cop1, cop1 = x;
18378 gcc_unreachable ();
18381 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18382 if (mode == V2DImode)
18387 /* SSE4.1 supports EQ. */
18388 if (!TARGET_SSE4_1)
18394 /* SSE4.2 supports GT/GTU. */
18395 if (!TARGET_SSE4_2)
18400 gcc_unreachable ();
18404 /* Unsigned parallel compare is not supported by the hardware.
18405 Play some tricks to turn this into a signed comparison
18409 cop0 = force_reg (mode, cop0);
18417 rtx (*gen_sub3) (rtx, rtx, rtx);
18419 /* Subtract (-(INT MAX) - 1) from both operands to make
18421 mask = ix86_build_signbit_mask (mode, true, false);
18422 gen_sub3 = (mode == V4SImode
18423 ? gen_subv4si3 : gen_subv2di3);
18424 t1 = gen_reg_rtx (mode);
18425 emit_insn (gen_sub3 (t1, cop0, mask));
18427 t2 = gen_reg_rtx (mode);
18428 emit_insn (gen_sub3 (t2, cop1, mask));
18438 /* Perform a parallel unsigned saturating subtraction. */
18439 x = gen_reg_rtx (mode);
18440 emit_insn (gen_rtx_SET (VOIDmode, x,
18441 gen_rtx_US_MINUS (mode, cop0, cop1)));
18444 cop1 = CONST0_RTX (mode);
18450 gcc_unreachable ();
18455 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18456 operands[1+negate], operands[2-negate]);
18458 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18459 operands[2-negate]);
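/* Worked example (illustrative, not from the source): for a V4SImode
   GTU compare, the code above subtracts the sign-bit mask 0x80000000
   from both operands; modulo 2^32 this equals XORing the sign bit, and
     x >u y  <==>  (int) (x ^ 0x80000000) > (int) (y ^ 0x80000000)
   so the signed pcmpgtd can then perform the unsigned comparison.  */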
18463 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18464 true if we should do zero extension, else sign extension. HIGH_P is
18465 true if we want the N/2 high elements, else the low elements. */
18468 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18470 enum machine_mode imode = GET_MODE (operands[1]);
18471 rtx (*unpack)(rtx, rtx, rtx);
18478 unpack = gen_vec_interleave_highv16qi;
18480 unpack = gen_vec_interleave_lowv16qi;
18484 unpack = gen_vec_interleave_highv8hi;
18486 unpack = gen_vec_interleave_lowv8hi;
18490 unpack = gen_vec_interleave_highv4si;
18492 unpack = gen_vec_interleave_lowv4si;
18495 gcc_unreachable ();
18498 dest = gen_lowpart (imode, operands[0]);
18501 se = force_reg (imode, CONST0_RTX (imode));
18503 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18504 operands[1], pc_rtx, pc_rtx);
18506 emit_insn (unpack (dest, operands[1], se));
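/* For instance (illustrative): for a signed V8HImode widening, the
   mask SE is built by comparing 0 > SRC element-wise (a pcmpgtw
   against zero), yielding -1 for negative elements and 0 otherwise;
   interleaving SRC with SE thus fills the new high half of each
   element with copies of its sign bit, i.e. a sign extension.  */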
18509 /* This function performs the same task as ix86_expand_sse_unpack,
18510 but with SSE4.1 instructions. */
18513 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18515 enum machine_mode imode = GET_MODE (operands[1]);
18516 rtx (*unpack)(rtx, rtx);
18523 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18525 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18529 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18531 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18535 unpack = gen_sse4_1_zero_extendv2siv2di2;
18537 unpack = gen_sse4_1_sign_extendv2siv2di2;
18540 gcc_unreachable ();
18543 dest = operands[0];
18546 /* Shift higher 8 bytes to lower 8 bytes. */
18547 src = gen_reg_rtx (imode);
18548 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18549 gen_lowpart (V1TImode, operands[1]),
18555 emit_insn (unpack (dest, src));
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by a conditional move can be
   done by generic code.  */
18562 ix86_expand_int_addcc (rtx operands[])
18564 enum rtx_code code = GET_CODE (operands[1]);
18566 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18568 rtx val = const0_rtx;
18569 bool fpcmp = false;
18570 enum machine_mode mode;
18571 rtx op0 = XEXP (operands[1], 0);
18572 rtx op1 = XEXP (operands[1], 1);
18574 if (operands[3] != const1_rtx
18575 && operands[3] != constm1_rtx)
18577 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18579 code = GET_CODE (compare_op);
18581 flags = XEXP (compare_op, 0);
18583 if (GET_MODE (flags) == CCFPmode
18584 || GET_MODE (flags) == CCFPUmode)
18587 code = ix86_fp_compare_code_to_integer (code);
18594 PUT_CODE (compare_op,
18595 reverse_condition_maybe_unordered
18596 (GET_CODE (compare_op)));
18598 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18601 mode = GET_MODE (operands[0]);
18603 /* Construct either adc or sbb insn. */
18604 if ((code == LTU) == (operands[3] == constm1_rtx))
18609 insn = gen_subqi3_carry;
18612 insn = gen_subhi3_carry;
18615 insn = gen_subsi3_carry;
18618 insn = gen_subdi3_carry;
18621 gcc_unreachable ();
18629 insn = gen_addqi3_carry;
18632 insn = gen_addhi3_carry;
18635 insn = gen_addsi3_carry;
18638 insn = gen_adddi3_carry;
18641 gcc_unreachable ();
18644 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
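/* E.g. (an illustration, not in the source): "x = (a < b) ? x + 1 : x"
   with an unsigned compare becomes roughly
     cmpl  b, a    ; carry flag = (a < b)
     adcl  $0, x   ; x += carry
   which is why only the +1/-1 cases are accepted above.  */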
/* Split operands 0 and 1 into half-mode parts.  Similar to
   split_double_mode, but works for floating-point parameters and
   non-offsettable memories.  For pushes, it returns just stack offsets;
   the values will be saved in the right order.  At most four parts
   are generated.  */
18656 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18661 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18663 size = (GET_MODE_SIZE (mode) + 4) / 8;
18665 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18666 gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool references to immediates.  This is used by
   fp moves that force all constants to memory to allow combining.  */
18670 if (MEM_P (operand) && MEM_READONLY_P (operand))
18672 rtx tmp = maybe_get_pool_constant (operand);
18677 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
18680 int ok = push_operand (operand, VOIDmode);
18684 operand = copy_rtx (operand);
18685 PUT_MODE (operand, Pmode);
18686 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18690 if (GET_CODE (operand) == CONST_VECTOR)
18692 enum machine_mode imode = int_mode_for_mode (mode);
18693 /* Caution: if we looked through a constant pool memory above,
18694 the operand may actually have a different mode now. That's
18695 ok, since we want to pun this all the way back to an integer. */
18696 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18697 gcc_assert (operand != NULL);
18703 if (mode == DImode)
18704 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18709 if (REG_P (operand))
18711 gcc_assert (reload_completed);
18712 for (i = 0; i < size; i++)
18713 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18715 else if (offsettable_memref_p (operand))
18717 operand = adjust_address (operand, SImode, 0);
18718 parts[0] = operand;
18719 for (i = 1; i < size; i++)
18720 parts[i] = adjust_address (operand, SImode, 4 * i);
18722 else if (GET_CODE (operand) == CONST_DOUBLE)
18727 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18731 real_to_target (l, &r, mode);
18732 parts[3] = gen_int_mode (l[3], SImode);
18733 parts[2] = gen_int_mode (l[2], SImode);
18736 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18737 parts[2] = gen_int_mode (l[2], SImode);
18740 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18743 gcc_unreachable ();
18745 parts[1] = gen_int_mode (l[1], SImode);
18746 parts[0] = gen_int_mode (l[0], SImode);
18749 gcc_unreachable ();
18754 if (mode == TImode)
18755 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18756 if (mode == XFmode || mode == TFmode)
18758 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18759 if (REG_P (operand))
18761 gcc_assert (reload_completed);
18762 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18763 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18765 else if (offsettable_memref_p (operand))
18767 operand = adjust_address (operand, DImode, 0);
18768 parts[0] = operand;
18769 parts[1] = adjust_address (operand, upper_mode, 8);
18771 else if (GET_CODE (operand) == CONST_DOUBLE)
18776 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18777 real_to_target (l, &r, mode);
18779 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18780 if (HOST_BITS_PER_WIDE_INT >= 64)
18783 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18784 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18787 parts[0] = immed_double_const (l[0], l[1], DImode);
18789 if (upper_mode == SImode)
18790 parts[1] = gen_int_mode (l[2], SImode);
18791 else if (HOST_BITS_PER_WIDE_INT >= 64)
18794 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18795 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18798 parts[1] = immed_double_const (l[2], l[3], DImode);
18801 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-5 contain the input values
   in the correct order; operands 6-9 contain the output values.  */
18814 ix86_split_long_move (rtx operands[])
18819 int collisions = 0;
18820 enum machine_mode mode = GET_MODE (operands[0]);
18821 bool collisionparts[4];
/* The DFmode expanders may ask us to move double.
   For a 64-bit target this is a single move.  By hiding the fact
   here we simplify the i386.md splitters.  */
18826 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
/* Optimize constant pool references to immediates.  This is used by
   fp moves that force all constants to memory to allow combining.  */
18831 if (MEM_P (operands[1])
18832 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18833 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18834 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18835 if (push_operand (operands[0], VOIDmode))
18837 operands[0] = copy_rtx (operands[0]);
18838 PUT_MODE (operands[0], Pmode);
18841 operands[0] = gen_lowpart (DImode, operands[0]);
18842 operands[1] = gen_lowpart (DImode, operands[1]);
18843 emit_move_insn (operands[0], operands[1]);
18847 /* The only non-offsettable memory we handle is push. */
18848 if (push_operand (operands[0], VOIDmode))
18851 gcc_assert (!MEM_P (operands[0])
18852 || offsettable_memref_p (operands[0]));
18854 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18855 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care with source operands on the stack.  */
18858 if (push && MEM_P (operands[1])
18859 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18861 rtx src_base = XEXP (part[1][nparts - 1], 0);
18863 /* Compensate for the stack decrement by 4. */
18864 if (!TARGET_64BIT && nparts == 3
18865 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18866 src_base = plus_constant (src_base, 4);
/* src_base refers to the stack pointer and is
   automatically decreased by the emitted pushes.  */
18870 for (i = 0; i < nparts; i++)
18871 part[1][i] = change_address (part[1][i],
18872 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
18877 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18881 for (i = 0; i < nparts; i++)
18884 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18885 if (collisionparts[i])
/* A collision in the middle part can be handled by reordering.  */
18890 if (collisions == 1 && nparts == 3 && collisionparts [1])
18892 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18893 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18895 else if (collisions == 1
18897 && (collisionparts [1] || collisionparts [2]))
18899 if (collisionparts [1])
18901 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18902 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18906 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18907 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea to the last part and use only one colliding move.  */
18913 else if (collisions > 1)
18919 base = part[0][nparts - 1];
/* Handle the case when the last part isn't valid for lea.
   This happens in 64-bit mode when storing the 12-byte XFmode value.  */
18923 if (GET_MODE (base) != Pmode)
18924 base = gen_rtx_REG (Pmode, REGNO (base));
18926 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18927 part[1][0] = replace_equiv_address (part[1][0], base);
18928 for (i = 1; i < nparts; i++)
18930 tmp = plus_constant (base, UNITS_PER_WORD * i);
18931 part[1][i] = replace_equiv_address (part[1][i], tmp);
18942 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18943 emit_insn (gen_addsi3 (stack_pointer_rtx,
18944 stack_pointer_rtx, GEN_INT (-4)));
18945 emit_move_insn (part[0][2], part[1][2]);
18947 else if (nparts == 4)
18949 emit_move_insn (part[0][3], part[1][3]);
18950 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  If this is a
   register, it is OK -- we will just use the larger counterpart.  We also
   retype the memory -- this comes from an attempt to avoid the REX prefix
   when moving the second half of a TFmode value.  */
18959 if (GET_MODE (part[1][1]) == SImode)
18961 switch (GET_CODE (part[1][1]))
18964 part[1][1] = adjust_address (part[1][1], DImode, 0);
18968 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18972 gcc_unreachable ();
18975 if (GET_MODE (part[1][0]) == SImode)
18976 part[1][0] = part[1][1];
18979 emit_move_insn (part[0][1], part[1][1]);
18980 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it
   is copied.  */
18985 if ((REG_P (part[0][0])
18986 && REG_P (part[1][1])
18987 && (REGNO (part[0][0]) == REGNO (part[1][1])
18989 && REGNO (part[0][0]) == REGNO (part[1][2]))
18991 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18993 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18995 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18997 operands[2 + i] = part[0][j];
18998 operands[6 + i] = part[1][j];
19003 for (i = 0; i < nparts; i++)
19005 operands[2 + i] = part[0][i];
19006 operands[6 + i] = part[1][i];
19010 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19011 if (optimize_insn_for_size_p ())
19013 for (j = 0; j < nparts - 1; j++)
19014 if (CONST_INT_P (operands[6 + j])
19015 && operands[6 + j] != const0_rtx
19016 && REG_P (operands[2 + j]))
19017 for (i = j; i < nparts - 1; i++)
19018 if (CONST_INT_P (operands[7 + i])
19019 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19020 operands[7 + i] = operands[2 + j];
19023 for (i = 0; i < nparts; i++)
19024 emit_move_insn (operands[2 + i], operands[6 + i]);
19029 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19030 left shift by a constant, either using a single shift or
19031 a sequence of add instructions. */
19034 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19036 rtx (*insn)(rtx, rtx, rtx);
19039 || (count * ix86_cost->add <= ix86_cost->shift_const
19040 && !optimize_insn_for_size_p ()))
19042 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19043 while (count-- > 0)
19044 emit_insn (insn (operand, operand, operand));
19048 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19049 emit_insn (insn (operand, operand, GEN_INT (count)));
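/* For example (illustrative): on a target where two adds are cheaper
   than one constant shift, ix86_expand_ashl_const (op, 2, DImode)
   emits
     addl op, op
     addl op, op
   doubling the operand twice instead of using "shll $2, op".  */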
19054 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19056 rtx (*gen_ashl3)(rtx, rtx, rtx);
19057 rtx (*gen_shld)(rtx, rtx, rtx);
19058 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19060 rtx low[2], high[2];
19063 if (CONST_INT_P (operands[2]))
19065 split_double_mode (mode, operands, 2, low, high);
19066 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19068 if (count >= half_width)
19070 emit_move_insn (high[0], low[1]);
19071 emit_move_insn (low[0], const0_rtx);
19073 if (count > half_width)
19074 ix86_expand_ashl_const (high[0], count - half_width, mode);
19078 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19080 if (!rtx_equal_p (operands[0], operands[1]))
19081 emit_move_insn (operands[0], operands[1]);
19083 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19084 ix86_expand_ashl_const (low[0], count, mode);
19089 split_double_mode (mode, operands, 1, low, high);
19091 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19093 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
19097 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19099 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19101 ix86_expand_clear (low[0]);
19102 ix86_expand_clear (high[0]);
19103 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19105 d = gen_lowpart (QImode, low[0]);
19106 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19107 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19108 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19110 d = gen_lowpart (QImode, high[0]);
19111 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19112 s = gen_rtx_NE (QImode, flags, const0_rtx);
19113 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19116 /* Otherwise, we can get the same results by manually performing
19117 a bit extract operation on bit 5/6, and then performing the two
19118 shifts. The two methods of getting 0/1 into low/high are exactly
19119 the same size. Avoiding the shift in the bit extract case helps
19120 pentium4 a bit; no one else seems to care much either way. */
19123 enum machine_mode half_mode;
19124 rtx (*gen_lshr3)(rtx, rtx, rtx);
19125 rtx (*gen_and3)(rtx, rtx, rtx);
19126 rtx (*gen_xor3)(rtx, rtx, rtx);
19127 HOST_WIDE_INT bits;
19130 if (mode == DImode)
19132 half_mode = SImode;
19133 gen_lshr3 = gen_lshrsi3;
19134 gen_and3 = gen_andsi3;
19135 gen_xor3 = gen_xorsi3;
19140 half_mode = DImode;
19141 gen_lshr3 = gen_lshrdi3;
19142 gen_and3 = gen_anddi3;
19143 gen_xor3 = gen_xordi3;
19147 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19148 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19150 x = gen_lowpart (half_mode, operands[2]);
19151 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19153 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19154 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19155 emit_move_insn (low[0], high[0]);
19156 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19159 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19160 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
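/* Sketch of the sequence above (illustrative; BITS is assumed to be 5
   for DImode and 6 for TImode):
     high = (count >> BITS) & 1;   the bit selecting the upper half
     low  = high ^ 1;
     low <<= count;  high <<= count;
   Only one of LOW/HIGH is nonzero, and the hardware shifter masks the
   count to its low BITS bits anyway.  */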
19164 if (operands[1] == constm1_rtx)
19166 /* For -1 << N, we can avoid the shld instruction, because we
19167 know that we're shifting 0...31/63 ones into a -1. */
19168 emit_move_insn (low[0], constm1_rtx);
19169 if (optimize_insn_for_size_p ())
19170 emit_move_insn (high[0], low[0]);
19172 emit_move_insn (high[0], constm1_rtx);
19176 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19178 if (!rtx_equal_p (operands[0], operands[1]))
19179 emit_move_insn (operands[0], operands[1]);
19181 split_double_mode (mode, operands, 1, low, high);
19182 emit_insn (gen_shld (high[0], low[0], operands[2]));
19185 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19187 if (TARGET_CMOVE && scratch)
19189 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19190 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19192 ix86_expand_clear (scratch);
19193 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19197 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19198 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19200 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19205 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19207 rtx (*gen_ashr3)(rtx, rtx, rtx)
19208 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19209 rtx (*gen_shrd)(rtx, rtx, rtx);
19210 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19212 rtx low[2], high[2];
19215 if (CONST_INT_P (operands[2]))
19217 split_double_mode (mode, operands, 2, low, high);
19218 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19220 if (count == GET_MODE_BITSIZE (mode) - 1)
19222 emit_move_insn (high[0], high[1]);
19223 emit_insn (gen_ashr3 (high[0], high[0],
19224 GEN_INT (half_width - 1)));
19225 emit_move_insn (low[0], high[0]);
19228 else if (count >= half_width)
19230 emit_move_insn (low[0], high[1]);
19231 emit_move_insn (high[0], low[0]);
19232 emit_insn (gen_ashr3 (high[0], high[0],
19233 GEN_INT (half_width - 1)));
19235 if (count > half_width)
19236 emit_insn (gen_ashr3 (low[0], low[0],
19237 GEN_INT (count - half_width)));
19241 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19243 if (!rtx_equal_p (operands[0], operands[1]))
19244 emit_move_insn (operands[0], operands[1]);
19246 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19247 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19252 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19254 if (!rtx_equal_p (operands[0], operands[1]))
19255 emit_move_insn (operands[0], operands[1]);
19257 split_double_mode (mode, operands, 1, low, high);
19259 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19260 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19262 if (TARGET_CMOVE && scratch)
19264 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19265 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19267 emit_move_insn (scratch, high[0]);
19268 emit_insn (gen_ashr3 (scratch, scratch,
19269 GEN_INT (half_width - 1)));
19270 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19275 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19276 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19278 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19284 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19286 rtx (*gen_lshr3)(rtx, rtx, rtx)
19287 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19288 rtx (*gen_shrd)(rtx, rtx, rtx);
19289 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19291 rtx low[2], high[2];
19294 if (CONST_INT_P (operands[2]))
19296 split_double_mode (mode, operands, 2, low, high);
19297 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19299 if (count >= half_width)
19301 emit_move_insn (low[0], high[1]);
19302 ix86_expand_clear (high[0]);
19304 if (count > half_width)
19305 emit_insn (gen_lshr3 (low[0], low[0],
19306 GEN_INT (count - half_width)));
19310 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19312 if (!rtx_equal_p (operands[0], operands[1]))
19313 emit_move_insn (operands[0], operands[1]);
19315 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19316 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19321 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19323 if (!rtx_equal_p (operands[0], operands[1]))
19324 emit_move_insn (operands[0], operands[1]);
19326 split_double_mode (mode, operands, 1, low, high);
19328 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19329 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19331 if (TARGET_CMOVE && scratch)
19333 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19334 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19336 ix86_expand_clear (scratch);
19337 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19342 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19343 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19345 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
19352 predict_jump (int prob)
19354 rtx insn = get_last_insn ();
19355 gcc_assert (JUMP_P (insn));
19356 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
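/* PROB is measured against REG_BR_PROB_BASE (10000), so for example
   predict_jump (REG_BR_PROB_BASE * 90 / 100) marks the just-emitted
   branch as taken 90% of the time, as the alignment tests below do.  */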
/* Helper function for the string operations below.  Emit code to test
   the VALUE bit(s) of VARIABLE; if none are set (i.e. VARIABLE is
   suitably aligned), jump to the returned label.  */
19362 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19364 rtx label = gen_label_rtx ();
19365 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19366 if (GET_MODE (variable) == DImode)
19367 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19369 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19370 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19373 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19375 predict_jump (REG_BR_PROB_BASE * 90 / 100);
/* Adjust COUNTREG by VALUE.  */
19381 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19383 rtx (*gen_add)(rtx, rtx, rtx)
19384 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19386 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
/* Zero-extend the possibly-SImode EXP into a Pmode register.  */
19391 ix86_zero_extend_to_Pmode (rtx exp)
19394 if (GET_MODE (exp) == VOIDmode)
19395 return force_reg (Pmode, exp);
19396 if (GET_MODE (exp) == Pmode)
19397 return copy_to_mode_reg (Pmode, exp);
19398 r = gen_reg_rtx (Pmode);
19399 emit_insn (gen_zero_extendsidi2 (r, exp));
19403 /* Divide COUNTREG by SCALE. */
19405 scale_counter (rtx countreg, int scale)
19411 if (CONST_INT_P (countreg))
19412 return GEN_INT (INTVAL (countreg) / scale);
19413 gcc_assert (REG_P (countreg));
19415 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19416 GEN_INT (exact_log2 (scale)),
19417 NULL, 1, OPTAB_DIRECT);
19421 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19422 DImode for constant loop counts. */
19424 static enum machine_mode
19425 counter_mode (rtx count_exp)
19427 if (GET_MODE (count_exp) != VOIDmode)
19428 return GET_MODE (count_exp);
19429 if (!CONST_INT_P (count_exp))
19431 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory to VALUE (supposed to be
   in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx's to feed proper aliasing info.  */
19446 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19447 rtx destptr, rtx srcptr, rtx value,
19448 rtx count, enum machine_mode mode, int unroll,
19451 rtx out_label, top_label, iter, tmp;
19452 enum machine_mode iter_mode = counter_mode (count);
19453 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19454 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19460 top_label = gen_label_rtx ();
19461 out_label = gen_label_rtx ();
19462 iter = gen_reg_rtx (iter_mode);
19464 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19465 NULL, 1, OPTAB_DIRECT);
19466 /* Those two should combine. */
19467 if (piece_size == const1_rtx)
19469 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19471 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19473 emit_move_insn (iter, const0_rtx);
19475 emit_label (top_label);
19477 tmp = convert_modes (Pmode, iter_mode, iter, true);
19478 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19479 destmem = change_address (destmem, mode, x_addr);
19483 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19484 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using four temporaries is overkill in 32-bit mode.  */
19489 if (!TARGET_64BIT && 0)
19491 for (i = 0; i < unroll; i++)
19496 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19498 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19500 emit_move_insn (destmem, srcmem);
19506 gcc_assert (unroll <= 4);
19507 for (i = 0; i < unroll; i++)
19509 tmpreg[i] = gen_reg_rtx (mode);
19513 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19515 emit_move_insn (tmpreg[i], srcmem);
19517 for (i = 0; i < unroll; i++)
19522 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19524 emit_move_insn (destmem, tmpreg[i]);
19529 for (i = 0; i < unroll; i++)
19533 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19534 emit_move_insn (destmem, value);
19537 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19538 true, OPTAB_LIB_WIDEN);
19540 emit_move_insn (iter, tmp);
19542 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19544 if (expected_size != -1)
19546 expected_size /= GET_MODE_SIZE (mode) * unroll;
19547 if (expected_size == 0)
19549 else if (expected_size > REG_BR_PROB_BASE)
19550 predict_jump (REG_BR_PROB_BASE - 1);
19552 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19555 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19556 iter = ix86_zero_extend_to_Pmode (iter);
19557 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19558 true, OPTAB_LIB_WIDEN);
19559 if (tmp != destptr)
19560 emit_move_insn (destptr, tmp);
19563 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19564 true, OPTAB_LIB_WIDEN);
19566 emit_move_insn (srcptr, tmp);
19568 emit_label (out_label);
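/* Shape of the emitted code (an illustrative sketch, not from the
   source), with piece = GET_MODE_SIZE (mode) * unroll:
     size = count & -piece;
     if (size == 0) goto out;
     iter = 0;
   top:
     ... UNROLL copies/stores of one MODE chunk at dest + iter
         (and src + iter for the copy case) ...
     iter += piece;
     if (iter < size) goto top;
     dest += iter;  (src += iter;)
   out:  */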
/* Output a "rep; mov" instruction.
   The arguments have the same meaning as for the previous function.  */
19574 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19575 rtx destptr, rtx srcptr,
19577 enum machine_mode mode)
/* If the size is known, it is shorter to use rep movs.  */
if (mode == QImode && CONST_INT_P (count)
    && !(INTVAL (count) & 3))
  mode = SImode;
19588 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19589 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19590 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19591 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19592 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19593 if (mode != QImode)
19595 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19596 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19597 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19598 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19599 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19600 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19604 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19605 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19607 if (CONST_INT_P (count))
19609 count = GEN_INT (INTVAL (count)
19610 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19611 destmem = shallow_copy_rtx (destmem);
19612 srcmem = shallow_copy_rtx (srcmem);
19613 set_mem_size (destmem, count);
19614 set_mem_size (srcmem, count);
19618 if (MEM_SIZE (destmem))
19619 set_mem_size (destmem, NULL_RTX);
19620 if (MEM_SIZE (srcmem))
19621 set_mem_size (srcmem, NULL_RTX);
19623 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output a "rep; stos" instruction.
   The arguments have the same meaning as for the previous function.  */
19630 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19631 rtx count, enum machine_mode mode,
19637 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19638 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19639 value = force_reg (mode, gen_lowpart (mode, value));
19640 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19641 if (mode != QImode)
19643 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19644 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19645 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19648 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19649 if (orig_value == const0_rtx && CONST_INT_P (count))
19651 count = GEN_INT (INTVAL (count)
19652 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19653 destmem = shallow_copy_rtx (destmem);
19654 set_mem_size (destmem, count);
19656 else if (MEM_SIZE (destmem))
19657 set_mem_size (destmem, NULL_RTX);
19658 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
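/* E.g. (illustrative): clearing COUNT = 64 bytes with MODE == SImode
   loads the count register with 64 >> 2 = 16 and emits "rep stosd";
   DESTEXP describes the final destination pointer
   (destptr + countreg * 4) for the RTL.  */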
19662 emit_strmov (rtx destmem, rtx srcmem,
19663 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19665 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19666 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19667 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19670 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19672 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19673 rtx destptr, rtx srcptr, rtx count, int max_size)
19676 if (CONST_INT_P (count))
19678 HOST_WIDE_INT countval = INTVAL (count);
19681 if ((countval & 0x10) && max_size > 16)
19685 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19686 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19689 gcc_unreachable ();
19692 if ((countval & 0x08) && max_size > 8)
19695 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19698 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19699 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19703 if ((countval & 0x04) && max_size > 4)
19705 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19708 if ((countval & 0x02) && max_size > 2)
19710 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19713 if ((countval & 0x01) && max_size > 1)
19715 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19722 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19723 count, 1, OPTAB_DIRECT);
19724 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19725 count, QImode, 1, 4);
/* When there are stringops, we can cheaply increase dest and src pointers.
   Otherwise we save code size by maintaining offset (zero is readily
   available from the preceding rep operation) and using x86 addressing
   modes.  */
19733 if (TARGET_SINGLE_STRINGOP)
19737 rtx label = ix86_expand_aligntest (count, 4, true);
19738 src = change_address (srcmem, SImode, srcptr);
19739 dest = change_address (destmem, SImode, destptr);
19740 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19741 emit_label (label);
19742 LABEL_NUSES (label) = 1;
19746 rtx label = ix86_expand_aligntest (count, 2, true);
19747 src = change_address (srcmem, HImode, srcptr);
19748 dest = change_address (destmem, HImode, destptr);
19749 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19750 emit_label (label);
19751 LABEL_NUSES (label) = 1;
19755 rtx label = ix86_expand_aligntest (count, 1, true);
19756 src = change_address (srcmem, QImode, srcptr);
19757 dest = change_address (destmem, QImode, destptr);
19758 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19759 emit_label (label);
19760 LABEL_NUSES (label) = 1;
19765 rtx offset = force_reg (Pmode, const0_rtx);
19770 rtx label = ix86_expand_aligntest (count, 4, true);
19771 src = change_address (srcmem, SImode, srcptr);
19772 dest = change_address (destmem, SImode, destptr);
19773 emit_move_insn (dest, src);
19774 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19775 true, OPTAB_LIB_WIDEN);
19777 emit_move_insn (offset, tmp);
19778 emit_label (label);
19779 LABEL_NUSES (label) = 1;
19783 rtx label = ix86_expand_aligntest (count, 2, true);
19784 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19785 src = change_address (srcmem, HImode, tmp);
19786 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19787 dest = change_address (destmem, HImode, tmp);
19788 emit_move_insn (dest, src);
19789 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19790 true, OPTAB_LIB_WIDEN);
19792 emit_move_insn (offset, tmp);
19793 emit_label (label);
19794 LABEL_NUSES (label) = 1;
19798 rtx label = ix86_expand_aligntest (count, 1, true);
19799 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19800 src = change_address (srcmem, QImode, tmp);
19801 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19802 dest = change_address (destmem, QImode, tmp);
19803 emit_move_insn (dest, src);
19804 emit_label (label);
19805 LABEL_NUSES (label) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
19812 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19813 rtx count, int max_size)
19816 expand_simple_binop (counter_mode (count), AND, count,
19817 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19818 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19819 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
19825 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19829 if (CONST_INT_P (count))
19831 HOST_WIDE_INT countval = INTVAL (count);
19834 if ((countval & 0x10) && max_size > 16)
19838 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19839 emit_insn (gen_strset (destptr, dest, value));
19840 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19841 emit_insn (gen_strset (destptr, dest, value));
19844 gcc_unreachable ();
19847 if ((countval & 0x08) && max_size > 8)
19851 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19852 emit_insn (gen_strset (destptr, dest, value));
19856 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19857 emit_insn (gen_strset (destptr, dest, value));
19858 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19859 emit_insn (gen_strset (destptr, dest, value));
19863 if ((countval & 0x04) && max_size > 4)
19865 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19866 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19869 if ((countval & 0x02) && max_size > 2)
19871 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19872 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19875 if ((countval & 0x01) && max_size > 1)
19877 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19878 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19885 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19890 rtx label = ix86_expand_aligntest (count, 16, true);
19893 dest = change_address (destmem, DImode, destptr);
19894 emit_insn (gen_strset (destptr, dest, value));
19895 emit_insn (gen_strset (destptr, dest, value));
19899 dest = change_address (destmem, SImode, destptr);
19900 emit_insn (gen_strset (destptr, dest, value));
19901 emit_insn (gen_strset (destptr, dest, value));
19902 emit_insn (gen_strset (destptr, dest, value));
19903 emit_insn (gen_strset (destptr, dest, value));
19905 emit_label (label);
19906 LABEL_NUSES (label) = 1;
19910 rtx label = ix86_expand_aligntest (count, 8, true);
19913 dest = change_address (destmem, DImode, destptr);
19914 emit_insn (gen_strset (destptr, dest, value));
19918 dest = change_address (destmem, SImode, destptr);
19919 emit_insn (gen_strset (destptr, dest, value));
19920 emit_insn (gen_strset (destptr, dest, value));
19922 emit_label (label);
19923 LABEL_NUSES (label) = 1;
19927 rtx label = ix86_expand_aligntest (count, 4, true);
19928 dest = change_address (destmem, SImode, destptr);
19929 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19930 emit_label (label);
19931 LABEL_NUSES (label) = 1;
19935 rtx label = ix86_expand_aligntest (count, 2, true);
19936 dest = change_address (destmem, HImode, destptr);
19937 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19938 emit_label (label);
19939 LABEL_NUSES (label) = 1;
19943 rtx label = ix86_expand_aligntest (count, 1, true);
19944 dest = change_address (destmem, QImode, destptr);
19945 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19946 emit_label (label);
19947 LABEL_NUSES (label) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by
   ALIGN, to DESIRED_ALIGNMENT.  */
19954 expand_movmem_prologue (rtx destmem, rtx srcmem,
19955 rtx destptr, rtx srcptr, rtx count,
19956 int align, int desired_alignment)
19958 if (align <= 1 && desired_alignment > 1)
19960 rtx label = ix86_expand_aligntest (destptr, 1, false);
19961 srcmem = change_address (srcmem, QImode, srcptr);
19962 destmem = change_address (destmem, QImode, destptr);
19963 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19964 ix86_adjust_counter (count, 1);
19965 emit_label (label);
19966 LABEL_NUSES (label) = 1;
19968 if (align <= 2 && desired_alignment > 2)
19970 rtx label = ix86_expand_aligntest (destptr, 2, false);
19971 srcmem = change_address (srcmem, HImode, srcptr);
19972 destmem = change_address (destmem, HImode, destptr);
19973 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19974 ix86_adjust_counter (count, 2);
19975 emit_label (label);
19976 LABEL_NUSES (label) = 1;
19978 if (align <= 4 && desired_alignment > 4)
19980 rtx label = ix86_expand_aligntest (destptr, 4, false);
19981 srcmem = change_address (srcmem, SImode, srcptr);
19982 destmem = change_address (destmem, SImode, destptr);
19983 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19984 ix86_adjust_counter (count, 4);
19985 emit_label (label);
19986 LABEL_NUSES (label) = 1;
19988 gcc_assert (desired_alignment <= 8);
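/* E.g. (illustrative): with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this
   emits three conditional single-element copies (1, 2 and 4 bytes),
   each guarded by a test of the corresponding low bit of DESTPTR, so
   at most 7 bytes are moved before the aligned bulk copy begins.  */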
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
19994 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19995 int desired_align, int align_bytes)
19998 rtx src_size, dst_size;
20000 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20001 if (src_align_bytes >= 0)
20002 src_align_bytes = desired_align - src_align_bytes;
20003 src_size = MEM_SIZE (src);
20004 dst_size = MEM_SIZE (dst);
20005 if (align_bytes & 1)
20007 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20008 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20010 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20012 if (align_bytes & 2)
20014 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20015 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20016 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20017 set_mem_align (dst, 2 * BITS_PER_UNIT);
20018 if (src_align_bytes >= 0
20019 && (src_align_bytes & 1) == (align_bytes & 1)
20020 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20021 set_mem_align (src, 2 * BITS_PER_UNIT);
20023 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20025 if (align_bytes & 4)
20027 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20028 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20029 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20030 set_mem_align (dst, 4 * BITS_PER_UNIT);
20031 if (src_align_bytes >= 0)
20033 unsigned int src_align = 0;
20034 if ((src_align_bytes & 3) == (align_bytes & 3))
20036 else if ((src_align_bytes & 1) == (align_bytes & 1))
20038 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20039 set_mem_align (src, src_align * BITS_PER_UNIT);
20042 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20044 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20045 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20046 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20047 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20048 if (src_align_bytes >= 0)
20050 unsigned int src_align = 0;
20051 if ((src_align_bytes & 7) == (align_bytes & 7))
20053 else if ((src_align_bytes & 3) == (align_bytes & 3))
20055 else if ((src_align_bytes & 1) == (align_bytes & 1))
20057 if (src_align > (unsigned int) desired_align)
20058 src_align = desired_align;
20059 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20060 set_mem_align (src, src_align * BITS_PER_UNIT);
20063 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20065 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20070 /* Set enough bytes of DEST to align DEST, known to be aligned by ALIGN,
20071 to DESIRED_ALIGNMENT. */
20073 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20074 int align, int desired_alignment)
20076 if (align <= 1 && desired_alignment > 1)
20078 rtx label = ix86_expand_aligntest (destptr, 1, false);
20079 destmem = change_address (destmem, QImode, destptr);
20080 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20081 ix86_adjust_counter (count, 1);
20082 emit_label (label);
20083 LABEL_NUSES (label) = 1;
20085 if (align <= 2 && desired_alignment > 2)
20087 rtx label = ix86_expand_aligntest (destptr, 2, false);
20088 destmem = change_address (destmem, HImode, destptr);
20089 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20090 ix86_adjust_counter (count, 2);
20091 emit_label (label);
20092 LABEL_NUSES (label) = 1;
20094 if (align <= 4 && desired_alignment > 4)
20096 rtx label = ix86_expand_aligntest (destptr, 4, false);
20097 destmem = change_address (destmem, SImode, destptr);
20098 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20099 ix86_adjust_counter (count, 4);
20100 emit_label (label);
20101 LABEL_NUSES (label) = 1;
20103 gcc_assert (desired_alignment <= 8);
20106 /* Set enough bytes of DST to align DST, known to be aligned by ALIGN,
20107 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20109 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20110 int desired_align, int align_bytes)
20113 rtx dst_size = MEM_SIZE (dst);
20114 if (align_bytes & 1)
20116 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20118 emit_insn (gen_strset (destreg, dst,
20119 gen_lowpart (QImode, value)));
20121 if (align_bytes & 2)
20123 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20124 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20125 set_mem_align (dst, 2 * BITS_PER_UNIT);
20127 emit_insn (gen_strset (destreg, dst,
20128 gen_lowpart (HImode, value)));
20130 if (align_bytes & 4)
20132 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20133 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20134 set_mem_align (dst, 4 * BITS_PER_UNIT);
20136 emit_insn (gen_strset (destreg, dst,
20137 gen_lowpart (SImode, value)));
20139 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20140 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20141 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20143 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20147 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20148 static enum stringop_alg
20149 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20150 int *dynamic_check)
20152 const struct stringop_algs * algs;
20153 bool optimize_for_speed;
20154 /* Algorithms using the rep prefix want at least edi and ecx;
20155 additionally, memset wants eax and memcpy wants esi. Don't
20156 consider such algorithms if the user has appropriated those
20157 registers for their own purposes. */
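/* (Illustration: compiling with -ffixed-ecx marks fixed_regs[CX_REG],
   so rep_prefix_1_byte/4_byte/8_byte are all rejected below and the
   loop-based algorithms are chosen instead.)  */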
20158 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20160 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20162 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20163 || (alg != rep_prefix_1_byte \
20164 && alg != rep_prefix_4_byte \
20165 && alg != rep_prefix_8_byte))
20166 const struct processor_costs *cost;
20168 /* Even if the string operation call is cold, we still might spend a lot
20169 of time processing large blocks. */
20170 if (optimize_function_for_size_p (cfun)
20171 || (optimize_insn_for_size_p ()
20172 && expected_size != -1 && expected_size < 256))
20173 optimize_for_speed = false;
20175 optimize_for_speed = true;
20177 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20179 *dynamic_check = -1;
20181 algs = &cost->memset[TARGET_64BIT != 0];
20183 algs = &cost->memcpy[TARGET_64BIT != 0];
20184 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20185 return stringop_alg;
20186 /* rep; movq or rep; movl is the smallest variant. */
20187 else if (!optimize_for_speed)
20189 if (!count || (count & 3))
20190 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20192 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20194 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
20196 else if (expected_size != -1 && expected_size < 4)
20197 return loop_1_byte;
20198 else if (expected_size != -1)
20201 enum stringop_alg alg = libcall;
20202 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20204 /* We get here if the algorithms that were not libcall-based
20205 were rep-prefix based and we are unable to use rep prefixes
20206 based on global register usage. Break out of the loop and
20207 use the heuristic below. */
20208 if (algs->size[i].max == 0)
20210 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20212 enum stringop_alg candidate = algs->size[i].alg;
20214 if (candidate != libcall && ALG_USABLE_P (candidate))
20216 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
20217 last non-libcall inline algorithm. */
20218 if (TARGET_INLINE_ALL_STRINGOPS)
20220 /* When the current size is best copied by a libcall, but we
20221 are still forced to inline, run the heuristic below
20222 that will pick code for medium-sized blocks. */
20223 if (alg != libcall)
20227 else if (ALG_USABLE_P (candidate))
20231 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20233 /* When asked to inline the call anyway, try to pick a meaningful choice.
20234 We look for the maximal size of block that is faster to copy by hand and
20235 take blocks of at most that size, guessing that the average size will
20236 be roughly half of the block.

20238 If this turns out to be bad, we might simply specify the preferred
20239 choice in ix86_costs. */
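/* (Worked example with a hypothetical cost table: if the table says
   copying by hand beats a libcall for blocks up to max == 4096 bytes,
   we recurse with expected_size == max / 2 == 2048 to pick the inline
   algorithm, and with -minline-stringops-dynamically we also set
   *dynamic_check == 4096 so larger blocks take the libcall at
   runtime.)  */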
20240 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20241 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20244 enum stringop_alg alg;
20246 bool any_alg_usable_p = true;
20248 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20250 enum stringop_alg candidate = algs->size[i].alg;
20251 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20253 if (candidate != libcall && candidate
20254 && ALG_USABLE_P (candidate))
20255 max = algs->size[i].max;
20257 /* If there aren't any usable algorithms, then recursing on
20258 smaller sizes isn't going to find anything. Just return the
20259 simple byte-at-a-time copy loop. */
20260 if (!any_alg_usable_p)
20262 /* Pick something reasonable. */
20263 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20264 *dynamic_check = 128;
20265 return loop_1_byte;
20269 alg = decide_alg (count, max / 2, memset, dynamic_check);
20270 gcc_assert (*dynamic_check == -1);
20271 gcc_assert (alg != libcall);
20272 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20273 *dynamic_check = max;
20276 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20277 #undef ALG_USABLE_P
20280 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20281 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20283 decide_alignment (int align,
20284 enum stringop_alg alg,
20287 int desired_align = 0;
20291 gcc_unreachable ();
20293 case unrolled_loop:
20294 desired_align = GET_MODE_SIZE (Pmode);
20296 case rep_prefix_8_byte:
20299 case rep_prefix_4_byte:
20300 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20301 copying a whole cache line at once. */
20302 if (TARGET_PENTIUMPRO)
20307 case rep_prefix_1_byte:
20308 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20309 copying a whole cache line at once. */
20310 if (TARGET_PENTIUMPRO)
20324 if (desired_align < align)
20325 desired_align = align;
20326 if (expected_size != -1 && expected_size < 4)
20327 desired_align = align;
20328 return desired_align;
20331 /* Return the smallest power of 2 greater than VAL. */
20333 smallest_pow2_greater_than (int val)
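/* (For illustration, this helper behaves like the following sketch:

       int ret = 1;
       while (ret <= val)
         ret <<= 1;
       return ret;

   e.g. smallest_pow2_greater_than (7) == 8 and
   smallest_pow2_greater_than (8) == 16.)  */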
20341 /* Expand string move (memcpy) operation. Use i386 string operations when
20342 profitable. expand_setmem contains similar code. The code depends upon
20343 architecture, block size and alignment, but always has the same overall structure:
20346 1) Prologue guard: Conditional that jumps up to epilogues for small
20347 blocks that can be handled by epilogue alone. This is faster but
20348 also needed for correctness, since the prologue assumes the block is larger
20349 than the desired alignment.
20351 Optional dynamic check for size and libcall for large
20352 blocks is emitted here too, with -minline-stringops-dynamically.
20354 2) Prologue: copy first few bytes in order to get destination aligned
20355 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20356 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20357 We emit either a jump tree on power of two sized blocks, or a byte loop.
20359 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20360 with specified algorithm.
20362 4) Epilogue: code copying tail of the block that is too small to be
20363 handled by main body (or up to size guarded by prologue guard). */
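/* (Illustrative sketch only, not literal output: for rep_prefix_4_byte
   with desired_align == 4 the emitted sequence is roughly

       if (count < epilogue_size_needed) goto epilogue;        step 1
       while (dest & 3) *dest++ = *src++, count--;             step 2
       rep movsl                                               step 3
     epilogue:
       copy the count & (epilogue_size_needed - 1) tail bytes  step 4  )  */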
20366 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20367 rtx expected_align_exp, rtx expected_size_exp)
20373 rtx jump_around_label = NULL;
20374 HOST_WIDE_INT align = 1;
20375 unsigned HOST_WIDE_INT count = 0;
20376 HOST_WIDE_INT expected_size = -1;
20377 int size_needed = 0, epilogue_size_needed;
20378 int desired_align = 0, align_bytes = 0;
20379 enum stringop_alg alg;
20381 bool need_zero_guard = false;
20383 if (CONST_INT_P (align_exp))
20384 align = INTVAL (align_exp);
20385 /* i386 can do misaligned access at a reasonable increase in cost. */
20386 if (CONST_INT_P (expected_align_exp)
20387 && INTVAL (expected_align_exp) > align)
20388 align = INTVAL (expected_align_exp);
20389 /* ALIGN is the minimum of destination and source alignment, but we care here
20390 just about destination alignment. */
20391 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20392 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20394 if (CONST_INT_P (count_exp))
20395 count = expected_size = INTVAL (count_exp);
20396 if (CONST_INT_P (expected_size_exp) && count == 0)
20397 expected_size = INTVAL (expected_size_exp);
20399 /* Make sure we don't need to care about overflow later on. */
20400 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20403 /* Step 0: Decide on preferred algorithm, desired alignment and
20404 size of chunks to be copied by main loop. */
20406 alg = decide_alg (count, expected_size, false, &dynamic_check);
20407 desired_align = decide_alignment (align, alg, expected_size);
20409 if (!TARGET_ALIGN_STRINGOPS)
20410 align = desired_align;
20412 if (alg == libcall)
20414 gcc_assert (alg != no_stringop);
20416 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20417 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20418 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20423 gcc_unreachable ();
20425 need_zero_guard = true;
20426 size_needed = GET_MODE_SIZE (Pmode);
20428 case unrolled_loop:
20429 need_zero_guard = true;
20430 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20432 case rep_prefix_8_byte:
20435 case rep_prefix_4_byte:
20438 case rep_prefix_1_byte:
20442 need_zero_guard = true;
20447 epilogue_size_needed = size_needed;
20449 /* Step 1: Prologue guard. */
20451 /* Alignment code needs count to be in a register. */
20452 if (CONST_INT_P (count_exp) && desired_align > align)
20454 if (INTVAL (count_exp) > desired_align
20455 && INTVAL (count_exp) > size_needed)
20458 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20459 if (align_bytes <= 0)
20462 align_bytes = desired_align - align_bytes;
20464 if (align_bytes == 0)
20465 count_exp = force_reg (counter_mode (count_exp), count_exp);
20467 gcc_assert (desired_align >= 1 && align >= 1);
20469 /* Ensure that alignment prologue won't copy past end of block. */
20470 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20472 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20473 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20474 Make sure it is a power of 2. */
20475 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20479 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20481 /* If main algorithm works on QImode, no epilogue is needed.
20482 For small sizes just don't align anything. */
20483 if (size_needed == 1)
20484 desired_align = align;
20491 label = gen_label_rtx ();
20492 emit_cmp_and_jump_insns (count_exp,
20493 GEN_INT (epilogue_size_needed),
20494 LTU, 0, counter_mode (count_exp), 1, label);
20495 if (expected_size == -1 || expected_size < epilogue_size_needed)
20496 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20498 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20502 /* Emit code to decide at runtime whether a library call or inline code should be used. */
20504 if (dynamic_check != -1)
20506 if (CONST_INT_P (count_exp))
20508 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20510 emit_block_move_via_libcall (dst, src, count_exp, false);
20511 count_exp = const0_rtx;
20517 rtx hot_label = gen_label_rtx ();
20518 jump_around_label = gen_label_rtx ();
20519 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20520 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20521 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20522 emit_block_move_via_libcall (dst, src, count_exp, false);
20523 emit_jump (jump_around_label);
20524 emit_label (hot_label);
20528 /* Step 2: Alignment prologue. */
20530 if (desired_align > align)
20532 if (align_bytes == 0)
20534 /* Except for the first move in the epilogue, we no longer know
20535 the constant offset in aliasing info. It doesn't seem worth the
20536 pain to maintain it for the first move, so throw away the info early. */
20538 src = change_address (src, BLKmode, srcreg);
20539 dst = change_address (dst, BLKmode, destreg);
20540 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20545 /* If we know how many bytes need to be stored before dst is
20546 sufficiently aligned, maintain aliasing info accurately. */
20547 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20548 desired_align, align_bytes);
20549 count_exp = plus_constant (count_exp, -align_bytes);
20550 count -= align_bytes;
20552 if (need_zero_guard
20553 && (count < (unsigned HOST_WIDE_INT) size_needed
20554 || (align_bytes == 0
20555 && count < ((unsigned HOST_WIDE_INT) size_needed
20556 + desired_align - align))))
20558 /* It is possible that we copied enough so the main loop will not execute. */
20560 gcc_assert (size_needed > 1);
20561 if (label == NULL_RTX)
20562 label = gen_label_rtx ();
20563 emit_cmp_and_jump_insns (count_exp,
20564 GEN_INT (size_needed),
20565 LTU, 0, counter_mode (count_exp), 1, label);
20566 if (expected_size == -1
20567 || expected_size < (desired_align - align) / 2 + size_needed)
20568 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20570 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20573 if (label && size_needed == 1)
20575 emit_label (label);
20576 LABEL_NUSES (label) = 1;
20578 epilogue_size_needed = 1;
20580 else if (label == NULL_RTX)
20581 epilogue_size_needed = size_needed;
20583 /* Step 3: Main loop. */
20589 gcc_unreachable ();
20591 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20592 count_exp, QImode, 1, expected_size);
20595 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20596 count_exp, Pmode, 1, expected_size);
20598 case unrolled_loop:
20599 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
20600 registers for 4 temporaries anyway. */
20601 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20602 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20605 case rep_prefix_8_byte:
20606 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20609 case rep_prefix_4_byte:
20610 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20613 case rep_prefix_1_byte:
20614 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20618 /* Properly adjust the offsets of src and dest memory for aliasing. */
20619 if (CONST_INT_P (count_exp))
20621 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20622 (count / size_needed) * size_needed);
20623 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20624 (count / size_needed) * size_needed);
20628 src = change_address (src, BLKmode, srcreg);
20629 dst = change_address (dst, BLKmode, destreg);
20632 /* Step 4: Epilogue to copy the remaining bytes. */
20636 /* When the main loop is done, COUNT_EXP might hold the original count,
20637 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20638 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20639 bytes. Compensate if needed. */
20641 if (size_needed < epilogue_size_needed)
20644 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20645 GEN_INT (size_needed - 1), count_exp, 1,
20647 if (tmp != count_exp)
20648 emit_move_insn (count_exp, tmp);
20650 emit_label (label);
20651 LABEL_NUSES (label) = 1;
20654 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20655 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20656 epilogue_size_needed);
20657 if (jump_around_label)
20658 emit_label (jump_around_label);
20662 /* Helper function for memset. For QImode value 0xXY produce
20663 0xXYXYXYXY of the width specified by MODE. This is essentially
20664 a * 0x01010101, but we can do slightly better than
20665 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
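/* (Worked example: for MODE == SImode and VAL == 0xab, the unwound
   sequence below computes 0xab -> (0xab << 8) | 0xab == 0xabab
   -> (0xabab << 16) | 0xabab == 0xabababab, i.e. two shift/IOR
   pairs instead of a multiply by 0x01010101.)  */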
20668 promote_duplicated_reg (enum machine_mode mode, rtx val)
20670 enum machine_mode valmode = GET_MODE (val);
20672 int nops = mode == DImode ? 3 : 2;
20674 gcc_assert (mode == SImode || mode == DImode);
20675 if (val == const0_rtx)
20676 return copy_to_mode_reg (mode, const0_rtx);
20677 if (CONST_INT_P (val))
20679 HOST_WIDE_INT v = INTVAL (val) & 255;
20683 if (mode == DImode)
20684 v |= (v << 16) << 16;
20685 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20688 if (valmode == VOIDmode)
20690 if (valmode != QImode)
20691 val = gen_lowpart (QImode, val);
20692 if (mode == QImode)
20694 if (!TARGET_PARTIAL_REG_STALL)
20696 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20697 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20698 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20699 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20701 rtx reg = convert_modes (mode, QImode, val, true);
20702 tmp = promote_duplicated_reg (mode, const1_rtx);
20703 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20708 rtx reg = convert_modes (mode, QImode, val, true);
20710 if (!TARGET_PARTIAL_REG_STALL)
20711 if (mode == SImode)
20712 emit_insn (gen_movsi_insv_1 (reg, reg));
20714 emit_insn (gen_movdi_insv_1 (reg, reg));
20717 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20718 NULL, 1, OPTAB_DIRECT);
20720 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20722 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20723 NULL, 1, OPTAB_DIRECT);
20724 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20725 if (mode == SImode)
20727 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20728 NULL, 1, OPTAB_DIRECT);
20729 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20734 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
20735 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue raising
20736 alignment from ALIGN to DESIRED_ALIGN. */
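/* (Example: on 64-bit with SIZE_NEEDED == 8 the value is promoted to
   DImode; with SIZE_NEEDED == 4, or when only the prologue needs it
   because DESIRED_ALIGN > ALIGN and DESIRED_ALIGN > 4, to SImode;
   and so on down to HImode, else VAL is used unpromoted.)  */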
20738 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20743 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20744 promoted_val = promote_duplicated_reg (DImode, val);
20745 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20746 promoted_val = promote_duplicated_reg (SImode, val);
20747 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20748 promoted_val = promote_duplicated_reg (HImode, val);
20750 promoted_val = val;
20752 return promoted_val;
20755 /* Expand string set operation (memset). Use i386 string operations when
20756 profitable. See expand_movmem comment for explanation of individual
20757 steps performed. */
20759 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20760 rtx expected_align_exp, rtx expected_size_exp)
20765 rtx jump_around_label = NULL;
20766 HOST_WIDE_INT align = 1;
20767 unsigned HOST_WIDE_INT count = 0;
20768 HOST_WIDE_INT expected_size = -1;
20769 int size_needed = 0, epilogue_size_needed;
20770 int desired_align = 0, align_bytes = 0;
20771 enum stringop_alg alg;
20772 rtx promoted_val = NULL;
20773 bool force_loopy_epilogue = false;
20775 bool need_zero_guard = false;
20777 if (CONST_INT_P (align_exp))
20778 align = INTVAL (align_exp);
20779 /* i386 can do misaligned access at a reasonable increase in cost. */
20780 if (CONST_INT_P (expected_align_exp)
20781 && INTVAL (expected_align_exp) > align)
20782 align = INTVAL (expected_align_exp);
20783 if (CONST_INT_P (count_exp))
20784 count = expected_size = INTVAL (count_exp);
20785 if (CONST_INT_P (expected_size_exp) && count == 0)
20786 expected_size = INTVAL (expected_size_exp);
20788 /* Make sure we don't need to care about overflow later on. */
20789 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20792 /* Step 0: Decide on preferred algorithm, desired alignment and
20793 size of chunks to be copied by main loop. */
20795 alg = decide_alg (count, expected_size, true, &dynamic_check);
20796 desired_align = decide_alignment (align, alg, expected_size);
20798 if (!TARGET_ALIGN_STRINGOPS)
20799 align = desired_align;
20801 if (alg == libcall)
20803 gcc_assert (alg != no_stringop);
20805 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20806 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20811 gcc_unreachable ();
20813 need_zero_guard = true;
20814 size_needed = GET_MODE_SIZE (Pmode);
20816 case unrolled_loop:
20817 need_zero_guard = true;
20818 size_needed = GET_MODE_SIZE (Pmode) * 4;
20820 case rep_prefix_8_byte:
20823 case rep_prefix_4_byte:
20826 case rep_prefix_1_byte:
20830 need_zero_guard = true;
20834 epilogue_size_needed = size_needed;
20836 /* Step 1: Prologue guard. */
20838 /* Alignment code needs count to be in a register. */
20839 if (CONST_INT_P (count_exp) && desired_align > align)
20841 if (INTVAL (count_exp) > desired_align
20842 && INTVAL (count_exp) > size_needed)
20845 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20846 if (align_bytes <= 0)
20849 align_bytes = desired_align - align_bytes;
20851 if (align_bytes == 0)
20853 enum machine_mode mode = SImode;
20854 if (TARGET_64BIT && (count & ~0xffffffff))
20856 count_exp = force_reg (mode, count_exp);
20859 /* Do the cheap promotion to allow better CSE across the
20860 main loop and epilogue (i.e. one load of the big constant in
20861 front of all code). */
20862 if (CONST_INT_P (val_exp))
20863 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20864 desired_align, align);
20865 /* Ensure that alignment prologue won't copy past end of block. */
20866 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20868 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20869 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20870 Make sure it is a power of 2. */
20871 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20873 /* To improve performance of small blocks, we jump around the VAL-promoting
20874 code. This means that if the promoted VAL is not constant,
20875 we might not use it in the epilogue and have to use a byte loop instead. */
20877 if (epilogue_size_needed > 2 && !promoted_val)
20878 force_loopy_epilogue = true;
20881 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20883 /* If main algorithm works on QImode, no epilogue is needed.
20884 For small sizes just don't align anything. */
20885 if (size_needed == 1)
20886 desired_align = align;
20893 label = gen_label_rtx ();
20894 emit_cmp_and_jump_insns (count_exp,
20895 GEN_INT (epilogue_size_needed),
20896 LTU, 0, counter_mode (count_exp), 1, label);
20897 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20898 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20900 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20903 if (dynamic_check != -1)
20905 rtx hot_label = gen_label_rtx ();
20906 jump_around_label = gen_label_rtx ();
20907 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20908 LEU, 0, counter_mode (count_exp), 1, hot_label);
20909 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20910 set_storage_via_libcall (dst, count_exp, val_exp, false);
20911 emit_jump (jump_around_label);
20912 emit_label (hot_label);
20915 /* Step 2: Alignment prologue. */
20917 /* Do the expensive promotion once we have branched off the small blocks. */
20919 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20920 desired_align, align);
20921 gcc_assert (desired_align >= 1 && align >= 1);
20923 if (desired_align > align)
20925 if (align_bytes == 0)
20927 /* Except for the first move in the epilogue, we no longer know
20928 the constant offset in aliasing info. It doesn't seem worth the
20929 pain to maintain it for the first move, so throw away the info early. */
20931 dst = change_address (dst, BLKmode, destreg);
20932 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20937 /* If we know how many bytes need to be stored before dst is
20938 sufficiently aligned, maintain aliasing info accurately. */
20939 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20940 desired_align, align_bytes);
20941 count_exp = plus_constant (count_exp, -align_bytes);
20942 count -= align_bytes;
20944 if (need_zero_guard
20945 && (count < (unsigned HOST_WIDE_INT) size_needed
20946 || (align_bytes == 0
20947 && count < ((unsigned HOST_WIDE_INT) size_needed
20948 + desired_align - align))))
20950 /* It is possible that we stored enough so the main loop will not execute. */
20952 gcc_assert (size_needed > 1);
20953 if (label == NULL_RTX)
20954 label = gen_label_rtx ();
20955 emit_cmp_and_jump_insns (count_exp,
20956 GEN_INT (size_needed),
20957 LTU, 0, counter_mode (count_exp), 1, label);
20958 if (expected_size == -1
20959 || expected_size < (desired_align - align) / 2 + size_needed)
20960 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20962 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20965 if (label && size_needed == 1)
20967 emit_label (label);
20968 LABEL_NUSES (label) = 1;
20970 promoted_val = val_exp;
20971 epilogue_size_needed = 1;
20973 else if (label == NULL_RTX)
20974 epilogue_size_needed = size_needed;
20976 /* Step 3: Main loop. */
20982 gcc_unreachable ();
20984 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20985 count_exp, QImode, 1, expected_size);
20988 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20989 count_exp, Pmode, 1, expected_size);
20991 case unrolled_loop:
20992 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20993 count_exp, Pmode, 4, expected_size);
20995 case rep_prefix_8_byte:
20996 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20999 case rep_prefix_4_byte:
21000 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21003 case rep_prefix_1_byte:
21004 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21008 /* Properly adjust the offset of dst memory for aliasing. */
21009 if (CONST_INT_P (count_exp))
21010 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21011 (count / size_needed) * size_needed);
21013 dst = change_address (dst, BLKmode, destreg);
21015 /* Step 4: Epilogue to copy the remaining bytes. */
21019 /* When the main loop is done, COUNT_EXP might hold the original count,
21020 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21021 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21022 bytes. Compensate if needed. */
21024 if (size_needed < epilogue_size_needed)
21027 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21028 GEN_INT (size_needed - 1), count_exp, 1,
21030 if (tmp != count_exp)
21031 emit_move_insn (count_exp, tmp);
21033 emit_label (label);
21034 LABEL_NUSES (label) = 1;
21037 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21039 if (force_loopy_epilogue)
21040 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21041 epilogue_size_needed);
21043 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21044 epilogue_size_needed);
21046 if (jump_around_label)
21047 emit_label (jump_around_label);
21051 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
21054 out = result, initialized with the start address
21055 align_rtx = alignment of the address.
21056 scratch = scratch register, initialized with the start address when
21057 not aligned, otherwise undefined
21059 This is just the body. It needs the initializations mentioned above and
21060 some address computation at the end. These things are done in i386.md. */
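/* (Overview of the body emitted below: check up to 3 unaligned bytes
   one at a time until OUT is 4-byte aligned, then scan a word at a
   time using the zero-byte test described further down, and finally
   locate the zero byte within the last word loaded.)  */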
21063 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21067 rtx align_2_label = NULL_RTX;
21068 rtx align_3_label = NULL_RTX;
21069 rtx align_4_label = gen_label_rtx ();
21070 rtx end_0_label = gen_label_rtx ();
21072 rtx tmpreg = gen_reg_rtx (SImode);
21073 rtx scratch = gen_reg_rtx (SImode);
21077 if (CONST_INT_P (align_rtx))
21078 align = INTVAL (align_rtx);
21080 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21082 /* Is there a known alignment and is it less than 4? */
21085 rtx scratch1 = gen_reg_rtx (Pmode);
21086 emit_move_insn (scratch1, out);
21087 /* Is there a known alignment and is it not 2? */
21090 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21091 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21093 /* Leave just the two lower bits. */
21094 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21095 NULL_RTX, 0, OPTAB_WIDEN);
21097 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21098 Pmode, 1, align_4_label);
21099 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21100 Pmode, 1, align_2_label);
21101 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21102 Pmode, 1, align_3_label);
21106 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21107 check whether it is aligned to a 4-byte boundary. */
21109 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21110 NULL_RTX, 0, OPTAB_WIDEN);
21112 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21113 Pmode, 1, align_4_label);
21116 mem = change_address (src, QImode, out);
21118 /* Now compare the bytes. */
21120 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21121 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21122 QImode, 1, end_0_label);
21124 /* Increment the address. */
21125 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21127 /* Not needed with an alignment of 2 */
21130 emit_label (align_2_label);
21132 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21135 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21137 emit_label (align_3_label);
21140 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21143 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21146 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
21147 align this loop; it only enlarges the program and does not help performance. */
21149 emit_label (align_4_label);
21151 mem = change_address (src, SImode, out);
21152 emit_move_insn (scratch, mem);
21153 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21155 /* This formula yields a nonzero result iff one of the bytes is zero.
21156 This saves three branches inside the loop and many cycles. */
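/* (I.e. TMPREG = (x - 0x01010101) & ~x & 0x80808080 for the loaded
   word x.  A byte below the first zero byte cannot produce a flag:
   either it has no borrow and its 0x80 bit stays clear, or its high
   bit was already set and is masked out by ~x.  So the result is
   nonzero exactly when x contains a zero byte, and the lowest set
   0x80 bit marks the first zero byte.)  */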
21158 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21159 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21160 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21161 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21162 gen_int_mode (0x80808080, SImode)));
21163 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21168 rtx reg = gen_reg_rtx (SImode);
21169 rtx reg2 = gen_reg_rtx (Pmode);
21170 emit_move_insn (reg, tmpreg);
21171 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21173 /* If zero is not in the first two bytes, move two bytes forward. */
21174 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21175 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21176 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21177 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21178 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21181 /* Emit lea manually to avoid clobbering of flags. */
21182 emit_insn (gen_rtx_SET (SImode, reg2,
21183 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21185 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21186 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21187 emit_insn (gen_rtx_SET (VOIDmode, out,
21188 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21194 rtx end_2_label = gen_label_rtx ();
21195 /* Is zero in the first two bytes? */
21197 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21198 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21199 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21200 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21201 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21203 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21204 JUMP_LABEL (tmp) = end_2_label;
21206 /* Not in the first two. Move two bytes forward. */
21207 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21208 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21210 emit_label (end_2_label);
21214 /* Avoid branch in fixing the byte. */
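/* (TMPREG's low byte now has bit 0x80 set iff the zero is in the
   lower byte of the remaining two-byte pair.  Doubling the byte
   moves that bit into the carry flag, and the sbb emitted below
   subtracts 3 + carry -- i.e. 4 or 3 -- from OUT, leaving OUT
   pointing at the terminating zero byte without a branch.)  */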
21215 tmpreg = gen_lowpart (QImode, tmpreg);
21216 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21217 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21218 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21219 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21221 emit_label (end_0_label);
21224 /* Expand strlen. */
21227 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21229 rtx addr, scratch1, scratch2, scratch3, scratch4;
21231 /* The generic case of the strlen expander is long. Avoid expanding
21232 it unless TARGET_INLINE_ALL_STRINGOPS. */
21234 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21235 && !TARGET_INLINE_ALL_STRINGOPS
21236 && !optimize_insn_for_size_p ()
21237 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21240 addr = force_reg (Pmode, XEXP (src, 0));
21241 scratch1 = gen_reg_rtx (Pmode);
21243 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21244 && !optimize_insn_for_size_p ())
21246 /* Well, it seems that some optimizers do not combine a call like
21247 foo (strlen (bar), strlen (bar));
21248 when the move and the subtraction are done here; they calculate
21249 the length just once when these instructions are done inside
21250 output_strlen_unroll(). But since &bar[strlen (bar)] is
21251 often used and this uses one fewer register for the lifetime of
21252 output_strlen_unroll(), it is better this way. */
21254 emit_move_insn (out, addr);
21256 ix86_expand_strlensi_unroll_1 (out, src, align);
21258 /* strlensi_unroll_1 returns the address of the zero at the end of
21259 the string, like memchr(), so compute the length by subtracting
21260 the start address. */
21261 emit_insn (ix86_gen_sub3 (out, out, addr));
21267 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21268 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21271 scratch2 = gen_reg_rtx (Pmode);
21272 scratch3 = gen_reg_rtx (Pmode);
21273 scratch4 = force_reg (Pmode, constm1_rtx);
21275 emit_move_insn (scratch3, addr);
21276 eoschar = force_reg (QImode, eoschar);
21278 src = replace_equiv_address_nv (src, scratch3);
21280 /* If .md starts supporting :P, this can be done in .md. */
21281 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21282 scratch4), UNSPEC_SCAS);
21283 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21284 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21285 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21290 /* For a given symbol (function), construct code to compute the address
21291 of its PLT entry in the large x86-64 PIC model. */
21293 construct_plt_address (rtx symbol)
21295 rtx tmp = gen_reg_rtx (Pmode);
21296 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21298 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21299 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21301 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21302 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21307 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21309 rtx pop, int sibcall)
21311 rtx use = NULL, call;
21313 if (pop == const0_rtx)
21315 gcc_assert (!TARGET_64BIT || !pop);
21317 if (TARGET_MACHO && !TARGET_64BIT)
21320 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21321 fnaddr = machopic_indirect_call_target (fnaddr);
21326 /* Static functions and indirect calls don't need the pic register. */
21327 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21328 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21329 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21330 use_reg (&use, pic_offset_table_rtx);
21333 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21335 rtx al = gen_rtx_REG (QImode, AX_REG);
21336 emit_move_insn (al, callarg2);
21337 use_reg (&use, al);
21340 if (ix86_cmodel == CM_LARGE_PIC
21342 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21343 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21344 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21346 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21347 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21349 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21350 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21353 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21355 call = gen_rtx_SET (VOIDmode, retval, call);
21358 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21359 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21360 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21363 && ix86_cfun_abi () == MS_ABI
21364 && (!callarg2 || INTVAL (callarg2) != -2))
21366 /* We need to represent that SI and DI registers are clobbered by the call. */
21368 static int clobbered_registers[] = {
21369 XMM6_REG, XMM7_REG, XMM8_REG,
21370 XMM9_REG, XMM10_REG, XMM11_REG,
21371 XMM12_REG, XMM13_REG, XMM14_REG,
21372 XMM15_REG, SI_REG, DI_REG
21375 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21376 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21377 UNSPEC_MS_TO_SYSV_CALL);
21381 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21382 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21385 (SSE_REGNO_P (clobbered_registers[i])
21387 clobbered_registers[i]));
21389 call = gen_rtx_PARALLEL (VOIDmode,
21390 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21394 /* Emit vzeroupper if needed. */
21395 if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
21398 cfun->machine->use_vzeroupper_p = 1;
21399 if (cfun->machine->callee_pass_avx256_p)
21401 if (cfun->machine->callee_return_avx256_p)
21402 avx256 = callee_return_pass_avx256;
21404 avx256 = callee_pass_avx256;
21406 else if (cfun->machine->callee_return_avx256_p)
21407 avx256 = callee_return_avx256;
21409 avx256 = call_no_avx256;
21410 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21413 call = emit_call_insn (call);
21415 CALL_INSN_FUNCTION_USAGE (call) = use;
21421 /* Clear stack slot assignments remembered from previous functions.
21422 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
21425 static struct machine_function *
21426 ix86_init_machine_status (void)
21428 struct machine_function *f;
21430 f = ggc_alloc_cleared_machine_function ();
21431 f->use_fast_prologue_epilogue_nregs = -1;
21432 f->tls_descriptor_call_expanded_p = 0;
21433 f->call_abi = ix86_abi;
21438 /* Return a MEM corresponding to a stack slot with mode MODE.
21439 Allocate a new slot if necessary.
21441 The RTL for a function can have several slots available: N is
21442 which slot to use. */
21445 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21447 struct stack_local_entry *s;
21449 gcc_assert (n < MAX_386_STACK_LOCALS);
21451 /* Virtual slot is valid only before vregs are instantiated. */
21452 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21454 for (s = ix86_stack_locals; s; s = s->next)
21455 if (s->mode == mode && s->n == n)
21456 return copy_rtx (s->rtl);
21458 s = ggc_alloc_stack_local_entry ();
21461 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21463 s->next = ix86_stack_locals;
21464 ix86_stack_locals = s;
21468 /* Construct the SYMBOL_REF for the tls_get_addr function. */
21470 static GTY(()) rtx ix86_tls_symbol;
21472 ix86_tls_get_addr (void)
21475 if (!ix86_tls_symbol)
21477 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
21478 (TARGET_ANY_GNU_TLS
21480 ? "___tls_get_addr"
21481 : "__tls_get_addr");
21484 return ix86_tls_symbol;
21487 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
21489 static GTY(()) rtx ix86_tls_module_base_symbol;
21491 ix86_tls_module_base (void)
21494 if (!ix86_tls_module_base_symbol)
21496 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
21497 "_TLS_MODULE_BASE_");
21498 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
21499 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
21502 return ix86_tls_module_base_symbol;
21505 /* Calculate the length of the memory address in the instruction
21506 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21509 memory_address_length (rtx addr)
21511 struct ix86_address parts;
21512 rtx base, index, disp;
21516 if (GET_CODE (addr) == PRE_DEC
21517 || GET_CODE (addr) == POST_INC
21518 || GET_CODE (addr) == PRE_MODIFY
21519 || GET_CODE (addr) == POST_MODIFY)
21522 ok = ix86_decompose_address (addr, &parts);
21525 if (parts.base && GET_CODE (parts.base) == SUBREG)
21526 parts.base = SUBREG_REG (parts.base);
21527 if (parts.index && GET_CODE (parts.index) == SUBREG)
21528 parts.index = SUBREG_REG (parts.index);
21531 index = parts.index;
21536 - esp as the base always wants an index,
21537 - ebp as the base always wants a displacement,
21538 - r12 as the base always wants an index,
21539 - r13 as the base always wants a displacement. */
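/* (Illustration of the rules above: "movl (%esp), %eax" must be
   encoded 8b 04 24 with a SIB byte, while "movl (%ebp), %eax" is
   encoded 8b 45 00 with a mandatory one-byte displacement; both
   therefore count one extra byte here.)  */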
21541 /* Register Indirect. */
21542 if (base && !index && !disp)
21544 /* esp (for its index) and ebp (for its displacement) need
21545 the two-byte modrm form. Similarly for r12 and r13 in 64-bit mode. */
21548 && (addr == arg_pointer_rtx
21549 || addr == frame_pointer_rtx
21550 || REGNO (addr) == SP_REG
21551 || REGNO (addr) == BP_REG
21552 || REGNO (addr) == R12_REG
21553 || REGNO (addr) == R13_REG))
21557 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21558 is not disp32, but disp32(%rip), so for disp32 a
21559 SIB byte is needed, unless print_operand_address
21560 optimizes it into disp32(%rip) or (%rip) is implied. */
21562 else if (disp && !base && !index)
21569 if (GET_CODE (disp) == CONST)
21570 symbol = XEXP (disp, 0);
21571 if (GET_CODE (symbol) == PLUS
21572 && CONST_INT_P (XEXP (symbol, 1)))
21573 symbol = XEXP (symbol, 0);
21575 if (GET_CODE (symbol) != LABEL_REF
21576 && (GET_CODE (symbol) != SYMBOL_REF
21577 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21578 && (GET_CODE (symbol) != UNSPEC
21579 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21580 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21587 /* Find the length of the displacement constant. */
21590 if (base && satisfies_constraint_K (disp))
21595 /* ebp always wants a displacement. Similarly r13. */
21596 else if (base && REG_P (base)
21597 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21600 /* An index requires the two-byte modrm form.... */
21602 /* ...like esp (or r12), which always wants an index. */
21603 || base == arg_pointer_rtx
21604 || base == frame_pointer_rtx
21605 || (base && REG_P (base)
21606 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21623 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
21624 is set, expect that the insn has an 8-bit immediate alternative. */
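/* (Illustration: "addl $100, %eax" can use the sign-extended 8-bit
   form 83 c0 64, so its immediate contributes 1 byte, while
   "addl $1000, %eax" needs a full 32-bit immediate and contributes
   4 bytes.)  */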
21626 ix86_attr_length_immediate_default (rtx insn, int shortform)
21630 extract_insn_cached (insn);
21631 for (i = recog_data.n_operands - 1; i >= 0; --i)
21632 if (CONSTANT_P (recog_data.operand[i]))
21634 enum attr_mode mode = get_attr_mode (insn);
21637 if (shortform && CONST_INT_P (recog_data.operand[i]))
21639 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21646 ival = trunc_int_for_mode (ival, HImode);
21649 ival = trunc_int_for_mode (ival, SImode);
21654 if (IN_RANGE (ival, -128, 127))
21671 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21676 fatal_insn ("unknown insn mode", insn);
21681 /* Compute default value for "length_address" attribute. */
21683 ix86_attr_length_address_default (rtx insn)
21687 if (get_attr_type (insn) == TYPE_LEA)
21689 rtx set = PATTERN (insn), addr;
21691 if (GET_CODE (set) == PARALLEL)
21692 set = XVECEXP (set, 0, 0);
21694 gcc_assert (GET_CODE (set) == SET);
21696 addr = SET_SRC (set);
21697 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21699 if (GET_CODE (addr) == ZERO_EXTEND)
21700 addr = XEXP (addr, 0);
21701 if (GET_CODE (addr) == SUBREG)
21702 addr = SUBREG_REG (addr);
21705 return memory_address_length (addr);
21708 extract_insn_cached (insn);
21709 for (i = recog_data.n_operands - 1; i >= 0; --i)
21710 if (MEM_P (recog_data.operand[i]))
21712 constrain_operands_cached (reload_completed);
21713 if (which_alternative != -1)
21715 const char *constraints = recog_data.constraints[i];
21716 int alt = which_alternative;
21718 while (*constraints == '=' || *constraints == '+')
21721 while (*constraints++ != ',')
21723 /* Skip ignored operands. */
21724 if (*constraints == 'X')
21727 return memory_address_length (XEXP (recog_data.operand[i], 0));
21732 /* Compute the default value for the "length_vex" attribute. It includes
21733 the 2- or 3-byte VEX prefix and 1 opcode byte. */
21736 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21741 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
21742 requires the 3-byte VEX prefix. */
21743 if (!has_0f_opcode || has_vex_w)
21746 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21750 extract_insn_cached (insn);
21752 for (i = recog_data.n_operands - 1; i >= 0; --i)
21753 if (REG_P (recog_data.operand[i]))
21755 /* REX.W bit uses 3 byte VEX prefix. */
21756 if (GET_MODE (recog_data.operand[i]) == DImode
21757 && GENERAL_REG_P (recog_data.operand[i]))
21762 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21763 if (MEM_P (recog_data.operand[i])
21764 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21771 /* Return the maximum number of instructions a cpu can issue. */
21774 ix86_issue_rate (void)
21778 case PROCESSOR_PENTIUM:
21779 case PROCESSOR_ATOM:
21783 case PROCESSOR_PENTIUMPRO:
21784 case PROCESSOR_PENTIUM4:
21785 case PROCESSOR_ATHLON:
21787 case PROCESSOR_AMDFAM10:
21788 case PROCESSOR_NOCONA:
21789 case PROCESSOR_GENERIC32:
21790 case PROCESSOR_GENERIC64:
21791 case PROCESSOR_BDVER1:
21794 case PROCESSOR_CORE2:
21802 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21803 by DEP_INSN and nothing else set by DEP_INSN. */
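/* (E.g. DEP_INSN is a "cmp" that sets only the flags and INSN is a
   following "jcc" or "setcc" reading just those flags; such pairs get
   special cost treatment below.)  */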
21806 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21810 /* Simplify the test for uninteresting insns. */
21811 if (insn_type != TYPE_SETCC
21812 && insn_type != TYPE_ICMOV
21813 && insn_type != TYPE_FCMOV
21814 && insn_type != TYPE_IBR)
21817 if ((set = single_set (dep_insn)) != 0)
21819 set = SET_DEST (set);
21822 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21823 && XVECLEN (PATTERN (dep_insn), 0) == 2
21824 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21825 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21827 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21828 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21833 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21836 /* This test is true if the dependent insn reads the flags but
21837 not any other potentially set register. */
21838 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21841 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21847 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
21851 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21854 extract_insn_cached (use_insn);
21855 for (i = recog_data.n_operands - 1; i >= 0; --i)
21856 if (MEM_P (recog_data.operand[i]))
21858 rtx addr = XEXP (recog_data.operand[i], 0);
21859 return modified_in_p (addr, set_insn) != 0;
21865 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21867 enum attr_type insn_type, dep_insn_type;
21868 enum attr_memory memory;
21870 int dep_insn_code_number;
21872 /* Anti and output dependencies have zero cost on all CPUs. */
21873 if (REG_NOTE_KIND (link) != 0)
21876 dep_insn_code_number = recog_memoized (dep_insn);
21878 /* If we can't recognize the insns, we can't really do anything. */
21879 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21882 insn_type = get_attr_type (insn);
21883 dep_insn_type = get_attr_type (dep_insn);
21887 case PROCESSOR_PENTIUM:
21888 /* Address Generation Interlock adds a cycle of latency. */
21889 if (insn_type == TYPE_LEA)
21891 rtx addr = PATTERN (insn);
21893 if (GET_CODE (addr) == PARALLEL)
21894 addr = XVECEXP (addr, 0, 0);
21896 gcc_assert (GET_CODE (addr) == SET);
21898 addr = SET_SRC (addr);
21899 if (modified_in_p (addr, dep_insn))
21902 else if (ix86_agi_dependent (dep_insn, insn))
21905 /* ??? Compares pair with jump/setcc. */
21906 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21909 /* Floating point stores require value to be ready one cycle earlier. */
21910 if (insn_type == TYPE_FMOV
21911 && get_attr_memory (insn) == MEMORY_STORE
21912 && !ix86_agi_dependent (dep_insn, insn))
21916 case PROCESSOR_PENTIUMPRO:
21917 memory = get_attr_memory (insn);
21919 /* INT->FP conversion is expensive. */
21920 if (get_attr_fp_int_src (dep_insn))
21923 /* There is one cycle extra latency between an FP op and a store. */
21924 if (insn_type == TYPE_FMOV
21925 && (set = single_set (dep_insn)) != NULL_RTX
21926 && (set2 = single_set (insn)) != NULL_RTX
21927 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21928 && MEM_P (SET_DEST (set2)))
21931 /* Show the ability of the reorder buffer to hide the latency of a load by
21932 executing it in parallel with the previous instruction when the
21933 previous instruction is not needed to compute the address. */
21934 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21935 && !ix86_agi_dependent (dep_insn, insn))
21937 /* Claim moves to take one cycle, as the core can issue one load
21938 at a time and the next load can start a cycle later. */
21939 if (dep_insn_type == TYPE_IMOV
21940 || dep_insn_type == TYPE_FMOV)
21948 memory = get_attr_memory (insn);
21950 /* The esp dependency is resolved before the instruction is really finished. */
21952 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21953 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21956 /* INT->FP conversion is expensive. */
21957 if (get_attr_fp_int_src (dep_insn))
21960 /* Show the ability of the reorder buffer to hide the latency of a load by
21961 executing it in parallel with the previous instruction when the
21962 previous instruction is not needed to compute the address. */
21963 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21964 && !ix86_agi_dependent (dep_insn, insn))
21966 /* Claim moves to take one cycle, as the core can issue one load
21967 at a time and the next load can start a cycle later. */
21968 if (dep_insn_type == TYPE_IMOV
21969 || dep_insn_type == TYPE_FMOV)
21978 case PROCESSOR_ATHLON:
21980 case PROCESSOR_AMDFAM10:
21981 case PROCESSOR_BDVER1:
21982 case PROCESSOR_ATOM:
21983 case PROCESSOR_GENERIC32:
21984 case PROCESSOR_GENERIC64:
21985 memory = get_attr_memory (insn);
21987 /* Show the ability of the reorder buffer to hide the latency of a load by
21988 executing it in parallel with the previous instruction when the
21989 previous instruction is not needed to compute the address. */
21990 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21991 && !ix86_agi_dependent (dep_insn, insn))
21993 enum attr_unit unit = get_attr_unit (insn);
21996 /* Because of the difference between the lengths of the integer and
21997 floating-point unit pipeline preparation stages, the memory operands
21998 for floating point are cheaper.

22000 ??? For Athlon the difference is most probably 2. */
22001 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22004 loadcost = TARGET_ATHLON ? 2 : 0;
22006 if (cost >= loadcost)
22019 /* How many alternative schedules to try. This should be as wide as the
22020 scheduling freedom in the DFA, but no wider. Making this value too
22021 large results in extra work for the scheduler. */
22024 ia32_multipass_dfa_lookahead (void)
22028 case PROCESSOR_PENTIUM:
22031 case PROCESSOR_PENTIUMPRO:
22041 /* Compute the alignment given to a constant that is being placed in memory.
22042 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
22044 The value of this function is used instead of that alignment to align the object. */
22048 ix86_constant_alignment (tree exp, int align)
22050 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22051 || TREE_CODE (exp) == INTEGER_CST)
22053 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22055 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22058 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22059 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22060 return BITS_PER_WORD;
22065 /* Compute the alignment for a static variable.
22066 TYPE is the data type, and ALIGN is the alignment that
22067 the object would ordinarily have. The value of this function is used
22068 instead of that alignment to align the object. */
22071 ix86_data_alignment (tree type, int align)
22073 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22075 if (AGGREGATE_TYPE_P (type)
22076 && TYPE_SIZE (type)
22077 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22078 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22079 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22080 && align < max_align)
22083 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
22084 to a 16-byte boundary. */
22087 if (AGGREGATE_TYPE_P (type)
22088 && TYPE_SIZE (type)
22089 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22090 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22091 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22095 if (TREE_CODE (type) == ARRAY_TYPE)
22097 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22099 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22102 else if (TREE_CODE (type) == COMPLEX_TYPE)
22105 if (TYPE_MODE (type) == DCmode && align < 64)
22107 if ((TYPE_MODE (type) == XCmode
22108 || TYPE_MODE (type) == TCmode) && align < 128)
22111 else if ((TREE_CODE (type) == RECORD_TYPE
22112 || TREE_CODE (type) == UNION_TYPE
22113 || TREE_CODE (type) == QUAL_UNION_TYPE)
22114 && TYPE_FIELDS (type))
22116 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22118 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22121 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22122 || TREE_CODE (type) == INTEGER_TYPE)
22124 if (TYPE_MODE (type) == DFmode && align < 64)
22126 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22127 return 128;
22128 }
22130 return align;
22131 }
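/* Worked example of the rules above (illustrative, default options):

     static double lut[8];   TYPE_SIZE is 512 bits >= max_align (256),
                             so the array is raised to 256-bit
                             (32-byte) alignment;
     static double d;        plain DFmode data is raised to 64-bit
                             alignment.  */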
22133 /* Compute the alignment for a local variable or a stack slot. EXP is
22134 the data type or decl itself, MODE is the widest mode available and
22135 ALIGN is the alignment that the object would ordinarily have. The
22136 value of this macro is used instead of that alignment to align the
22140 ix86_local_alignment (tree exp, enum machine_mode mode,
22141 unsigned int align)
22145 if (exp && DECL_P (exp))
22147 type = TREE_TYPE (exp);
22156 if (use_avx256_p (mode, type))
22157 cfun->machine->use_avx256_p = true;
22159 /* Don't do dynamic stack realignment for long long objects with
22160 -mpreferred-stack-boundary=2. */
22163 && ix86_preferred_stack_boundary < 64
22164 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22165 && (!type || !TYPE_USER_ALIGN (type))
22166 && (!decl || !DECL_USER_ALIGN (decl)))
22167 return 32;
22169 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22170 register in MODE. We will return the largest alignment of XF
22171 and DF. */
22174 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22175 align = GET_MODE_ALIGNMENT (DFmode);
22179 /* x86-64 ABI requires arrays of 16 bytes or larger to be aligned
22180 to a 16-byte boundary. Exact wording is:
22182 An array uses the same alignment as its elements, except that a local or
22183 global array variable of length at least 16 bytes or
22184 a C99 variable-length array variable always has alignment of at least 16 bytes.
22186 This was added to allow use of aligned SSE instructions on arrays. This
22187 rule is meant for static storage (where the compiler cannot do the
22188 analysis by itself). We follow it for automatic variables only when
22189 convenient. We fully control everything in the function being compiled,
22190 and functions from other units cannot rely on the alignment.
22192 Exclude the va_list type. It is the common case of a local array where
22193 we cannot benefit from the alignment. */
22194 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22197 if (AGGREGATE_TYPE_P (type)
22198 && (TYPE_MAIN_VARIANT (type)
22199 != TYPE_MAIN_VARIANT (va_list_type_node))
22200 && TYPE_SIZE (type)
22201 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22202 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22203 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22206 if (TREE_CODE (type) == ARRAY_TYPE)
22208 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22210 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22213 else if (TREE_CODE (type) == COMPLEX_TYPE)
22215 if (TYPE_MODE (type) == DCmode && align < 64)
22217 if ((TYPE_MODE (type) == XCmode
22218 || TYPE_MODE (type) == TCmode) && align < 128)
22221 else if ((TREE_CODE (type) == RECORD_TYPE
22222 || TREE_CODE (type) == UNION_TYPE
22223 || TREE_CODE (type) == QUAL_UNION_TYPE)
22224 && TYPE_FIELDS (type))
22226 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22228 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22231 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22232 || TREE_CODE (type) == INTEGER_TYPE)
22235 if (TYPE_MODE (type) == DFmode && align < 64)
22237 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22243 /* Compute the minimum required alignment for dynamic stack realignment
22244 purposes for a local variable, parameter or a stack slot. EXP is
22245 the data type or decl itself, MODE is its mode and ALIGN is the
22246 alignment that the object would ordinarily have. */
22249 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22250 unsigned int align)
22254 if (exp && DECL_P (exp))
22256 type = TREE_TYPE (exp);
22265 if (use_avx256_p (mode, type))
22266 cfun->machine->use_avx256_p = true;
22268 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22269 return align;
22271 /* Don't do dynamic stack realignment for long long objects with
22272 -mpreferred-stack-boundary=2. */
22273 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22274 && (!type || !TYPE_USER_ALIGN (type))
22275 && (!decl || !DECL_USER_ALIGN (decl)))
22276 return 32;
22278 return align;
22279 }
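/* Illustrative example (not from the sources): when compiling with
   -m32 -mpreferred-stack-boundary=2, a local

     long long n;             DImode, natural alignment 64

   reports a minimum alignment of only 32 here, so such locals alone
   never force dynamic stack realignment.  */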
22281 /* Find a location for the static chain incoming to a nested function.
22282 This is a register, unless all free registers are used by arguments. */
22285 ix86_static_chain (const_tree fndecl, bool incoming_p)
22289 if (!DECL_STATIC_CHAIN (fndecl))
22290 return NULL;
22294 /* We always use R10 in 64-bit mode. */
22295 regno = R10_REG;
22300 /* By default in 32-bit mode we use ECX to pass the static chain. */
22301 regno = CX_REG;
22303 fntype = TREE_TYPE (fndecl);
22304 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
22306 /* Fastcall functions use ecx/edx for arguments, which leaves
22307 us with EAX for the static chain. */
22308 regno = AX_REG;
22310 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
22312 /* Thiscall functions use ecx for arguments, which leaves
22313 us with EAX for the static chain. */
22314 regno = AX_REG;
22316 else if (ix86_function_regparm (fntype, fndecl) == 3)
22318 /* For regparm 3, we have no free call-clobbered registers in
22319 which to store the static chain. In order to implement this,
22320 we have the trampoline push the static chain to the stack.
22321 However, we can't push a value below the return address when
22322 we call the nested function directly, so we have to use an
22323 alternate entry point. For this we use ESI, and have the
22324 alternate entry point push ESI, so that things appear the
22325 same once we're executing the nested function. */
22326 regno = SI_REG;
22328 if (fndecl == current_function_decl)
22329 ix86_static_chain_on_stack = true;
22330 return gen_frame_mem (SImode,
22331 plus_constant (arg_pointer_rtx, -8));
22337 return gen_rtx_REG (Pmode, regno);
22338 }
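/* Usage example (illustrative, plain GNU C; not part of the compiler).
   Taking the address of a nested function is what forces a trampoline,
   and the trampoline loads the static chain into the location chosen
   above before jumping to the nested function.  */
#if 0
#include <stdio.h>

static void apply (void (*fn) (int)) { fn (42); }

int main (void)
{
  int base = 100;
  void add_and_print (int x)        /* nested function */
  { printf ("%d\n", base + x); }
  apply (add_and_print);            /* address escapes: trampoline */
  return 0;
}
#endif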
22340 /* Emit RTL insns to initialize the variable parts of a trampoline.
22341 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22342 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22343 to be passed to the target function. */
22346 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22350 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22357 /* Depending on the static chain location, either load a register
22358 with a constant, or push the constant to the stack. All of the
22359 instructions are the same size. */
22360 chain = ix86_static_chain (fndecl, true);
22363 if (REGNO (chain) == CX_REG)
22364 opcode = 0xb9;
22365 else if (REGNO (chain) == AX_REG)
22366 opcode = 0xb8;
22367 else
22368 gcc_unreachable ();
22373 mem = adjust_address (m_tramp, QImode, 0);
22374 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22376 mem = adjust_address (m_tramp, SImode, 1);
22377 emit_move_insn (mem, chain_value);
22379 /* Compute offset from the end of the jmp to the target function.
22380 In the case in which the trampoline stores the static chain on
22381 the stack, we need to skip the first insn which pushes the
22382 (call-saved) register static chain; this push is 1 byte. */
22383 disp = expand_binop (SImode, sub_optab, fnaddr,
22384 plus_constant (XEXP (m_tramp, 0),
22385 MEM_P (chain) ? 9 : 10),
22386 NULL_RTX, 1, OPTAB_DIRECT);
22388 mem = adjust_address (m_tramp, QImode, 5);
22389 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22391 mem = adjust_address (m_tramp, SImode, 6);
22392 emit_move_insn (mem, disp);
22398 /* Load the function address to r11. Try to load address using
22399 the shorter movl instead of movabs. We may want to support
22400 movq for kernel mode, but kernel does not use trampolines at
22401 the moment. */
22402 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22404 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22406 mem = adjust_address (m_tramp, HImode, offset);
22407 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22409 mem = adjust_address (m_tramp, SImode, offset + 2);
22410 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22415 mem = adjust_address (m_tramp, HImode, offset);
22416 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22418 mem = adjust_address (m_tramp, DImode, offset + 2);
22419 emit_move_insn (mem, fnaddr);
22423 /* Load static chain using movabs to r10. */
22424 mem = adjust_address (m_tramp, HImode, offset);
22425 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
22427 mem = adjust_address (m_tramp, DImode, offset + 2);
22428 emit_move_insn (mem, chain_value);
22431 /* Jump to r11; the last (unused) byte is a nop, only there to
22432 pad the write out to a single 32-bit store. */
22433 mem = adjust_address (m_tramp, SImode, offset);
22434 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22437 gcc_assert (offset <= TRAMPOLINE_SIZE);
22440 #ifdef ENABLE_EXECUTE_STACK
22441 #ifdef CHECK_EXECUTE_STACK_ENABLED
22442 if (CHECK_EXECUTE_STACK_ENABLED)
22443 #endif
22444 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22445 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
22446 #endif
22447 }
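/* For reference, a sketch of the bytes the stores above produce
   (illustrative; immediates are little-endian):

   32-bit, chain in %ecx or %eax (10 bytes):
     b9|b8 <chain:4>        movl   $chain, %ecx / %eax
     e9    <disp:4>         jmp    <function>

   32-bit, chain pushed on the stack (10 bytes; DISP is biased so the
   jump lands past the 1-byte push at the alternate entry point):
     68 <chain:4>           pushl  $chain
     e9 <disp:4>            jmp    <function+1>

   64-bit (20 or 24 bytes):
     41 bb <fnaddr:4>       movl   $fnaddr, %r11d  (zero-extending)
       or 49 bb <fnaddr:8>  movabs $fnaddr, %r11
     49 ba <chain:8>        movabs $chain, %r10
     49 ff e3               jmpq   *%r11
     90                     nop    (pads the final 32-bit store)  */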
22449 /* The following file contains several enumerations and data structures
22450 built from the definitions in i386-builtin-types.def. */
22452 #include "i386-builtin-types.inc"
22454 /* Table for the ix86 builtin non-function types. */
22455 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22457 /* Retrieve an element from the above table, building some of
22458 the types lazily. */
22461 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22463 unsigned int index;
22466 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
22468 type = ix86_builtin_type_tab[(int) tcode];
22469 if (type)
22470 return type;
22472 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22473 if (tcode <= IX86_BT_LAST_VECT)
22475 enum machine_mode mode;
22477 index = tcode - IX86_BT_LAST_PRIM - 1;
22478 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22479 mode = ix86_builtin_type_vect_mode[index];
22481 type = build_vector_type_for_mode (itype, mode);
22487 index = tcode - IX86_BT_LAST_VECT - 1;
22488 if (tcode <= IX86_BT_LAST_PTR)
22489 quals = TYPE_UNQUALIFIED;
22491 quals = TYPE_QUAL_CONST;
22493 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22494 if (quals != TYPE_UNQUALIFIED)
22495 itype = build_qualified_type (itype, quals);
22497 type = build_pointer_type (itype);
22500 ix86_builtin_type_tab[(int) tcode] = type;
22501 return type;
22502 }
22504 /* Table for the ix86 builtin function types. */
22505 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
22507 /* Retrieve an element from the above table, building some of
22508 the types lazily. */
22511 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
22515 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
22517 type = ix86_builtin_func_type_tab[(int) tcode];
22518 if (type)
22519 return type;
22521 if (tcode <= IX86_BT_LAST_FUNC)
22523 unsigned start = ix86_builtin_func_start[(int) tcode];
22524 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
22525 tree rtype, atype, args = void_list_node;
22528 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
22529 for (i = after - 1; i > start; --i)
22531 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
22532 args = tree_cons (NULL, atype, args);
22535 type = build_function_type (rtype, args);
22539 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
22540 enum ix86_builtin_func_type icode;
22542 icode = ix86_builtin_func_alias_base[index];
22543 type = ix86_get_builtin_func_type (icode);
22546 ix86_builtin_func_type_tab[(int) tcode] = type;
22547 return type;
22548 }
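/* Illustrative use (V4SF_FTYPE_V4SF_V4SF is one of the codes generated
   into i386-builtin-types.inc):  */
#if 0
  tree fntype = ix86_get_builtin_func_type (V4SF_FTYPE_V4SF_V4SF);
  /* The first call builds the tree for "v4sf (v4sf, v4sf)" and caches
     it; subsequent calls return the cached node.  */
#endif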
22551 /* Codes for all the SSE/MMX builtins. */
22554 IX86_BUILTIN_ADDPS,
22555 IX86_BUILTIN_ADDSS,
22556 IX86_BUILTIN_DIVPS,
22557 IX86_BUILTIN_DIVSS,
22558 IX86_BUILTIN_MULPS,
22559 IX86_BUILTIN_MULSS,
22560 IX86_BUILTIN_SUBPS,
22561 IX86_BUILTIN_SUBSS,
22563 IX86_BUILTIN_CMPEQPS,
22564 IX86_BUILTIN_CMPLTPS,
22565 IX86_BUILTIN_CMPLEPS,
22566 IX86_BUILTIN_CMPGTPS,
22567 IX86_BUILTIN_CMPGEPS,
22568 IX86_BUILTIN_CMPNEQPS,
22569 IX86_BUILTIN_CMPNLTPS,
22570 IX86_BUILTIN_CMPNLEPS,
22571 IX86_BUILTIN_CMPNGTPS,
22572 IX86_BUILTIN_CMPNGEPS,
22573 IX86_BUILTIN_CMPORDPS,
22574 IX86_BUILTIN_CMPUNORDPS,
22575 IX86_BUILTIN_CMPEQSS,
22576 IX86_BUILTIN_CMPLTSS,
22577 IX86_BUILTIN_CMPLESS,
22578 IX86_BUILTIN_CMPNEQSS,
22579 IX86_BUILTIN_CMPNLTSS,
22580 IX86_BUILTIN_CMPNLESS,
22581 IX86_BUILTIN_CMPNGTSS,
22582 IX86_BUILTIN_CMPNGESS,
22583 IX86_BUILTIN_CMPORDSS,
22584 IX86_BUILTIN_CMPUNORDSS,
22586 IX86_BUILTIN_COMIEQSS,
22587 IX86_BUILTIN_COMILTSS,
22588 IX86_BUILTIN_COMILESS,
22589 IX86_BUILTIN_COMIGTSS,
22590 IX86_BUILTIN_COMIGESS,
22591 IX86_BUILTIN_COMINEQSS,
22592 IX86_BUILTIN_UCOMIEQSS,
22593 IX86_BUILTIN_UCOMILTSS,
22594 IX86_BUILTIN_UCOMILESS,
22595 IX86_BUILTIN_UCOMIGTSS,
22596 IX86_BUILTIN_UCOMIGESS,
22597 IX86_BUILTIN_UCOMINEQSS,
22599 IX86_BUILTIN_CVTPI2PS,
22600 IX86_BUILTIN_CVTPS2PI,
22601 IX86_BUILTIN_CVTSI2SS,
22602 IX86_BUILTIN_CVTSI642SS,
22603 IX86_BUILTIN_CVTSS2SI,
22604 IX86_BUILTIN_CVTSS2SI64,
22605 IX86_BUILTIN_CVTTPS2PI,
22606 IX86_BUILTIN_CVTTSS2SI,
22607 IX86_BUILTIN_CVTTSS2SI64,
22609 IX86_BUILTIN_MAXPS,
22610 IX86_BUILTIN_MAXSS,
22611 IX86_BUILTIN_MINPS,
22612 IX86_BUILTIN_MINSS,
22614 IX86_BUILTIN_LOADUPS,
22615 IX86_BUILTIN_STOREUPS,
22616 IX86_BUILTIN_MOVSS,
22618 IX86_BUILTIN_MOVHLPS,
22619 IX86_BUILTIN_MOVLHPS,
22620 IX86_BUILTIN_LOADHPS,
22621 IX86_BUILTIN_LOADLPS,
22622 IX86_BUILTIN_STOREHPS,
22623 IX86_BUILTIN_STORELPS,
22625 IX86_BUILTIN_MASKMOVQ,
22626 IX86_BUILTIN_MOVMSKPS,
22627 IX86_BUILTIN_PMOVMSKB,
22629 IX86_BUILTIN_MOVNTPS,
22630 IX86_BUILTIN_MOVNTQ,
22632 IX86_BUILTIN_LOADDQU,
22633 IX86_BUILTIN_STOREDQU,
22635 IX86_BUILTIN_PACKSSWB,
22636 IX86_BUILTIN_PACKSSDW,
22637 IX86_BUILTIN_PACKUSWB,
22639 IX86_BUILTIN_PADDB,
22640 IX86_BUILTIN_PADDW,
22641 IX86_BUILTIN_PADDD,
22642 IX86_BUILTIN_PADDQ,
22643 IX86_BUILTIN_PADDSB,
22644 IX86_BUILTIN_PADDSW,
22645 IX86_BUILTIN_PADDUSB,
22646 IX86_BUILTIN_PADDUSW,
22647 IX86_BUILTIN_PSUBB,
22648 IX86_BUILTIN_PSUBW,
22649 IX86_BUILTIN_PSUBD,
22650 IX86_BUILTIN_PSUBQ,
22651 IX86_BUILTIN_PSUBSB,
22652 IX86_BUILTIN_PSUBSW,
22653 IX86_BUILTIN_PSUBUSB,
22654 IX86_BUILTIN_PSUBUSW,
22657 IX86_BUILTIN_PANDN,
22661 IX86_BUILTIN_PAVGB,
22662 IX86_BUILTIN_PAVGW,
22664 IX86_BUILTIN_PCMPEQB,
22665 IX86_BUILTIN_PCMPEQW,
22666 IX86_BUILTIN_PCMPEQD,
22667 IX86_BUILTIN_PCMPGTB,
22668 IX86_BUILTIN_PCMPGTW,
22669 IX86_BUILTIN_PCMPGTD,
22671 IX86_BUILTIN_PMADDWD,
22673 IX86_BUILTIN_PMAXSW,
22674 IX86_BUILTIN_PMAXUB,
22675 IX86_BUILTIN_PMINSW,
22676 IX86_BUILTIN_PMINUB,
22678 IX86_BUILTIN_PMULHUW,
22679 IX86_BUILTIN_PMULHW,
22680 IX86_BUILTIN_PMULLW,
22682 IX86_BUILTIN_PSADBW,
22683 IX86_BUILTIN_PSHUFW,
22685 IX86_BUILTIN_PSLLW,
22686 IX86_BUILTIN_PSLLD,
22687 IX86_BUILTIN_PSLLQ,
22688 IX86_BUILTIN_PSRAW,
22689 IX86_BUILTIN_PSRAD,
22690 IX86_BUILTIN_PSRLW,
22691 IX86_BUILTIN_PSRLD,
22692 IX86_BUILTIN_PSRLQ,
22693 IX86_BUILTIN_PSLLWI,
22694 IX86_BUILTIN_PSLLDI,
22695 IX86_BUILTIN_PSLLQI,
22696 IX86_BUILTIN_PSRAWI,
22697 IX86_BUILTIN_PSRADI,
22698 IX86_BUILTIN_PSRLWI,
22699 IX86_BUILTIN_PSRLDI,
22700 IX86_BUILTIN_PSRLQI,
22702 IX86_BUILTIN_PUNPCKHBW,
22703 IX86_BUILTIN_PUNPCKHWD,
22704 IX86_BUILTIN_PUNPCKHDQ,
22705 IX86_BUILTIN_PUNPCKLBW,
22706 IX86_BUILTIN_PUNPCKLWD,
22707 IX86_BUILTIN_PUNPCKLDQ,
22709 IX86_BUILTIN_SHUFPS,
22711 IX86_BUILTIN_RCPPS,
22712 IX86_BUILTIN_RCPSS,
22713 IX86_BUILTIN_RSQRTPS,
22714 IX86_BUILTIN_RSQRTPS_NR,
22715 IX86_BUILTIN_RSQRTSS,
22716 IX86_BUILTIN_RSQRTF,
22717 IX86_BUILTIN_SQRTPS,
22718 IX86_BUILTIN_SQRTPS_NR,
22719 IX86_BUILTIN_SQRTSS,
22721 IX86_BUILTIN_UNPCKHPS,
22722 IX86_BUILTIN_UNPCKLPS,
22724 IX86_BUILTIN_ANDPS,
22725 IX86_BUILTIN_ANDNPS,
22727 IX86_BUILTIN_XORPS,
22730 IX86_BUILTIN_LDMXCSR,
22731 IX86_BUILTIN_STMXCSR,
22732 IX86_BUILTIN_SFENCE,
22734 /* 3DNow! Original */
22735 IX86_BUILTIN_FEMMS,
22736 IX86_BUILTIN_PAVGUSB,
22737 IX86_BUILTIN_PF2ID,
22738 IX86_BUILTIN_PFACC,
22739 IX86_BUILTIN_PFADD,
22740 IX86_BUILTIN_PFCMPEQ,
22741 IX86_BUILTIN_PFCMPGE,
22742 IX86_BUILTIN_PFCMPGT,
22743 IX86_BUILTIN_PFMAX,
22744 IX86_BUILTIN_PFMIN,
22745 IX86_BUILTIN_PFMUL,
22746 IX86_BUILTIN_PFRCP,
22747 IX86_BUILTIN_PFRCPIT1,
22748 IX86_BUILTIN_PFRCPIT2,
22749 IX86_BUILTIN_PFRSQIT1,
22750 IX86_BUILTIN_PFRSQRT,
22751 IX86_BUILTIN_PFSUB,
22752 IX86_BUILTIN_PFSUBR,
22753 IX86_BUILTIN_PI2FD,
22754 IX86_BUILTIN_PMULHRW,
22756 /* 3DNow! Athlon Extensions */
22757 IX86_BUILTIN_PF2IW,
22758 IX86_BUILTIN_PFNACC,
22759 IX86_BUILTIN_PFPNACC,
22760 IX86_BUILTIN_PI2FW,
22761 IX86_BUILTIN_PSWAPDSI,
22762 IX86_BUILTIN_PSWAPDSF,
22765 IX86_BUILTIN_ADDPD,
22766 IX86_BUILTIN_ADDSD,
22767 IX86_BUILTIN_DIVPD,
22768 IX86_BUILTIN_DIVSD,
22769 IX86_BUILTIN_MULPD,
22770 IX86_BUILTIN_MULSD,
22771 IX86_BUILTIN_SUBPD,
22772 IX86_BUILTIN_SUBSD,
22774 IX86_BUILTIN_CMPEQPD,
22775 IX86_BUILTIN_CMPLTPD,
22776 IX86_BUILTIN_CMPLEPD,
22777 IX86_BUILTIN_CMPGTPD,
22778 IX86_BUILTIN_CMPGEPD,
22779 IX86_BUILTIN_CMPNEQPD,
22780 IX86_BUILTIN_CMPNLTPD,
22781 IX86_BUILTIN_CMPNLEPD,
22782 IX86_BUILTIN_CMPNGTPD,
22783 IX86_BUILTIN_CMPNGEPD,
22784 IX86_BUILTIN_CMPORDPD,
22785 IX86_BUILTIN_CMPUNORDPD,
22786 IX86_BUILTIN_CMPEQSD,
22787 IX86_BUILTIN_CMPLTSD,
22788 IX86_BUILTIN_CMPLESD,
22789 IX86_BUILTIN_CMPNEQSD,
22790 IX86_BUILTIN_CMPNLTSD,
22791 IX86_BUILTIN_CMPNLESD,
22792 IX86_BUILTIN_CMPORDSD,
22793 IX86_BUILTIN_CMPUNORDSD,
22795 IX86_BUILTIN_COMIEQSD,
22796 IX86_BUILTIN_COMILTSD,
22797 IX86_BUILTIN_COMILESD,
22798 IX86_BUILTIN_COMIGTSD,
22799 IX86_BUILTIN_COMIGESD,
22800 IX86_BUILTIN_COMINEQSD,
22801 IX86_BUILTIN_UCOMIEQSD,
22802 IX86_BUILTIN_UCOMILTSD,
22803 IX86_BUILTIN_UCOMILESD,
22804 IX86_BUILTIN_UCOMIGTSD,
22805 IX86_BUILTIN_UCOMIGESD,
22806 IX86_BUILTIN_UCOMINEQSD,
22808 IX86_BUILTIN_MAXPD,
22809 IX86_BUILTIN_MAXSD,
22810 IX86_BUILTIN_MINPD,
22811 IX86_BUILTIN_MINSD,
22813 IX86_BUILTIN_ANDPD,
22814 IX86_BUILTIN_ANDNPD,
22816 IX86_BUILTIN_XORPD,
22818 IX86_BUILTIN_SQRTPD,
22819 IX86_BUILTIN_SQRTSD,
22821 IX86_BUILTIN_UNPCKHPD,
22822 IX86_BUILTIN_UNPCKLPD,
22824 IX86_BUILTIN_SHUFPD,
22826 IX86_BUILTIN_LOADUPD,
22827 IX86_BUILTIN_STOREUPD,
22828 IX86_BUILTIN_MOVSD,
22830 IX86_BUILTIN_LOADHPD,
22831 IX86_BUILTIN_LOADLPD,
22833 IX86_BUILTIN_CVTDQ2PD,
22834 IX86_BUILTIN_CVTDQ2PS,
22836 IX86_BUILTIN_CVTPD2DQ,
22837 IX86_BUILTIN_CVTPD2PI,
22838 IX86_BUILTIN_CVTPD2PS,
22839 IX86_BUILTIN_CVTTPD2DQ,
22840 IX86_BUILTIN_CVTTPD2PI,
22842 IX86_BUILTIN_CVTPI2PD,
22843 IX86_BUILTIN_CVTSI2SD,
22844 IX86_BUILTIN_CVTSI642SD,
22846 IX86_BUILTIN_CVTSD2SI,
22847 IX86_BUILTIN_CVTSD2SI64,
22848 IX86_BUILTIN_CVTSD2SS,
22849 IX86_BUILTIN_CVTSS2SD,
22850 IX86_BUILTIN_CVTTSD2SI,
22851 IX86_BUILTIN_CVTTSD2SI64,
22853 IX86_BUILTIN_CVTPS2DQ,
22854 IX86_BUILTIN_CVTPS2PD,
22855 IX86_BUILTIN_CVTTPS2DQ,
22857 IX86_BUILTIN_MOVNTI,
22858 IX86_BUILTIN_MOVNTPD,
22859 IX86_BUILTIN_MOVNTDQ,
22861 IX86_BUILTIN_MOVQ128,
22864 IX86_BUILTIN_MASKMOVDQU,
22865 IX86_BUILTIN_MOVMSKPD,
22866 IX86_BUILTIN_PMOVMSKB128,
22868 IX86_BUILTIN_PACKSSWB128,
22869 IX86_BUILTIN_PACKSSDW128,
22870 IX86_BUILTIN_PACKUSWB128,
22872 IX86_BUILTIN_PADDB128,
22873 IX86_BUILTIN_PADDW128,
22874 IX86_BUILTIN_PADDD128,
22875 IX86_BUILTIN_PADDQ128,
22876 IX86_BUILTIN_PADDSB128,
22877 IX86_BUILTIN_PADDSW128,
22878 IX86_BUILTIN_PADDUSB128,
22879 IX86_BUILTIN_PADDUSW128,
22880 IX86_BUILTIN_PSUBB128,
22881 IX86_BUILTIN_PSUBW128,
22882 IX86_BUILTIN_PSUBD128,
22883 IX86_BUILTIN_PSUBQ128,
22884 IX86_BUILTIN_PSUBSB128,
22885 IX86_BUILTIN_PSUBSW128,
22886 IX86_BUILTIN_PSUBUSB128,
22887 IX86_BUILTIN_PSUBUSW128,
22889 IX86_BUILTIN_PAND128,
22890 IX86_BUILTIN_PANDN128,
22891 IX86_BUILTIN_POR128,
22892 IX86_BUILTIN_PXOR128,
22894 IX86_BUILTIN_PAVGB128,
22895 IX86_BUILTIN_PAVGW128,
22897 IX86_BUILTIN_PCMPEQB128,
22898 IX86_BUILTIN_PCMPEQW128,
22899 IX86_BUILTIN_PCMPEQD128,
22900 IX86_BUILTIN_PCMPGTB128,
22901 IX86_BUILTIN_PCMPGTW128,
22902 IX86_BUILTIN_PCMPGTD128,
22904 IX86_BUILTIN_PMADDWD128,
22906 IX86_BUILTIN_PMAXSW128,
22907 IX86_BUILTIN_PMAXUB128,
22908 IX86_BUILTIN_PMINSW128,
22909 IX86_BUILTIN_PMINUB128,
22911 IX86_BUILTIN_PMULUDQ,
22912 IX86_BUILTIN_PMULUDQ128,
22913 IX86_BUILTIN_PMULHUW128,
22914 IX86_BUILTIN_PMULHW128,
22915 IX86_BUILTIN_PMULLW128,
22917 IX86_BUILTIN_PSADBW128,
22918 IX86_BUILTIN_PSHUFHW,
22919 IX86_BUILTIN_PSHUFLW,
22920 IX86_BUILTIN_PSHUFD,
22922 IX86_BUILTIN_PSLLDQI128,
22923 IX86_BUILTIN_PSLLWI128,
22924 IX86_BUILTIN_PSLLDI128,
22925 IX86_BUILTIN_PSLLQI128,
22926 IX86_BUILTIN_PSRAWI128,
22927 IX86_BUILTIN_PSRADI128,
22928 IX86_BUILTIN_PSRLDQI128,
22929 IX86_BUILTIN_PSRLWI128,
22930 IX86_BUILTIN_PSRLDI128,
22931 IX86_BUILTIN_PSRLQI128,
22933 IX86_BUILTIN_PSLLDQ128,
22934 IX86_BUILTIN_PSLLW128,
22935 IX86_BUILTIN_PSLLD128,
22936 IX86_BUILTIN_PSLLQ128,
22937 IX86_BUILTIN_PSRAW128,
22938 IX86_BUILTIN_PSRAD128,
22939 IX86_BUILTIN_PSRLW128,
22940 IX86_BUILTIN_PSRLD128,
22941 IX86_BUILTIN_PSRLQ128,
22943 IX86_BUILTIN_PUNPCKHBW128,
22944 IX86_BUILTIN_PUNPCKHWD128,
22945 IX86_BUILTIN_PUNPCKHDQ128,
22946 IX86_BUILTIN_PUNPCKHQDQ128,
22947 IX86_BUILTIN_PUNPCKLBW128,
22948 IX86_BUILTIN_PUNPCKLWD128,
22949 IX86_BUILTIN_PUNPCKLDQ128,
22950 IX86_BUILTIN_PUNPCKLQDQ128,
22952 IX86_BUILTIN_CLFLUSH,
22953 IX86_BUILTIN_MFENCE,
22954 IX86_BUILTIN_LFENCE,
22956 IX86_BUILTIN_BSRSI,
22957 IX86_BUILTIN_BSRDI,
22958 IX86_BUILTIN_RDPMC,
22959 IX86_BUILTIN_RDTSC,
22960 IX86_BUILTIN_RDTSCP,
22961 IX86_BUILTIN_ROLQI,
22962 IX86_BUILTIN_ROLHI,
22963 IX86_BUILTIN_RORQI,
22964 IX86_BUILTIN_RORHI,
22967 IX86_BUILTIN_ADDSUBPS,
22968 IX86_BUILTIN_HADDPS,
22969 IX86_BUILTIN_HSUBPS,
22970 IX86_BUILTIN_MOVSHDUP,
22971 IX86_BUILTIN_MOVSLDUP,
22972 IX86_BUILTIN_ADDSUBPD,
22973 IX86_BUILTIN_HADDPD,
22974 IX86_BUILTIN_HSUBPD,
22975 IX86_BUILTIN_LDDQU,
22977 IX86_BUILTIN_MONITOR,
22978 IX86_BUILTIN_MWAIT,
22981 IX86_BUILTIN_PHADDW,
22982 IX86_BUILTIN_PHADDD,
22983 IX86_BUILTIN_PHADDSW,
22984 IX86_BUILTIN_PHSUBW,
22985 IX86_BUILTIN_PHSUBD,
22986 IX86_BUILTIN_PHSUBSW,
22987 IX86_BUILTIN_PMADDUBSW,
22988 IX86_BUILTIN_PMULHRSW,
22989 IX86_BUILTIN_PSHUFB,
22990 IX86_BUILTIN_PSIGNB,
22991 IX86_BUILTIN_PSIGNW,
22992 IX86_BUILTIN_PSIGND,
22993 IX86_BUILTIN_PALIGNR,
22994 IX86_BUILTIN_PABSB,
22995 IX86_BUILTIN_PABSW,
22996 IX86_BUILTIN_PABSD,
22998 IX86_BUILTIN_PHADDW128,
22999 IX86_BUILTIN_PHADDD128,
23000 IX86_BUILTIN_PHADDSW128,
23001 IX86_BUILTIN_PHSUBW128,
23002 IX86_BUILTIN_PHSUBD128,
23003 IX86_BUILTIN_PHSUBSW128,
23004 IX86_BUILTIN_PMADDUBSW128,
23005 IX86_BUILTIN_PMULHRSW128,
23006 IX86_BUILTIN_PSHUFB128,
23007 IX86_BUILTIN_PSIGNB128,
23008 IX86_BUILTIN_PSIGNW128,
23009 IX86_BUILTIN_PSIGND128,
23010 IX86_BUILTIN_PALIGNR128,
23011 IX86_BUILTIN_PABSB128,
23012 IX86_BUILTIN_PABSW128,
23013 IX86_BUILTIN_PABSD128,
23015 /* AMDFAM10 - SSE4A New Instructions. */
23016 IX86_BUILTIN_MOVNTSD,
23017 IX86_BUILTIN_MOVNTSS,
23018 IX86_BUILTIN_EXTRQI,
23019 IX86_BUILTIN_EXTRQ,
23020 IX86_BUILTIN_INSERTQI,
23021 IX86_BUILTIN_INSERTQ,
23024 IX86_BUILTIN_BLENDPD,
23025 IX86_BUILTIN_BLENDPS,
23026 IX86_BUILTIN_BLENDVPD,
23027 IX86_BUILTIN_BLENDVPS,
23028 IX86_BUILTIN_PBLENDVB128,
23029 IX86_BUILTIN_PBLENDW128,
23034 IX86_BUILTIN_INSERTPS128,
23036 IX86_BUILTIN_MOVNTDQA,
23037 IX86_BUILTIN_MPSADBW128,
23038 IX86_BUILTIN_PACKUSDW128,
23039 IX86_BUILTIN_PCMPEQQ,
23040 IX86_BUILTIN_PHMINPOSUW128,
23042 IX86_BUILTIN_PMAXSB128,
23043 IX86_BUILTIN_PMAXSD128,
23044 IX86_BUILTIN_PMAXUD128,
23045 IX86_BUILTIN_PMAXUW128,
23047 IX86_BUILTIN_PMINSB128,
23048 IX86_BUILTIN_PMINSD128,
23049 IX86_BUILTIN_PMINUD128,
23050 IX86_BUILTIN_PMINUW128,
23052 IX86_BUILTIN_PMOVSXBW128,
23053 IX86_BUILTIN_PMOVSXBD128,
23054 IX86_BUILTIN_PMOVSXBQ128,
23055 IX86_BUILTIN_PMOVSXWD128,
23056 IX86_BUILTIN_PMOVSXWQ128,
23057 IX86_BUILTIN_PMOVSXDQ128,
23059 IX86_BUILTIN_PMOVZXBW128,
23060 IX86_BUILTIN_PMOVZXBD128,
23061 IX86_BUILTIN_PMOVZXBQ128,
23062 IX86_BUILTIN_PMOVZXWD128,
23063 IX86_BUILTIN_PMOVZXWQ128,
23064 IX86_BUILTIN_PMOVZXDQ128,
23066 IX86_BUILTIN_PMULDQ128,
23067 IX86_BUILTIN_PMULLD128,
23069 IX86_BUILTIN_ROUNDPD,
23070 IX86_BUILTIN_ROUNDPS,
23071 IX86_BUILTIN_ROUNDSD,
23072 IX86_BUILTIN_ROUNDSS,
23074 IX86_BUILTIN_PTESTZ,
23075 IX86_BUILTIN_PTESTC,
23076 IX86_BUILTIN_PTESTNZC,
23078 IX86_BUILTIN_VEC_INIT_V2SI,
23079 IX86_BUILTIN_VEC_INIT_V4HI,
23080 IX86_BUILTIN_VEC_INIT_V8QI,
23081 IX86_BUILTIN_VEC_EXT_V2DF,
23082 IX86_BUILTIN_VEC_EXT_V2DI,
23083 IX86_BUILTIN_VEC_EXT_V4SF,
23084 IX86_BUILTIN_VEC_EXT_V4SI,
23085 IX86_BUILTIN_VEC_EXT_V8HI,
23086 IX86_BUILTIN_VEC_EXT_V2SI,
23087 IX86_BUILTIN_VEC_EXT_V4HI,
23088 IX86_BUILTIN_VEC_EXT_V16QI,
23089 IX86_BUILTIN_VEC_SET_V2DI,
23090 IX86_BUILTIN_VEC_SET_V4SF,
23091 IX86_BUILTIN_VEC_SET_V4SI,
23092 IX86_BUILTIN_VEC_SET_V8HI,
23093 IX86_BUILTIN_VEC_SET_V4HI,
23094 IX86_BUILTIN_VEC_SET_V16QI,
23096 IX86_BUILTIN_VEC_PACK_SFIX,
23099 IX86_BUILTIN_CRC32QI,
23100 IX86_BUILTIN_CRC32HI,
23101 IX86_BUILTIN_CRC32SI,
23102 IX86_BUILTIN_CRC32DI,
23104 IX86_BUILTIN_PCMPESTRI128,
23105 IX86_BUILTIN_PCMPESTRM128,
23106 IX86_BUILTIN_PCMPESTRA128,
23107 IX86_BUILTIN_PCMPESTRC128,
23108 IX86_BUILTIN_PCMPESTRO128,
23109 IX86_BUILTIN_PCMPESTRS128,
23110 IX86_BUILTIN_PCMPESTRZ128,
23111 IX86_BUILTIN_PCMPISTRI128,
23112 IX86_BUILTIN_PCMPISTRM128,
23113 IX86_BUILTIN_PCMPISTRA128,
23114 IX86_BUILTIN_PCMPISTRC128,
23115 IX86_BUILTIN_PCMPISTRO128,
23116 IX86_BUILTIN_PCMPISTRS128,
23117 IX86_BUILTIN_PCMPISTRZ128,
23119 IX86_BUILTIN_PCMPGTQ,
23121 /* AES instructions */
23122 IX86_BUILTIN_AESENC128,
23123 IX86_BUILTIN_AESENCLAST128,
23124 IX86_BUILTIN_AESDEC128,
23125 IX86_BUILTIN_AESDECLAST128,
23126 IX86_BUILTIN_AESIMC128,
23127 IX86_BUILTIN_AESKEYGENASSIST128,
23129 /* PCLMUL instruction */
23130 IX86_BUILTIN_PCLMULQDQ128,
23133 IX86_BUILTIN_ADDPD256,
23134 IX86_BUILTIN_ADDPS256,
23135 IX86_BUILTIN_ADDSUBPD256,
23136 IX86_BUILTIN_ADDSUBPS256,
23137 IX86_BUILTIN_ANDPD256,
23138 IX86_BUILTIN_ANDPS256,
23139 IX86_BUILTIN_ANDNPD256,
23140 IX86_BUILTIN_ANDNPS256,
23141 IX86_BUILTIN_BLENDPD256,
23142 IX86_BUILTIN_BLENDPS256,
23143 IX86_BUILTIN_BLENDVPD256,
23144 IX86_BUILTIN_BLENDVPS256,
23145 IX86_BUILTIN_DIVPD256,
23146 IX86_BUILTIN_DIVPS256,
23147 IX86_BUILTIN_DPPS256,
23148 IX86_BUILTIN_HADDPD256,
23149 IX86_BUILTIN_HADDPS256,
23150 IX86_BUILTIN_HSUBPD256,
23151 IX86_BUILTIN_HSUBPS256,
23152 IX86_BUILTIN_MAXPD256,
23153 IX86_BUILTIN_MAXPS256,
23154 IX86_BUILTIN_MINPD256,
23155 IX86_BUILTIN_MINPS256,
23156 IX86_BUILTIN_MULPD256,
23157 IX86_BUILTIN_MULPS256,
23158 IX86_BUILTIN_ORPD256,
23159 IX86_BUILTIN_ORPS256,
23160 IX86_BUILTIN_SHUFPD256,
23161 IX86_BUILTIN_SHUFPS256,
23162 IX86_BUILTIN_SUBPD256,
23163 IX86_BUILTIN_SUBPS256,
23164 IX86_BUILTIN_XORPD256,
23165 IX86_BUILTIN_XORPS256,
23166 IX86_BUILTIN_CMPSD,
23167 IX86_BUILTIN_CMPSS,
23168 IX86_BUILTIN_CMPPD,
23169 IX86_BUILTIN_CMPPS,
23170 IX86_BUILTIN_CMPPD256,
23171 IX86_BUILTIN_CMPPS256,
23172 IX86_BUILTIN_CVTDQ2PD256,
23173 IX86_BUILTIN_CVTDQ2PS256,
23174 IX86_BUILTIN_CVTPD2PS256,
23175 IX86_BUILTIN_CVTPS2DQ256,
23176 IX86_BUILTIN_CVTPS2PD256,
23177 IX86_BUILTIN_CVTTPD2DQ256,
23178 IX86_BUILTIN_CVTPD2DQ256,
23179 IX86_BUILTIN_CVTTPS2DQ256,
23180 IX86_BUILTIN_EXTRACTF128PD256,
23181 IX86_BUILTIN_EXTRACTF128PS256,
23182 IX86_BUILTIN_EXTRACTF128SI256,
23183 IX86_BUILTIN_VZEROALL,
23184 IX86_BUILTIN_VZEROUPPER,
23185 IX86_BUILTIN_VPERMILVARPD,
23186 IX86_BUILTIN_VPERMILVARPS,
23187 IX86_BUILTIN_VPERMILVARPD256,
23188 IX86_BUILTIN_VPERMILVARPS256,
23189 IX86_BUILTIN_VPERMILPD,
23190 IX86_BUILTIN_VPERMILPS,
23191 IX86_BUILTIN_VPERMILPD256,
23192 IX86_BUILTIN_VPERMILPS256,
23193 IX86_BUILTIN_VPERMIL2PD,
23194 IX86_BUILTIN_VPERMIL2PS,
23195 IX86_BUILTIN_VPERMIL2PD256,
23196 IX86_BUILTIN_VPERMIL2PS256,
23197 IX86_BUILTIN_VPERM2F128PD256,
23198 IX86_BUILTIN_VPERM2F128PS256,
23199 IX86_BUILTIN_VPERM2F128SI256,
23200 IX86_BUILTIN_VBROADCASTSS,
23201 IX86_BUILTIN_VBROADCASTSD256,
23202 IX86_BUILTIN_VBROADCASTSS256,
23203 IX86_BUILTIN_VBROADCASTPD256,
23204 IX86_BUILTIN_VBROADCASTPS256,
23205 IX86_BUILTIN_VINSERTF128PD256,
23206 IX86_BUILTIN_VINSERTF128PS256,
23207 IX86_BUILTIN_VINSERTF128SI256,
23208 IX86_BUILTIN_LOADUPD256,
23209 IX86_BUILTIN_LOADUPS256,
23210 IX86_BUILTIN_STOREUPD256,
23211 IX86_BUILTIN_STOREUPS256,
23212 IX86_BUILTIN_LDDQU256,
23213 IX86_BUILTIN_MOVNTDQ256,
23214 IX86_BUILTIN_MOVNTPD256,
23215 IX86_BUILTIN_MOVNTPS256,
23216 IX86_BUILTIN_LOADDQU256,
23217 IX86_BUILTIN_STOREDQU256,
23218 IX86_BUILTIN_MASKLOADPD,
23219 IX86_BUILTIN_MASKLOADPS,
23220 IX86_BUILTIN_MASKSTOREPD,
23221 IX86_BUILTIN_MASKSTOREPS,
23222 IX86_BUILTIN_MASKLOADPD256,
23223 IX86_BUILTIN_MASKLOADPS256,
23224 IX86_BUILTIN_MASKSTOREPD256,
23225 IX86_BUILTIN_MASKSTOREPS256,
23226 IX86_BUILTIN_MOVSHDUP256,
23227 IX86_BUILTIN_MOVSLDUP256,
23228 IX86_BUILTIN_MOVDDUP256,
23230 IX86_BUILTIN_SQRTPD256,
23231 IX86_BUILTIN_SQRTPS256,
23232 IX86_BUILTIN_SQRTPS_NR256,
23233 IX86_BUILTIN_RSQRTPS256,
23234 IX86_BUILTIN_RSQRTPS_NR256,
23236 IX86_BUILTIN_RCPPS256,
23238 IX86_BUILTIN_ROUNDPD256,
23239 IX86_BUILTIN_ROUNDPS256,
23241 IX86_BUILTIN_UNPCKHPD256,
23242 IX86_BUILTIN_UNPCKLPD256,
23243 IX86_BUILTIN_UNPCKHPS256,
23244 IX86_BUILTIN_UNPCKLPS256,
23246 IX86_BUILTIN_SI256_SI,
23247 IX86_BUILTIN_PS256_PS,
23248 IX86_BUILTIN_PD256_PD,
23249 IX86_BUILTIN_SI_SI256,
23250 IX86_BUILTIN_PS_PS256,
23251 IX86_BUILTIN_PD_PD256,
23253 IX86_BUILTIN_VTESTZPD,
23254 IX86_BUILTIN_VTESTCPD,
23255 IX86_BUILTIN_VTESTNZCPD,
23256 IX86_BUILTIN_VTESTZPS,
23257 IX86_BUILTIN_VTESTCPS,
23258 IX86_BUILTIN_VTESTNZCPS,
23259 IX86_BUILTIN_VTESTZPD256,
23260 IX86_BUILTIN_VTESTCPD256,
23261 IX86_BUILTIN_VTESTNZCPD256,
23262 IX86_BUILTIN_VTESTZPS256,
23263 IX86_BUILTIN_VTESTCPS256,
23264 IX86_BUILTIN_VTESTNZCPS256,
23265 IX86_BUILTIN_PTESTZ256,
23266 IX86_BUILTIN_PTESTC256,
23267 IX86_BUILTIN_PTESTNZC256,
23269 IX86_BUILTIN_MOVMSKPD256,
23270 IX86_BUILTIN_MOVMSKPS256,
23272 /* TFmode support builtins. */
23274 IX86_BUILTIN_HUGE_VALQ,
23275 IX86_BUILTIN_FABSQ,
23276 IX86_BUILTIN_COPYSIGNQ,
23278 /* Vectorizer support builtins. */
23279 IX86_BUILTIN_CPYSGNPS,
23280 IX86_BUILTIN_CPYSGNPD,
23281 IX86_BUILTIN_CPYSGNPS256,
23282 IX86_BUILTIN_CPYSGNPD256,
23284 IX86_BUILTIN_CVTUDQ2PS,
23286 IX86_BUILTIN_VEC_PERM_V2DF,
23287 IX86_BUILTIN_VEC_PERM_V4SF,
23288 IX86_BUILTIN_VEC_PERM_V2DI,
23289 IX86_BUILTIN_VEC_PERM_V4SI,
23290 IX86_BUILTIN_VEC_PERM_V8HI,
23291 IX86_BUILTIN_VEC_PERM_V16QI,
23292 IX86_BUILTIN_VEC_PERM_V2DI_U,
23293 IX86_BUILTIN_VEC_PERM_V4SI_U,
23294 IX86_BUILTIN_VEC_PERM_V8HI_U,
23295 IX86_BUILTIN_VEC_PERM_V16QI_U,
23296 IX86_BUILTIN_VEC_PERM_V4DF,
23297 IX86_BUILTIN_VEC_PERM_V8SF,
23299 /* FMA4 and XOP instructions. */
23300 IX86_BUILTIN_VFMADDSS,
23301 IX86_BUILTIN_VFMADDSD,
23302 IX86_BUILTIN_VFMADDPS,
23303 IX86_BUILTIN_VFMADDPD,
23304 IX86_BUILTIN_VFMADDPS256,
23305 IX86_BUILTIN_VFMADDPD256,
23306 IX86_BUILTIN_VFMADDSUBPS,
23307 IX86_BUILTIN_VFMADDSUBPD,
23308 IX86_BUILTIN_VFMADDSUBPS256,
23309 IX86_BUILTIN_VFMADDSUBPD256,
23311 IX86_BUILTIN_VPCMOV,
23312 IX86_BUILTIN_VPCMOV_V2DI,
23313 IX86_BUILTIN_VPCMOV_V4SI,
23314 IX86_BUILTIN_VPCMOV_V8HI,
23315 IX86_BUILTIN_VPCMOV_V16QI,
23316 IX86_BUILTIN_VPCMOV_V4SF,
23317 IX86_BUILTIN_VPCMOV_V2DF,
23318 IX86_BUILTIN_VPCMOV256,
23319 IX86_BUILTIN_VPCMOV_V4DI256,
23320 IX86_BUILTIN_VPCMOV_V8SI256,
23321 IX86_BUILTIN_VPCMOV_V16HI256,
23322 IX86_BUILTIN_VPCMOV_V32QI256,
23323 IX86_BUILTIN_VPCMOV_V8SF256,
23324 IX86_BUILTIN_VPCMOV_V4DF256,
23326 IX86_BUILTIN_VPPERM,
23328 IX86_BUILTIN_VPMACSSWW,
23329 IX86_BUILTIN_VPMACSWW,
23330 IX86_BUILTIN_VPMACSSWD,
23331 IX86_BUILTIN_VPMACSWD,
23332 IX86_BUILTIN_VPMACSSDD,
23333 IX86_BUILTIN_VPMACSDD,
23334 IX86_BUILTIN_VPMACSSDQL,
23335 IX86_BUILTIN_VPMACSSDQH,
23336 IX86_BUILTIN_VPMACSDQL,
23337 IX86_BUILTIN_VPMACSDQH,
23338 IX86_BUILTIN_VPMADCSSWD,
23339 IX86_BUILTIN_VPMADCSWD,
23341 IX86_BUILTIN_VPHADDBW,
23342 IX86_BUILTIN_VPHADDBD,
23343 IX86_BUILTIN_VPHADDBQ,
23344 IX86_BUILTIN_VPHADDWD,
23345 IX86_BUILTIN_VPHADDWQ,
23346 IX86_BUILTIN_VPHADDDQ,
23347 IX86_BUILTIN_VPHADDUBW,
23348 IX86_BUILTIN_VPHADDUBD,
23349 IX86_BUILTIN_VPHADDUBQ,
23350 IX86_BUILTIN_VPHADDUWD,
23351 IX86_BUILTIN_VPHADDUWQ,
23352 IX86_BUILTIN_VPHADDUDQ,
23353 IX86_BUILTIN_VPHSUBBW,
23354 IX86_BUILTIN_VPHSUBWD,
23355 IX86_BUILTIN_VPHSUBDQ,
23357 IX86_BUILTIN_VPROTB,
23358 IX86_BUILTIN_VPROTW,
23359 IX86_BUILTIN_VPROTD,
23360 IX86_BUILTIN_VPROTQ,
23361 IX86_BUILTIN_VPROTB_IMM,
23362 IX86_BUILTIN_VPROTW_IMM,
23363 IX86_BUILTIN_VPROTD_IMM,
23364 IX86_BUILTIN_VPROTQ_IMM,
23366 IX86_BUILTIN_VPSHLB,
23367 IX86_BUILTIN_VPSHLW,
23368 IX86_BUILTIN_VPSHLD,
23369 IX86_BUILTIN_VPSHLQ,
23370 IX86_BUILTIN_VPSHAB,
23371 IX86_BUILTIN_VPSHAW,
23372 IX86_BUILTIN_VPSHAD,
23373 IX86_BUILTIN_VPSHAQ,
23375 IX86_BUILTIN_VFRCZSS,
23376 IX86_BUILTIN_VFRCZSD,
23377 IX86_BUILTIN_VFRCZPS,
23378 IX86_BUILTIN_VFRCZPD,
23379 IX86_BUILTIN_VFRCZPS256,
23380 IX86_BUILTIN_VFRCZPD256,
23382 IX86_BUILTIN_VPCOMEQUB,
23383 IX86_BUILTIN_VPCOMNEUB,
23384 IX86_BUILTIN_VPCOMLTUB,
23385 IX86_BUILTIN_VPCOMLEUB,
23386 IX86_BUILTIN_VPCOMGTUB,
23387 IX86_BUILTIN_VPCOMGEUB,
23388 IX86_BUILTIN_VPCOMFALSEUB,
23389 IX86_BUILTIN_VPCOMTRUEUB,
23391 IX86_BUILTIN_VPCOMEQUW,
23392 IX86_BUILTIN_VPCOMNEUW,
23393 IX86_BUILTIN_VPCOMLTUW,
23394 IX86_BUILTIN_VPCOMLEUW,
23395 IX86_BUILTIN_VPCOMGTUW,
23396 IX86_BUILTIN_VPCOMGEUW,
23397 IX86_BUILTIN_VPCOMFALSEUW,
23398 IX86_BUILTIN_VPCOMTRUEUW,
23400 IX86_BUILTIN_VPCOMEQUD,
23401 IX86_BUILTIN_VPCOMNEUD,
23402 IX86_BUILTIN_VPCOMLTUD,
23403 IX86_BUILTIN_VPCOMLEUD,
23404 IX86_BUILTIN_VPCOMGTUD,
23405 IX86_BUILTIN_VPCOMGEUD,
23406 IX86_BUILTIN_VPCOMFALSEUD,
23407 IX86_BUILTIN_VPCOMTRUEUD,
23409 IX86_BUILTIN_VPCOMEQUQ,
23410 IX86_BUILTIN_VPCOMNEUQ,
23411 IX86_BUILTIN_VPCOMLTUQ,
23412 IX86_BUILTIN_VPCOMLEUQ,
23413 IX86_BUILTIN_VPCOMGTUQ,
23414 IX86_BUILTIN_VPCOMGEUQ,
23415 IX86_BUILTIN_VPCOMFALSEUQ,
23416 IX86_BUILTIN_VPCOMTRUEUQ,
23418 IX86_BUILTIN_VPCOMEQB,
23419 IX86_BUILTIN_VPCOMNEB,
23420 IX86_BUILTIN_VPCOMLTB,
23421 IX86_BUILTIN_VPCOMLEB,
23422 IX86_BUILTIN_VPCOMGTB,
23423 IX86_BUILTIN_VPCOMGEB,
23424 IX86_BUILTIN_VPCOMFALSEB,
23425 IX86_BUILTIN_VPCOMTRUEB,
23427 IX86_BUILTIN_VPCOMEQW,
23428 IX86_BUILTIN_VPCOMNEW,
23429 IX86_BUILTIN_VPCOMLTW,
23430 IX86_BUILTIN_VPCOMLEW,
23431 IX86_BUILTIN_VPCOMGTW,
23432 IX86_BUILTIN_VPCOMGEW,
23433 IX86_BUILTIN_VPCOMFALSEW,
23434 IX86_BUILTIN_VPCOMTRUEW,
23436 IX86_BUILTIN_VPCOMEQD,
23437 IX86_BUILTIN_VPCOMNED,
23438 IX86_BUILTIN_VPCOMLTD,
23439 IX86_BUILTIN_VPCOMLED,
23440 IX86_BUILTIN_VPCOMGTD,
23441 IX86_BUILTIN_VPCOMGED,
23442 IX86_BUILTIN_VPCOMFALSED,
23443 IX86_BUILTIN_VPCOMTRUED,
23445 IX86_BUILTIN_VPCOMEQQ,
23446 IX86_BUILTIN_VPCOMNEQ,
23447 IX86_BUILTIN_VPCOMLTQ,
23448 IX86_BUILTIN_VPCOMLEQ,
23449 IX86_BUILTIN_VPCOMGTQ,
23450 IX86_BUILTIN_VPCOMGEQ,
23451 IX86_BUILTIN_VPCOMFALSEQ,
23452 IX86_BUILTIN_VPCOMTRUEQ,
23454 /* LWP instructions. */
23455 IX86_BUILTIN_LLWPCB,
23456 IX86_BUILTIN_SLWPCB,
23457 IX86_BUILTIN_LWPVAL32,
23458 IX86_BUILTIN_LWPVAL64,
23459 IX86_BUILTIN_LWPINS32,
23460 IX86_BUILTIN_LWPINS64,
23464 /* FSGSBASE instructions. */
23465 IX86_BUILTIN_RDFSBASE32,
23466 IX86_BUILTIN_RDFSBASE64,
23467 IX86_BUILTIN_RDGSBASE32,
23468 IX86_BUILTIN_RDGSBASE64,
23469 IX86_BUILTIN_WRFSBASE32,
23470 IX86_BUILTIN_WRFSBASE64,
23471 IX86_BUILTIN_WRGSBASE32,
23472 IX86_BUILTIN_WRGSBASE64,
23474 /* RDRND instructions. */
23475 IX86_BUILTIN_RDRAND16,
23476 IX86_BUILTIN_RDRAND32,
23477 IX86_BUILTIN_RDRAND64,
23479 /* F16C instructions. */
23480 IX86_BUILTIN_CVTPH2PS,
23481 IX86_BUILTIN_CVTPH2PS256,
23482 IX86_BUILTIN_CVTPS2PH,
23483 IX86_BUILTIN_CVTPS2PH256,
23488 /* Table for the ix86 builtin decls. */
23489 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
23491 /* Table of all of the builtin functions that are possible with different ISAs,
23492 but are waiting to be built until a function is declared to use that
23493 ISA. */
23494 struct builtin_isa {
23495 const char *name; /* function name */
23496 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
23497 int isa; /* isa_flags this builtin is defined for */
23498 bool const_p; /* true if the declaration is constant */
23499 bool set_and_not_built_p;
23502 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
23505 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
23506 of which isa_flags to use in the ix86_builtins_isa array. Stores the
23507 function decl in the ix86_builtins array. Returns the function decl or
23508 NULL_TREE, if the builtin was not added.
23510 If the front end has a special hook for builtin functions, delay adding
23511 builtin functions that aren't in the current ISA until the ISA is changed
23512 with function specific optimization. Doing so can save about 300K for the
23513 default compiler. When the builtin is expanded, check at that time whether
23514 it is valid.
23516 If the front end doesn't have a special hook, record all builtins, even if
23517 the builtin's instruction set isn't in the current ISA, in case the user
23518 uses function-specific options for a different ISA, so that we don't get
23519 scope errors if a builtin is added in the middle of a function scope. */
23522 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
23523 enum ix86_builtins code)
23525 tree decl = NULL_TREE;
23527 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
23529 ix86_builtins_isa[(int) code].isa = mask;
23531 mask &= ~OPTION_MASK_ISA_64BIT;
23532 if (mask == 0
23533 || (mask & ix86_isa_flags) != 0
23534 || (lang_hooks.builtin_function
23535 == lang_hooks.builtin_function_ext_scope))
23538 tree type = ix86_get_builtin_func_type (tcode);
23539 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
23540 NULL, NULL_TREE);
23541 ix86_builtins[(int) code] = decl;
23542 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
23546 ix86_builtins[(int) code] = NULL_TREE;
23547 ix86_builtins_isa[(int) code].tcode = tcode;
23548 ix86_builtins_isa[(int) code].name = name;
23549 ix86_builtins_isa[(int) code].const_p = false;
23550 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
23557 /* Like def_builtin, but also marks the function decl "const". */
23560 def_builtin_const (int mask, const char *name,
23561 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
23563 tree decl = def_builtin (mask, name, tcode, code);
23564 if (decl)
23565 TREE_READONLY (decl) = 1;
23566 else
23567 ix86_builtins_isa[(int) code].const_p = true;
23569 return decl;
23570 }
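/* Illustrative registration of a single builtin with the helpers above
   (the builtin code and function-type code appear elsewhere in this
   file and in i386-builtin-types.inc):  */
#if 0
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
#endif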
23572 /* Add any new builtin functions for a given ISA that may not have been
23573 declared. This saves a bit of space compared to adding all of the
23574 declarations to the tree, even if we didn't use them. */
23577 ix86_add_new_builtins (int isa)
23581 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
23583 if ((ix86_builtins_isa[i].isa & isa) != 0
23584 && ix86_builtins_isa[i].set_and_not_built_p)
23588 /* Don't define the builtin again. */
23589 ix86_builtins_isa[i].set_and_not_built_p = false;
23591 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
23592 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
23593 type, i, BUILT_IN_MD, NULL,
23596 ix86_builtins[i] = decl;
23597 if (ix86_builtins_isa[i].const_p)
23598 TREE_READONLY (decl) = 1;
23599 }
23600 }
23601 }
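/* Illustrative trigger for the lazy path above (plain GNU C, assuming a
   default ISA without AVX): the AVX builtins are recorded but not built
   until a function selects the ISA, at which point the target-attribute
   handling is expected to call ix86_add_new_builtins.  */
#if 0
__attribute__((target ("avx")))
void
use_avx (void)
{
  /* __builtin_ia32_* AVX builtins become visible in this scope.  */
}
#endif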
23603 /* Bits for builtin_description.flag. */
23605 /* Set when we don't support the comparison natively, and should
23606 swap the comparison operands in order to support it. */
23607 #define BUILTIN_DESC_SWAP_OPERANDS 1
23609 struct builtin_description
23611 const unsigned int mask;
23612 const enum insn_code icode;
23613 const char *const name;
23614 const enum ix86_builtins code;
23615 const enum rtx_code comparison;
23619 static const struct builtin_description bdesc_comi[] =
23621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
23622 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
23623 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
23624 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
23625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
23626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
23627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
23628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
23629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
23630 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
23631 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
23632 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
23633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
23634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
23635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
23636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
23637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
23638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
23639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
23640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
23641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
23642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
23643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
23644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
23647 static const struct builtin_description bdesc_pcmpestr[] =
23650 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
23651 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
23652 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
23653 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
23654 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
23655 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
23656 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
23659 static const struct builtin_description bdesc_pcmpistr[] =
23662 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
23663 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
23664 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
23665 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
23666 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
23667 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
23668 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
23671 /* Special builtins with variable number of arguments. */
23672 static const struct builtin_description bdesc_special_args[] =
23674 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
23675 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
23678 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23681 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23684 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23685 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23688 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23689 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23690 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23691 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23693 /* SSE or 3DNow!A */
23694 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23695 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
23698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
23702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
23704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
23705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
23706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23712 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
23718 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23719 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
23723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
23725 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23726 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23727 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
23729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
23731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
23737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
23740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
23744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
23745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
23746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
23747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
23748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
23749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
23750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
23752 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
23753 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
23754 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
23755 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
23756 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
23757 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
23760 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23761 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23762 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23763 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23764 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23765 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23766 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23767 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23770 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
23771 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23772 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23775 /* Builtins with variable number of arguments. */
23776 static const struct builtin_description bdesc_args[] =
23778 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23779 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23780 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23781 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23782 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23783 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23784 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
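
  /* 3DNow! */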
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
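
  /* 3DNow!A */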
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
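
  /* SSE */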
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
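
  /* SSE2 */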
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
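
  /* SSE2 MMX */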
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
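
  /* SSE3 */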
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
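
  /* SSSE3 */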
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
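
  /* SSE4.1 */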
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
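
  /* SSE4.2 */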
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
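
  /* SSE4A */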
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
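
  /* AES */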
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX */
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24286 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24287 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24289 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24290 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24291 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24292 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24293 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24294 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24295 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24297 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24299 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24300 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24301 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24302 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24303 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24304 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24305 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24306 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24307 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24308 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24309 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24310 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
24313 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
24314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
24315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
24317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
24320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
24321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
24331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
24332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
24333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
24334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
24335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
24336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
24338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
24340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
24341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
24342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
24343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
24344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
24345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
24346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
24349 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
24350 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
24352 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24353 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24354 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24356 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
24357 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24358 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24359 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24360 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
24364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
24365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
24367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24368 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
24373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
24374 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
24375 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
24376 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
24377 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
24379 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24381 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
24382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
24385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
24388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
24391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24392 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
24395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
24396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
24398 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
24399 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
24401 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
/* F16C */
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
24405 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
24406 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
24407 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
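/* Each row above is consumed by ix86_init_mmx_sse_builtins below.  For the
   first AVX row, for example, the generic loop ends up doing the equivalent
   of (a sketch of what the loop performs, not a literal line of this file):

     def_builtin_const (OPTION_MASK_ISA_AVX, "__builtin_ia32_addpd256",
                        V4DF_FTYPE_V4DF_V4DF, IX86_BUILTIN_ADDPD256);

   i.e. the builtin is registered only when -mavx is in force, with the
   function type looked up from the ix86_builtin_func_type table.  */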
24410 /* FMA4 and XOP. */
24411 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
24412 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
24413 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
24414 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
24415 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
24416 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
24417 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
24418 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
24419 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
24420 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
24421 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
24422 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
24423 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
24424 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
24425 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
24426 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
24427 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
24428 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
24429 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
24430 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
24431 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
24432 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
24433 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
24434 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
24435 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
24436 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
24437 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
24438 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
24439 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
24440 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
24441 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
24442 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
24443 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
24444 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
24445 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
24446 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
24447 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
24448 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
24449 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
24450 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
24451 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
24452 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
24453 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
24454 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
24455 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
24456 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
24457 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
24458 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
24459 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
24460 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
24461 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
24462 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
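/* The MULTI_ARG_* names above encode <operand count>_<element type>, with a
   "2" suffix for the 256-bit variants and _IMM/_CMP/_TF suffixes for a
   trailing immediate, a comparison code, or a PCOM_TRUE/PCOM_FALSE code.
   For instance, MULTI_ARG_3_SF2 expands to V8SF_FTYPE_V8SF_V8SF_V8SF:
   three 256-bit single-float vector operands.  */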
static const struct builtin_description bdesc_multi_arg[] =
{
24466 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
24467 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
24468 UNKNOWN, (int)MULTI_ARG_3_SF },
24469 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
24470 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
24471 UNKNOWN, (int)MULTI_ARG_3_DF },
24473 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
24474 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
24475 UNKNOWN, (int)MULTI_ARG_3_SF },
24476 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
24477 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
24478 UNKNOWN, (int)MULTI_ARG_3_DF },
24479 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
24480 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
24481 UNKNOWN, (int)MULTI_ARG_3_SF2 },
24482 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
24483 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
24484 UNKNOWN, (int)MULTI_ARG_3_DF2 },
24486 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
24487 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
24488 UNKNOWN, (int)MULTI_ARG_3_SF },
24489 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
24490 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
24491 UNKNOWN, (int)MULTI_ARG_3_DF },
24492 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
24493 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
24494 UNKNOWN, (int)MULTI_ARG_3_SF2 },
24495 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
24496 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
24497 UNKNOWN, (int)MULTI_ARG_3_DF2 },
24499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
24500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
24501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
24502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
24503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
24504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
24505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
24507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
24508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
24509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
24510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
24511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
24512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
24513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
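/* vpcmov is a bitwise select; as a sketch in plain C, each destination bit
   obeys

     dst = (src1 & sel) | (src2 & ~sel);

   where sel is the third operand.  Because the operation is type-agnostic,
   one pattern (xop_pcmov_v2di) serves both the generic
   __builtin_ia32_vpcmov and its v2di-typed variant.  */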
24515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
24517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
24518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
24519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
24522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
24523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
24527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
24530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
24532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
24533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
24534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
24535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
24536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
24537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
24538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
24540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
24541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
24542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
24544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
24545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
24547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
24548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
24549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
24550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
24551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
24552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
24554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
24571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
24574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
24575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
24576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
24578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
24579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
24582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
24583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
24584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
24586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
24587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
24590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
24591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
24592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
24594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
24598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
24599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
24600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
24602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
24603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
24606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
24607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
24608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
24610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
24611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
24614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
24615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
24616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
24618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
24619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
24622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
24623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
24624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
24626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
24630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
24631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
24632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
24634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
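/* PCOM_FALSE and PCOM_TRUE are not genuine rtx comparison codes; the
   (enum rtx_code) casts mark pseudo-codes that the pcom_tf patterns expand
   straight to an all-zeros or all-ones result without emitting a compare.  */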
24652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
24653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
24654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
24655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};

/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;
24670 /* Add all special builtins with variable number of operands. */
24671 for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }
24682 /* Add all builtins with variable number of operands. */
24683 for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
24694 /* pcmpestr[im] insns. */
24695 for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
24706 /* pcmpistr[im] insns. */
24707 for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
24718 /* comi/ucomi insns. */
24719 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
24730 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
24731 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
24732 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
24734 /* SSE or 3DNow!A */
24735 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24736 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
24737 IX86_BUILTIN_MASKMOVQ);
  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
24741 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
24743 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
24744 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
24745 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
24746 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
24750 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
24751 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
24752 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
24756 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
24757 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
24758 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
24759 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
24760 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
24761 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
24762 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
24763 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
24764 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
24765 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
24766 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
24770 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
24772 /* MMX access to the vec_init patterns. */
24773 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
24774 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
24776 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
24777 V4HI_FTYPE_HI_HI_HI_HI,
24778 IX86_BUILTIN_VEC_INIT_V4HI);
24780 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
24781 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
24782 IX86_BUILTIN_VEC_INIT_V8QI);
24784 /* Access to the vec_extract patterns. */
24785 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
24786 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
24787 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
24788 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
24789 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
24790 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
24791 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
24792 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
24793 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
24794 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
24796 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24797 "__builtin_ia32_vec_ext_v4hi",
24798 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
24800 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
24801 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
24803 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
24804 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
24806 /* Access to the vec_set patterns. */
24807 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
24808 "__builtin_ia32_vec_set_v2di",
24809 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
24811 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
24812 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
24814 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
24815 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
24817 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
24818 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
24820 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24821 "__builtin_ia32_vec_set_v4hi",
24822 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
24824 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
24825 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
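/* As a user-level sketch (not a line of this file), the vec_ext/vec_set
   builtins behave like element accessors:

     float f = __builtin_ia32_vec_ext_v4sf (v, 2);        // v[2]
     v4sf  w = __builtin_ia32_vec_set_v4sf (v, 1.0f, 2);  // copy with v[2]=1.0f

   where v4sf is a vector of four floats; callers use them to open-code
   element access on the corresponding vector modes.  */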
  /* Add FMA4 and XOP multi-argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
24838 /* Internal method for ix86_init_builtins. */
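/* A usage sketch (user code, not part of this file), assuming x86-64: a
   function can carry a foreign calling convention and still consume its
   varargs, e.g.

     void f (int n, ...) __attribute__ ((ms_abi));
     void f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
     }

   The registrations below make those names resolve to the generic
   va_start/va_end/va_copy machinery under the respective ABI.  */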
static void
ix86_init_builtins_va_builtins_abi (void)
{
24843 tree ms_va_ref, sysv_va_ref;
24844 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24845 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24846 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24847 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
  if (!TARGET_64BIT)
    return;

  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24852 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24853 ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24859 fnvoid_va_start_ms =
24860 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24861 fnvoid_va_end_sysv =
24862 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24863 fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
24866 fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
24869 fnvoid_va_copy_sysv =
24870 build_function_type_list (void_type_node, sysv_va_ref,
24871 sysv_va_ref, NULL_TREE);
24873 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24874 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24875 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24876 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24877 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24878 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24879 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24880 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24881 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24882 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24883 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24884 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}

static void
ix86_init_builtin_types (void)
{
24890 tree float128_type_node, float80_type_node;
24892 /* The __float80 type. */
24893 float80_type_node = long_double_type_node;
24894 if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
24902 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
24904 /* The __float128 type. */
24905 float128_type_node = make_node (REAL_TYPE);
24906 TYPE_PRECISION (float128_type_node) = 128;
24907 layout_type (float128_type_node);
24908 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
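/* For reference, a user-level sketch (not a line of this file): once these
   types are registered, ia32 code can write

     __float128 q = 1.0q;   -- TFmode, 128-bit IEEE quad
     __float80  e = 1.0w;   -- XFmode, 80-bit extended precision

   using the 'q' and 'w' literal suffixes the C front end accepts for these
   modes.  */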
24910 /* This macro is built by i386-builtin-types.awk. */
24911 DEFINE_BUILTIN_PRIMITIVE_TYPES;
}

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();
24921 /* TFmode support builtins. */
24922 def_builtin_const (0, "__builtin_infq",
24923 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
24924 def_builtin_const (0, "__builtin_huge_valq",
24925 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
  /* We will expand them to a normal call if SSE2 isn't available, since
     they are used by libgcc.  */
24929 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
24930 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
24931 BUILT_IN_MD, "__fabstf2", NULL_TREE);
24932 TREE_READONLY (t) = 1;
24933 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
24935 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
24936 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
24937 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
24938 TREE_READONLY (t) = 1;
24939 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
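/* E.g. (user-level sketch): __builtin_fabsq (x) folds to a TFmode ABS when
   SSE2 is available and otherwise becomes a call to __fabstf2 in libgcc,
   which is what the BUILT_IN_MD registrations above arrange.  */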
24941 ix86_init_mmx_sse_builtins ();
  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
24946 #ifdef SUBTARGET_INIT_BUILTINS
24947 SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */
static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
24956 if (code >= IX86_BUILTIN_MAX)
24957 return error_mark_node;
  return ix86_builtins[code];
}
24962 /* Errors in the source file can cause expand_expr to return const0_rtx
24963 where we expect a vector. To avoid crashing, use one of the vector
24964 clear instructions. */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
24968 if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
24973 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
24979 tree arg0 = CALL_EXPR_ARG (exp, 0);
24980 tree arg1 = CALL_EXPR_ARG (exp, 1);
24981 rtx op0 = expand_normal (arg0);
24982 rtx op1 = expand_normal (arg1);
24983 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24984 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24985 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24987 if (VECTOR_MODE_P (mode0))
24988 op0 = safe_vector_operand (op0, mode0);
24989 if (VECTOR_MODE_P (mode1))
24990 op1 = safe_vector_operand (op1, mode1);
24992 if (optimize || !target
24993 || GET_MODE (target) != tmode
24994 || !insn_data[icode].operand[0].predicate (target, tmode))
24995 target = gen_reg_rtx (tmode);
24997 if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }
25004 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25005 op0 = copy_to_mode_reg (mode0, op0);
25006 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25007 op1 = copy_to_mode_reg (mode1, op1);
  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
25018 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
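/* Operand layout assumed by the expander below: operand 0 of the insn is
   the destination, and for the comparison forms an extra predicate rtx is
   inserted as operand 1, which is why the per-argument loop offsets its
   operand index by `adjust'.  */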
static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25022 enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
25028 bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
25031 int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];
25037 enum machine_mode tmode = insn_data[icode].operand[0].mode;
  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
25042 case MULTI_ARG_4_DF2_DI_I1:
25043 case MULTI_ARG_4_SF2_SI_I:
25044 case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;
25049 case MULTI_ARG_3_SF:
25050 case MULTI_ARG_3_DF:
25051 case MULTI_ARG_3_SF2:
25052 case MULTI_ARG_3_DF2:
25053 case MULTI_ARG_3_DI:
25054 case MULTI_ARG_3_SI:
25055 case MULTI_ARG_3_SI_DI:
25056 case MULTI_ARG_3_HI:
25057 case MULTI_ARG_3_HI_SI:
25058 case MULTI_ARG_3_QI:
25059 case MULTI_ARG_3_DI2:
25060 case MULTI_ARG_3_SI2:
25061 case MULTI_ARG_3_HI2:
25062 case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
25067 case MULTI_ARG_2_DF:
25068 case MULTI_ARG_2_DI:
25069 case MULTI_ARG_2_SI:
25070 case MULTI_ARG_2_HI:
25071 case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
25076 case MULTI_ARG_2_SI_IMM:
25077 case MULTI_ARG_2_HI_IMM:
25078 case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;
25083 case MULTI_ARG_1_SF:
25084 case MULTI_ARG_1_DF:
25085 case MULTI_ARG_1_SF2:
25086 case MULTI_ARG_1_DF2:
25087 case MULTI_ARG_1_DI:
25088 case MULTI_ARG_1_SI:
25089 case MULTI_ARG_1_HI:
25090 case MULTI_ARG_1_QI:
25091 case MULTI_ARG_1_SI_DI:
25092 case MULTI_ARG_1_HI_DI:
25093 case MULTI_ARG_1_HI_SI:
25094 case MULTI_ARG_1_QI_DI:
25095 case MULTI_ARG_1_QI_SI:
25096 case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
25101 case MULTI_ARG_2_SI_CMP:
25102 case MULTI_ARG_2_HI_CMP:
25103 case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;
25108 case MULTI_ARG_2_SF_TF:
25109 case MULTI_ARG_2_DF_TF:
25110 case MULTI_ARG_2_DI_TF:
25111 case MULTI_ARG_2_SI_TF:
25112 case MULTI_ARG_2_HI_TF:
25113 case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }
25122 if (optimize || !target
25123 || GET_MODE (target) != tmode
25124 || !insn_data[icode].operand[0].predicate (target, tmode))
25125 target = gen_reg_rtx (tmode);
25127 gcc_assert (nargs <= 4);
25129 for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
25132 rtx op = expand_normal (arg);
25133 int adjust = (comparison_p) ? 1 : 0;
25134 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25136 if (last_arg_constant && i == nargs-1)
        {
          if (!CONST_INT_P (op))
            {
              error ("last argument must be an immediate");
              return gen_reg_rtx (tmode);
            }
        }
      else
        {
25146 if (VECTOR_MODE_P (mode))
25147 op = safe_vector_operand (op, mode);
          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;
25154 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }
      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
25174 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25175 GEN_INT ((int)sub_code));
25176 else if (! comparison_p)
25177 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
                             args[3].op);
      break;
25197 gcc_unreachable ();
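/* Example (illustrative sketch, not verbatim header text): the FMA4
   wrapper in fma4intrin.h reaches this expander through a
   MULTI_ARG_3_SF descriptor, roughly as

     extern __inline __m128
     _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
     {
       return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A,
						(__v4sf)__B, (__v4sf)__C);
     }

   so NARGS is 3, COMPARISON_P is false, and the three-operand arm
   above emits the pattern directly.  */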
25207 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25208 insns with vec_merge. */
25211 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25215 tree arg0 = CALL_EXPR_ARG (exp, 0);
25216 rtx op1, op0 = expand_normal (arg0);
25217 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25218 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25220 if (optimize || !target
25221 || GET_MODE (target) != tmode
25222 || !insn_data[icode].operand[0].predicate (target, tmode))
25223 target = gen_reg_rtx (tmode);
25225 if (VECTOR_MODE_P (mode0))
25226 op0 = safe_vector_operand (op0, mode0);
25228 if ((optimize && !register_operand (op0, mode0))
25229 || !insn_data[icode].operand[1].predicate (op0, mode0))
25230 op0 = copy_to_mode_reg (mode0, op0);
25233 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25234 op1 = copy_to_mode_reg (mode0, op1);
25236 pat = GEN_FCN (icode) (target, op0, op1);
25243 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25246 ix86_expand_sse_compare (const struct builtin_description *d,
25247 tree exp, rtx target, bool swap)
25250 tree arg0 = CALL_EXPR_ARG (exp, 0);
25251 tree arg1 = CALL_EXPR_ARG (exp, 1);
25252 rtx op0 = expand_normal (arg0);
25253 rtx op1 = expand_normal (arg1);
25255 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25256 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25257 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25258 enum rtx_code comparison = d->comparison;
25260 if (VECTOR_MODE_P (mode0))
25261 op0 = safe_vector_operand (op0, mode0);
25262 if (VECTOR_MODE_P (mode1))
25263 op1 = safe_vector_operand (op1, mode1);
25265 /* Swap operands if we have a comparison that isn't available in hardware.  */
25269 rtx tmp = gen_reg_rtx (mode1);
25270 emit_move_insn (tmp, op1);
25275 if (optimize || !target
25276 || GET_MODE (target) != tmode
25277 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25278 target = gen_reg_rtx (tmode);
25280 if ((optimize && !register_operand (op0, mode0))
25281 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25282 op0 = copy_to_mode_reg (mode0, op0);
25283 if ((optimize && !register_operand (op1, mode1))
25284 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25285 op1 = copy_to_mode_reg (mode1, op1);
25287 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25288 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25295 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25298 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25302 tree arg0 = CALL_EXPR_ARG (exp, 0);
25303 tree arg1 = CALL_EXPR_ARG (exp, 1);
25304 rtx op0 = expand_normal (arg0);
25305 rtx op1 = expand_normal (arg1);
25306 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25307 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25308 enum rtx_code comparison = d->comparison;
25310 if (VECTOR_MODE_P (mode0))
25311 op0 = safe_vector_operand (op0, mode0);
25312 if (VECTOR_MODE_P (mode1))
25313 op1 = safe_vector_operand (op1, mode1);
25315 /* Swap operands if we have a comparison that isn't available in hardware.  */
25317 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25324 target = gen_reg_rtx (SImode);
25325 emit_move_insn (target, const0_rtx);
25326 target = gen_rtx_SUBREG (QImode, target, 0);
25328 if ((optimize && !register_operand (op0, mode0))
25329 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25330 op0 = copy_to_mode_reg (mode0, op0);
25331 if ((optimize && !register_operand (op1, mode1))
25332 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25333 op1 = copy_to_mode_reg (mode1, op1);
25335 pat = GEN_FCN (d->icode) (op0, op1);
25339 emit_insn (gen_rtx_SET (VOIDmode,
25340 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25341 gen_rtx_fmt_ee (comparison, QImode,
25345 return SUBREG_REG (target);
25348 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
25351 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
25355 tree arg0 = CALL_EXPR_ARG (exp, 0);
25356 tree arg1 = CALL_EXPR_ARG (exp, 1);
25357 rtx op0 = expand_normal (arg0);
25358 rtx op1 = expand_normal (arg1);
25359 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25360 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25361 enum rtx_code comparison = d->comparison;
25363 if (VECTOR_MODE_P (mode0))
25364 op0 = safe_vector_operand (op0, mode0);
25365 if (VECTOR_MODE_P (mode1))
25366 op1 = safe_vector_operand (op1, mode1);
25368 target = gen_reg_rtx (SImode);
25369 emit_move_insn (target, const0_rtx);
25370 target = gen_rtx_SUBREG (QImode, target, 0);
25372 if ((optimize && !register_operand (op0, mode0))
25373 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25374 op0 = copy_to_mode_reg (mode0, op0);
25375 if ((optimize && !register_operand (op1, mode1))
25376 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25377 op1 = copy_to_mode_reg (mode1, op1);
25379 pat = GEN_FCN (d->icode) (op0, op1);
25383 emit_insn (gen_rtx_SET (VOIDmode,
25384 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25385 gen_rtx_fmt_ee (comparison, QImode,
25389 return SUBREG_REG (target);
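/* Example (illustrative): smmintrin.h wraps the ptest builtins roughly as

     extern __inline int
     _mm_testz_si128 (__m128i __M, __m128i __V)
     {
       return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
     }

   That builtin reaches this expander with d->comparison == EQ, so the
   SET above compares the flags register against zero and materializes
   the Z flag of PTEST as a 0/1 value in TARGET.  */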
25392 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
25395 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
25396 tree exp, rtx target)
25399 tree arg0 = CALL_EXPR_ARG (exp, 0);
25400 tree arg1 = CALL_EXPR_ARG (exp, 1);
25401 tree arg2 = CALL_EXPR_ARG (exp, 2);
25402 tree arg3 = CALL_EXPR_ARG (exp, 3);
25403 tree arg4 = CALL_EXPR_ARG (exp, 4);
25404 rtx scratch0, scratch1;
25405 rtx op0 = expand_normal (arg0);
25406 rtx op1 = expand_normal (arg1);
25407 rtx op2 = expand_normal (arg2);
25408 rtx op3 = expand_normal (arg3);
25409 rtx op4 = expand_normal (arg4);
25410 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
25412 tmode0 = insn_data[d->icode].operand[0].mode;
25413 tmode1 = insn_data[d->icode].operand[1].mode;
25414 modev2 = insn_data[d->icode].operand[2].mode;
25415 modei3 = insn_data[d->icode].operand[3].mode;
25416 modev4 = insn_data[d->icode].operand[4].mode;
25417 modei5 = insn_data[d->icode].operand[5].mode;
25418 modeimm = insn_data[d->icode].operand[6].mode;
25420 if (VECTOR_MODE_P (modev2))
25421 op0 = safe_vector_operand (op0, modev2);
25422 if (VECTOR_MODE_P (modev4))
25423 op2 = safe_vector_operand (op2, modev4);
25425 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25426 op0 = copy_to_mode_reg (modev2, op0);
25427 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
25428 op1 = copy_to_mode_reg (modei3, op1);
25429 if ((optimize && !register_operand (op2, modev4))
25430 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
25431 op2 = copy_to_mode_reg (modev4, op2);
25432 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
25433 op3 = copy_to_mode_reg (modei5, op3);
25435 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
25437 error ("the fifth argument must be an 8-bit immediate");
25441 if (d->code == IX86_BUILTIN_PCMPESTRI128)
25443 if (optimize || !target
25444 || GET_MODE (target) != tmode0
25445 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
25446 target = gen_reg_rtx (tmode0);
25448 scratch1 = gen_reg_rtx (tmode1);
25450 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
25452 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
25454 if (optimize || !target
25455 || GET_MODE (target) != tmode1
25456 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25457 target = gen_reg_rtx (tmode1);
25459 scratch0 = gen_reg_rtx (tmode0);
25461 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
25465 gcc_assert (d->flag);
25467 scratch0 = gen_reg_rtx (tmode0);
25468 scratch1 = gen_reg_rtx (tmode1);
25470 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
25480 target = gen_reg_rtx (SImode);
25481 emit_move_insn (target, const0_rtx);
25482 target = gen_rtx_SUBREG (QImode, target, 0);
25485 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25486 gen_rtx_fmt_ee (EQ, QImode,
25487 gen_rtx_REG ((enum machine_mode) d->flag,
25490 return SUBREG_REG (target);
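/* Example (illustrative): the explicit-length string compares in
   smmintrin.h funnel into this expander, roughly as

     extern __inline int
     _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY,
		   const int __M)
     {
       return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
					   (__v16qi)__Y, __LY, __M);
     }

   Here OP0/OP2 are the two vectors, OP1/OP3 their lengths, and OP4 the
   8-bit mode immediate checked above.  */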
25497 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
25500 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
25501 tree exp, rtx target)
25504 tree arg0 = CALL_EXPR_ARG (exp, 0);
25505 tree arg1 = CALL_EXPR_ARG (exp, 1);
25506 tree arg2 = CALL_EXPR_ARG (exp, 2);
25507 rtx scratch0, scratch1;
25508 rtx op0 = expand_normal (arg0);
25509 rtx op1 = expand_normal (arg1);
25510 rtx op2 = expand_normal (arg2);
25511 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
25513 tmode0 = insn_data[d->icode].operand[0].mode;
25514 tmode1 = insn_data[d->icode].operand[1].mode;
25515 modev2 = insn_data[d->icode].operand[2].mode;
25516 modev3 = insn_data[d->icode].operand[3].mode;
25517 modeimm = insn_data[d->icode].operand[4].mode;
25519 if (VECTOR_MODE_P (modev2))
25520 op0 = safe_vector_operand (op0, modev2);
25521 if (VECTOR_MODE_P (modev3))
25522 op1 = safe_vector_operand (op1, modev3);
25524 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25525 op0 = copy_to_mode_reg (modev2, op0);
25526 if ((optimize && !register_operand (op1, modev3))
25527 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
25528 op1 = copy_to_mode_reg (modev3, op1);
25530 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
25532 error ("the third argument must be an 8-bit immediate");
25536 if (d->code == IX86_BUILTIN_PCMPISTRI128)
25538 if (optimize || !target
25539 || GET_MODE (target) != tmode0
25540 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
25541 target = gen_reg_rtx (tmode0);
25543 scratch1 = gen_reg_rtx (tmode1);
25545 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
25547 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
25549 if (optimize || !target
25550 || GET_MODE (target) != tmode1
25551 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25552 target = gen_reg_rtx (tmode1);
25554 scratch0 = gen_reg_rtx (tmode0);
25556 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
25560 gcc_assert (d->flag);
25562 scratch0 = gen_reg_rtx (tmode0);
25563 scratch1 = gen_reg_rtx (tmode1);
25565 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
25575 target = gen_reg_rtx (SImode);
25576 emit_move_insn (target, const0_rtx);
25577 target = gen_rtx_SUBREG (QImode, target, 0);
25580 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25581 gen_rtx_fmt_ee (EQ, QImode,
25582 gen_rtx_REG ((enum machine_mode) d->flag,
25585 return SUBREG_REG (target);
25591 /* Subroutine of ix86_expand_builtin to take care of insns with
25592 variable number of operands. */
25595 ix86_expand_args_builtin (const struct builtin_description *d,
25596 tree exp, rtx target)
25598 rtx pat, real_target;
25599 unsigned int i, nargs;
25600 unsigned int nargs_constant = 0;
25601 int num_memory = 0;
25605 enum machine_mode mode;
25607 bool last_arg_count = false;
25608 enum insn_code icode = d->icode;
25609 const struct insn_data_d *insn_p = &insn_data[icode];
25610 enum machine_mode tmode = insn_p->operand[0].mode;
25611 enum machine_mode rmode = VOIDmode;
25613 enum rtx_code comparison = d->comparison;
25615 switch ((enum ix86_builtin_func_type) d->flag)
25617 case INT_FTYPE_V8SF_V8SF_PTEST:
25618 case INT_FTYPE_V4DI_V4DI_PTEST:
25619 case INT_FTYPE_V4DF_V4DF_PTEST:
25620 case INT_FTYPE_V4SF_V4SF_PTEST:
25621 case INT_FTYPE_V2DI_V2DI_PTEST:
25622 case INT_FTYPE_V2DF_V2DF_PTEST:
25623 return ix86_expand_sse_ptest (d, exp, target);
25624 case FLOAT128_FTYPE_FLOAT128:
25625 case FLOAT_FTYPE_FLOAT:
25626 case INT_FTYPE_INT:
25627 case UINT64_FTYPE_INT:
25628 case UINT16_FTYPE_UINT16:
25629 case INT64_FTYPE_INT64:
25630 case INT64_FTYPE_V4SF:
25631 case INT64_FTYPE_V2DF:
25632 case INT_FTYPE_V16QI:
25633 case INT_FTYPE_V8QI:
25634 case INT_FTYPE_V8SF:
25635 case INT_FTYPE_V4DF:
25636 case INT_FTYPE_V4SF:
25637 case INT_FTYPE_V2DF:
25638 case V16QI_FTYPE_V16QI:
25639 case V8SI_FTYPE_V8SF:
25640 case V8SI_FTYPE_V4SI:
25641 case V8HI_FTYPE_V8HI:
25642 case V8HI_FTYPE_V16QI:
25643 case V8QI_FTYPE_V8QI:
25644 case V8SF_FTYPE_V8SF:
25645 case V8SF_FTYPE_V8SI:
25646 case V8SF_FTYPE_V4SF:
25647 case V8SF_FTYPE_V8HI:
25648 case V4SI_FTYPE_V4SI:
25649 case V4SI_FTYPE_V16QI:
25650 case V4SI_FTYPE_V4SF:
25651 case V4SI_FTYPE_V8SI:
25652 case V4SI_FTYPE_V8HI:
25653 case V4SI_FTYPE_V4DF:
25654 case V4SI_FTYPE_V2DF:
25655 case V4HI_FTYPE_V4HI:
25656 case V4DF_FTYPE_V4DF:
25657 case V4DF_FTYPE_V4SI:
25658 case V4DF_FTYPE_V4SF:
25659 case V4DF_FTYPE_V2DF:
25660 case V4SF_FTYPE_V4SF:
25661 case V4SF_FTYPE_V4SI:
25662 case V4SF_FTYPE_V8SF:
25663 case V4SF_FTYPE_V4DF:
25664 case V4SF_FTYPE_V8HI:
25665 case V4SF_FTYPE_V2DF:
25666 case V2DI_FTYPE_V2DI:
25667 case V2DI_FTYPE_V16QI:
25668 case V2DI_FTYPE_V8HI:
25669 case V2DI_FTYPE_V4SI:
25670 case V2DF_FTYPE_V2DF:
25671 case V2DF_FTYPE_V4SI:
25672 case V2DF_FTYPE_V4DF:
25673 case V2DF_FTYPE_V4SF:
25674 case V2DF_FTYPE_V2SI:
25675 case V2SI_FTYPE_V2SI:
25676 case V2SI_FTYPE_V4SF:
25677 case V2SI_FTYPE_V2SF:
25678 case V2SI_FTYPE_V2DF:
25679 case V2SF_FTYPE_V2SF:
25680 case V2SF_FTYPE_V2SI:
25683 case V4SF_FTYPE_V4SF_VEC_MERGE:
25684 case V2DF_FTYPE_V2DF_VEC_MERGE:
25685 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25686 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25687 case V16QI_FTYPE_V16QI_V16QI:
25688 case V16QI_FTYPE_V8HI_V8HI:
25689 case V8QI_FTYPE_V8QI_V8QI:
25690 case V8QI_FTYPE_V4HI_V4HI:
25691 case V8HI_FTYPE_V8HI_V8HI:
25692 case V8HI_FTYPE_V16QI_V16QI:
25693 case V8HI_FTYPE_V4SI_V4SI:
25694 case V8SF_FTYPE_V8SF_V8SF:
25695 case V8SF_FTYPE_V8SF_V8SI:
25696 case V4SI_FTYPE_V4SI_V4SI:
25697 case V4SI_FTYPE_V8HI_V8HI:
25698 case V4SI_FTYPE_V4SF_V4SF:
25699 case V4SI_FTYPE_V2DF_V2DF:
25700 case V4HI_FTYPE_V4HI_V4HI:
25701 case V4HI_FTYPE_V8QI_V8QI:
25702 case V4HI_FTYPE_V2SI_V2SI:
25703 case V4DF_FTYPE_V4DF_V4DF:
25704 case V4DF_FTYPE_V4DF_V4DI:
25705 case V4SF_FTYPE_V4SF_V4SF:
25706 case V4SF_FTYPE_V4SF_V4SI:
25707 case V4SF_FTYPE_V4SF_V2SI:
25708 case V4SF_FTYPE_V4SF_V2DF:
25709 case V4SF_FTYPE_V4SF_DI:
25710 case V4SF_FTYPE_V4SF_SI:
25711 case V2DI_FTYPE_V2DI_V2DI:
25712 case V2DI_FTYPE_V16QI_V16QI:
25713 case V2DI_FTYPE_V4SI_V4SI:
25714 case V2DI_FTYPE_V2DI_V16QI:
25715 case V2DI_FTYPE_V2DF_V2DF:
25716 case V2SI_FTYPE_V2SI_V2SI:
25717 case V2SI_FTYPE_V4HI_V4HI:
25718 case V2SI_FTYPE_V2SF_V2SF:
25719 case V2DF_FTYPE_V2DF_V2DF:
25720 case V2DF_FTYPE_V2DF_V4SF:
25721 case V2DF_FTYPE_V2DF_V2DI:
25722 case V2DF_FTYPE_V2DF_DI:
25723 case V2DF_FTYPE_V2DF_SI:
25724 case V2SF_FTYPE_V2SF_V2SF:
25725 case V1DI_FTYPE_V1DI_V1DI:
25726 case V1DI_FTYPE_V8QI_V8QI:
25727 case V1DI_FTYPE_V2SI_V2SI:
25728 if (comparison == UNKNOWN)
25729 return ix86_expand_binop_builtin (icode, exp, target);
25732 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25733 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25734 gcc_assert (comparison != UNKNOWN);
25738 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25739 case V8HI_FTYPE_V8HI_SI_COUNT:
25740 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25741 case V4SI_FTYPE_V4SI_SI_COUNT:
25742 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25743 case V4HI_FTYPE_V4HI_SI_COUNT:
25744 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25745 case V2DI_FTYPE_V2DI_SI_COUNT:
25746 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25747 case V2SI_FTYPE_V2SI_SI_COUNT:
25748 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25749 case V1DI_FTYPE_V1DI_SI_COUNT:
25751 last_arg_count = true;
25753 case UINT64_FTYPE_UINT64_UINT64:
25754 case UINT_FTYPE_UINT_UINT:
25755 case UINT_FTYPE_UINT_USHORT:
25756 case UINT_FTYPE_UINT_UCHAR:
25757 case UINT16_FTYPE_UINT16_INT:
25758 case UINT8_FTYPE_UINT8_INT:
25761 case V2DI_FTYPE_V2DI_INT_CONVERT:
25764 nargs_constant = 1;
25766 case V8HI_FTYPE_V8HI_INT:
25767 case V8HI_FTYPE_V8SF_INT:
25768 case V8HI_FTYPE_V4SF_INT:
25769 case V8SF_FTYPE_V8SF_INT:
25770 case V4SI_FTYPE_V4SI_INT:
25771 case V4SI_FTYPE_V8SI_INT:
25772 case V4HI_FTYPE_V4HI_INT:
25773 case V4DF_FTYPE_V4DF_INT:
25774 case V4SF_FTYPE_V4SF_INT:
25775 case V4SF_FTYPE_V8SF_INT:
25776 case V2DI_FTYPE_V2DI_INT:
25777 case V2DF_FTYPE_V2DF_INT:
25778 case V2DF_FTYPE_V4DF_INT:
25780 nargs_constant = 1;
25782 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25783 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25784 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25785 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25786 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25789 case V16QI_FTYPE_V16QI_V16QI_INT:
25790 case V8HI_FTYPE_V8HI_V8HI_INT:
25791 case V8SI_FTYPE_V8SI_V8SI_INT:
25792 case V8SI_FTYPE_V8SI_V4SI_INT:
25793 case V8SF_FTYPE_V8SF_V8SF_INT:
25794 case V8SF_FTYPE_V8SF_V4SF_INT:
25795 case V4SI_FTYPE_V4SI_V4SI_INT:
25796 case V4DF_FTYPE_V4DF_V4DF_INT:
25797 case V4DF_FTYPE_V4DF_V2DF_INT:
25798 case V4SF_FTYPE_V4SF_V4SF_INT:
25799 case V2DI_FTYPE_V2DI_V2DI_INT:
25800 case V2DF_FTYPE_V2DF_V2DF_INT:
25802 nargs_constant = 1;
25804 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25807 nargs_constant = 1;
25809 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25812 nargs_constant = 1;
25814 case V2DI_FTYPE_V2DI_UINT_UINT:
25816 nargs_constant = 2;
25818 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25819 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25820 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25821 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25823 nargs_constant = 1;
25825 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25827 nargs_constant = 2;
25830 gcc_unreachable ();
25833 gcc_assert (nargs <= ARRAY_SIZE (args));
25835 if (comparison != UNKNOWN)
25837 gcc_assert (nargs == 2);
25838 return ix86_expand_sse_compare (d, exp, target, swap);
25841 if (rmode == VOIDmode || rmode == tmode)
25845 || GET_MODE (target) != tmode
25846 || !insn_p->operand[0].predicate (target, tmode))
25847 target = gen_reg_rtx (tmode);
25848 real_target = target;
25852 target = gen_reg_rtx (rmode);
25853 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25856 for (i = 0; i < nargs; i++)
25858 tree arg = CALL_EXPR_ARG (exp, i);
25859 rtx op = expand_normal (arg);
25860 enum machine_mode mode = insn_p->operand[i + 1].mode;
25861 bool match = insn_p->operand[i + 1].predicate (op, mode);
25863 if (last_arg_count && (i + 1) == nargs)
25865 /* SIMD shift insns take either an 8-bit immediate or a
25866 register as the count.  But the builtin functions take an int
25867 as the count.  If the count doesn't match, we put it in a register.  */
25870 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25871 if (!insn_p->operand[i + 1].predicate (op, mode))
25872 op = copy_to_reg (op);
25875 else if ((nargs - i) <= nargs_constant)
25880 case CODE_FOR_sse4_1_roundpd:
25881 case CODE_FOR_sse4_1_roundps:
25882 case CODE_FOR_sse4_1_roundsd:
25883 case CODE_FOR_sse4_1_roundss:
25884 case CODE_FOR_sse4_1_blendps:
25885 case CODE_FOR_avx_blendpd256:
25886 case CODE_FOR_avx_vpermilv4df:
25887 case CODE_FOR_avx_roundpd256:
25888 case CODE_FOR_avx_roundps256:
25889 error ("the last argument must be a 4-bit immediate");
25892 case CODE_FOR_sse4_1_blendpd:
25893 case CODE_FOR_avx_vpermilv2df:
25894 case CODE_FOR_xop_vpermil2v2df3:
25895 case CODE_FOR_xop_vpermil2v4sf3:
25896 case CODE_FOR_xop_vpermil2v4df3:
25897 case CODE_FOR_xop_vpermil2v8sf3:
25898 error ("the last argument must be a 2-bit immediate");
25901 case CODE_FOR_avx_vextractf128v4df:
25902 case CODE_FOR_avx_vextractf128v8sf:
25903 case CODE_FOR_avx_vextractf128v8si:
25904 case CODE_FOR_avx_vinsertf128v4df:
25905 case CODE_FOR_avx_vinsertf128v8sf:
25906 case CODE_FOR_avx_vinsertf128v8si:
25907 error ("the last argument must be a 1-bit immediate");
25910 case CODE_FOR_avx_cmpsdv2df3:
25911 case CODE_FOR_avx_cmpssv4sf3:
25912 case CODE_FOR_avx_cmppdv2df3:
25913 case CODE_FOR_avx_cmppsv4sf3:
25914 case CODE_FOR_avx_cmppdv4df3:
25915 case CODE_FOR_avx_cmppsv8sf3:
25916 error ("the last argument must be a 5-bit immediate");
25920 switch (nargs_constant)
25923 if ((nargs - i) == nargs_constant)
25925 error ("the next to last argument must be an 8-bit immediate");
25929 error ("the last argument must be an 8-bit immediate");
25932 gcc_unreachable ();
25939 if (VECTOR_MODE_P (mode))
25940 op = safe_vector_operand (op, mode);
25942 /* If we aren't optimizing, only allow one memory operand to be generated.  */
25944 if (memory_operand (op, mode))
25947 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25949 if (optimize || !match || num_memory > 1)
25950 op = copy_to_mode_reg (mode, op);
25954 op = copy_to_reg (op);
25955 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25960 args[i].mode = mode;
25966 pat = GEN_FCN (icode) (real_target, args[0].op);
25969 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25972 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25976 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25977 args[2].op, args[3].op);
25980 gcc_unreachable ();
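/* Example (illustrative): a COUNT-typed builtin such as

     extern __inline __m128i
     _mm_slli_epi32 (__m128i __A, int __B)
     {
       return (__m128i) __builtin_ia32_pslldi128 ((__v4si)__A, __B);
     }

   takes an int count.  When __B is a constant accepted by the operand
   predicate it is used as the immediate directly; otherwise the code
   above copies it into a register, since the insn also accepts a
   register count.  */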
25990 /* Subroutine of ix86_expand_builtin to take care of special insns
25991 with variable number of operands. */
25994 ix86_expand_special_args_builtin (const struct builtin_description *d,
25995 tree exp, rtx target)
25999 unsigned int i, nargs, arg_adjust, memory;
26003 enum machine_mode mode;
26005 enum insn_code icode = d->icode;
26006 bool last_arg_constant = false;
26007 const struct insn_data_d *insn_p = &insn_data[icode];
26008 enum machine_mode tmode = insn_p->operand[0].mode;
26009 enum { load, store } klass;
26011 switch ((enum ix86_builtin_func_type) d->flag)
26013 case VOID_FTYPE_VOID:
26014 if (icode == CODE_FOR_avx_vzeroupper)
26015 target = GEN_INT (vzeroupper_intrinsic);
26016 emit_insn (GEN_FCN (icode) (target));
26018 case VOID_FTYPE_UINT64:
26019 case VOID_FTYPE_UNSIGNED:
26025 case UINT64_FTYPE_VOID:
26026 case UNSIGNED_FTYPE_VOID:
26027 case UINT16_FTYPE_VOID:
26032 case UINT64_FTYPE_PUNSIGNED:
26033 case V2DI_FTYPE_PV2DI:
26034 case V32QI_FTYPE_PCCHAR:
26035 case V16QI_FTYPE_PCCHAR:
26036 case V8SF_FTYPE_PCV4SF:
26037 case V8SF_FTYPE_PCFLOAT:
26038 case V4SF_FTYPE_PCFLOAT:
26039 case V4DF_FTYPE_PCV2DF:
26040 case V4DF_FTYPE_PCDOUBLE:
26041 case V2DF_FTYPE_PCDOUBLE:
26042 case VOID_FTYPE_PVOID:
26047 case VOID_FTYPE_PV2SF_V4SF:
26048 case VOID_FTYPE_PV4DI_V4DI:
26049 case VOID_FTYPE_PV2DI_V2DI:
26050 case VOID_FTYPE_PCHAR_V32QI:
26051 case VOID_FTYPE_PCHAR_V16QI:
26052 case VOID_FTYPE_PFLOAT_V8SF:
26053 case VOID_FTYPE_PFLOAT_V4SF:
26054 case VOID_FTYPE_PDOUBLE_V4DF:
26055 case VOID_FTYPE_PDOUBLE_V2DF:
26056 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26057 case VOID_FTYPE_PINT_INT:
26060 /* Reserve memory operand for target. */
26061 memory = ARRAY_SIZE (args);
26063 case V4SF_FTYPE_V4SF_PCV2SF:
26064 case V2DF_FTYPE_V2DF_PCDOUBLE:
26069 case V8SF_FTYPE_PCV8SF_V8SF:
26070 case V4DF_FTYPE_PCV4DF_V4DF:
26071 case V4SF_FTYPE_PCV4SF_V4SF:
26072 case V2DF_FTYPE_PCV2DF_V2DF:
26077 case VOID_FTYPE_PV8SF_V8SF_V8SF:
26078 case VOID_FTYPE_PV4DF_V4DF_V4DF:
26079 case VOID_FTYPE_PV4SF_V4SF_V4SF:
26080 case VOID_FTYPE_PV2DF_V2DF_V2DF:
26083 /* Reserve memory operand for target. */
26084 memory = ARRAY_SIZE (args);
26086 case VOID_FTYPE_UINT_UINT_UINT:
26087 case VOID_FTYPE_UINT64_UINT_UINT:
26088 case UCHAR_FTYPE_UINT_UINT_UINT:
26089 case UCHAR_FTYPE_UINT64_UINT_UINT:
26092 memory = ARRAY_SIZE (args);
26093 last_arg_constant = true;
26096 gcc_unreachable ();
26099 gcc_assert (nargs <= ARRAY_SIZE (args));
26101 if (klass == store)
26103 arg = CALL_EXPR_ARG (exp, 0);
26104 op = expand_normal (arg);
26105 gcc_assert (target == 0);
26107 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26109 target = force_reg (tmode, op);
26117 || GET_MODE (target) != tmode
26118 || !insn_p->operand[0].predicate (target, tmode))
26119 target = gen_reg_rtx (tmode);
26122 for (i = 0; i < nargs; i++)
26124 enum machine_mode mode = insn_p->operand[i + 1].mode;
26127 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26128 op = expand_normal (arg);
26129 match = insn_p->operand[i + 1].predicate (op, mode);
26131 if (last_arg_constant && (i + 1) == nargs)
26135 if (icode == CODE_FOR_lwp_lwpvalsi3
26136 || icode == CODE_FOR_lwp_lwpinssi3
26137 || icode == CODE_FOR_lwp_lwpvaldi3
26138 || icode == CODE_FOR_lwp_lwpinsdi3)
26139 error ("the last argument must be a 32-bit immediate");
26141 error ("the last argument must be an 8-bit immediate");
26149 /* This must be the memory operand. */
26150 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
26151 gcc_assert (GET_MODE (op) == mode
26152 || GET_MODE (op) == VOIDmode);
26156 /* This must be a register.  */
26157 if (VECTOR_MODE_P (mode))
26158 op = safe_vector_operand (op, mode);
26160 gcc_assert (GET_MODE (op) == mode
26161 || GET_MODE (op) == VOIDmode);
26162 op = copy_to_mode_reg (mode, op);
26167 args[i].mode = mode;
26173 pat = GEN_FCN (icode) (target);
26176 pat = GEN_FCN (icode) (target, args[0].op);
26179 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26182 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26185 gcc_unreachable ();
26191 return klass == store ? 0 : target;
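/* Example (illustrative): a "store" class descriptor such as
   VOID_FTYPE_PDOUBLE_V2DF is produced by

     extern __inline void
     _mm_storeu_pd (double *__P, __m128d __A)
     {
       __builtin_ia32_storeupd (__P, (__v2df)__A);
     }

   The pointer argument becomes the MEM target above, and the expander
   returns 0 since a store produces no value.  */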
26194 /* Return the integer constant in ARG. Constrain it to be in the range
26195 of the subparts of VEC_TYPE; issue an error if not. */
26198 get_element_number (tree vec_type, tree arg)
26200 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26202 if (!host_integerp (arg, 1)
26203 || (elt = tree_low_cst (arg, 1), elt > max))
26205 error ("selector must be an integer constant in the range 0..%wi", max);
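/* Example (illustrative): for a V4SF vector, TYPE_VECTOR_SUBPARTS is 4,
   so MAX is 3 and a (hypothetical) misuse such as

     __builtin_ia32_vec_ext_v4sf (x, 4);

   is diagnosed as "selector must be an integer constant in the
   range 0..3".  */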
26212 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26213 ix86_expand_vector_init. We DO have language-level syntax for this, in
26214 the form of (type){ init-list }. Except that since we can't place emms
26215 instructions from inside the compiler, we can't allow the use of MMX
26216 registers unless the user explicitly asks for it. So we do *not* define
26217 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26218 we have builtins invoked by mmintrin.h that give us license to emit
26219 these sorts of instructions. */
26222 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26224 enum machine_mode tmode = TYPE_MODE (type);
26225 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26226 int i, n_elt = GET_MODE_NUNITS (tmode);
26227 rtvec v = rtvec_alloc (n_elt);
26229 gcc_assert (VECTOR_MODE_P (tmode));
26230 gcc_assert (call_expr_nargs (exp) == n_elt);
26232 for (i = 0; i < n_elt; ++i)
26234 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26235 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26238 if (!target || !register_operand (target, tmode))
26239 target = gen_reg_rtx (tmode);
26241 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
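/* Example (illustrative): mmintrin.h reaches this wrapper roughly as

     extern __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   Each scalar call argument becomes one element of the rtvec V above.  */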
26245 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26246 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26247 had a language-level syntax for referencing vector elements. */
26250 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26252 enum machine_mode tmode, mode0;
26257 arg0 = CALL_EXPR_ARG (exp, 0);
26258 arg1 = CALL_EXPR_ARG (exp, 1);
26260 op0 = expand_normal (arg0);
26261 elt = get_element_number (TREE_TYPE (arg0), arg1);
26263 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26264 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26265 gcc_assert (VECTOR_MODE_P (mode0));
26267 op0 = force_reg (mode0, op0);
26269 if (optimize || !target || !register_operand (target, tmode))
26270 target = gen_reg_rtx (tmode);
26272 ix86_expand_vector_extract (true, target, op0, elt);
26277 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26278 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26279 a language-level syntax for referencing vector elements. */
26282 ix86_expand_vec_set_builtin (tree exp)
26284 enum machine_mode tmode, mode1;
26285 tree arg0, arg1, arg2;
26287 rtx op0, op1, target;
26289 arg0 = CALL_EXPR_ARG (exp, 0);
26290 arg1 = CALL_EXPR_ARG (exp, 1);
26291 arg2 = CALL_EXPR_ARG (exp, 2);
26293 tmode = TYPE_MODE (TREE_TYPE (arg0));
26294 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26295 gcc_assert (VECTOR_MODE_P (tmode));
26297 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26298 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26299 elt = get_element_number (TREE_TYPE (arg0), arg2);
26301 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26302 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26304 op0 = force_reg (tmode, op0);
26305 op1 = force_reg (mode1, op1);
26307 /* OP0 is the source of these builtin functions and shouldn't be
26308 modified. Create a copy, use it and return it as target. */
26309 target = gen_reg_rtx (tmode);
26310 emit_move_insn (target, op0);
26311 ix86_expand_vector_set (true, target, op1, elt);
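/* Example (illustrative): xmmintrin.h reaches this wrapper roughly as

     extern __inline __m64
     _mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
     {
       return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
     }

   ARG0 is copied first, so the source vector is left unmodified, per
   the comment above.  */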
26316 /* Expand an expression EXP that calls a built-in function,
26317 with result going to TARGET if that's convenient
26318 (and in mode MODE if that's convenient).
26319 SUBTARGET may be used as the target for computing one of EXP's operands.
26320 IGNORE is nonzero if the value is to be ignored. */
26323 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26324 enum machine_mode mode ATTRIBUTE_UNUSED,
26325 int ignore ATTRIBUTE_UNUSED)
26327 const struct builtin_description *d;
26329 enum insn_code icode;
26330 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26331 tree arg0, arg1, arg2;
26332 rtx op0, op1, op2, pat;
26333 enum machine_mode mode0, mode1, mode2;
26334 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26336 /* Determine whether the builtin function is available under the current ISA.
26337 Originally the builtin was not created if it wasn't applicable to the
26338 current ISA based on the command line switches. With function specific
26339 options, we need to check in the context of the function making the call
26340 whether it is supported. */
26341 if (ix86_builtins_isa[fcode].isa
26342 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
26344 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
26345 NULL, NULL, false);
26348 error ("%qE needs unknown isa option", fndecl);
26351 gcc_assert (opts != NULL);
26352 error ("%qE needs isa option %s", fndecl, opts);
26360 case IX86_BUILTIN_MASKMOVQ:
26361 case IX86_BUILTIN_MASKMOVDQU:
26362 icode = (fcode == IX86_BUILTIN_MASKMOVQ
26363 ? CODE_FOR_mmx_maskmovq
26364 : CODE_FOR_sse2_maskmovdqu);
26365 /* Note the arg order is different from the operand order. */
26366 arg1 = CALL_EXPR_ARG (exp, 0);
26367 arg2 = CALL_EXPR_ARG (exp, 1);
26368 arg0 = CALL_EXPR_ARG (exp, 2);
26369 op0 = expand_normal (arg0);
26370 op1 = expand_normal (arg1);
26371 op2 = expand_normal (arg2);
26372 mode0 = insn_data[icode].operand[0].mode;
26373 mode1 = insn_data[icode].operand[1].mode;
26374 mode2 = insn_data[icode].operand[2].mode;
26376 op0 = force_reg (Pmode, op0);
26377 op0 = gen_rtx_MEM (mode1, op0);
26379 if (!insn_data[icode].operand[0].predicate (op0, mode0))
26380 op0 = copy_to_mode_reg (mode0, op0);
26381 if (!insn_data[icode].operand[1].predicate (op1, mode1))
26382 op1 = copy_to_mode_reg (mode1, op1);
26383 if (!insn_data[icode].operand[2].predicate (op2, mode2))
26384 op2 = copy_to_mode_reg (mode2, op2);
26385 pat = GEN_FCN (icode) (op0, op1, op2);
26391 case IX86_BUILTIN_LDMXCSR:
26392 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
26393 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26394 emit_move_insn (target, op0);
26395 emit_insn (gen_sse_ldmxcsr (target));
26398 case IX86_BUILTIN_STMXCSR:
26399 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26400 emit_insn (gen_sse_stmxcsr (target));
26401 return copy_to_mode_reg (SImode, target);
26403 case IX86_BUILTIN_CLFLUSH:
26404 arg0 = CALL_EXPR_ARG (exp, 0);
26405 op0 = expand_normal (arg0);
26406 icode = CODE_FOR_sse2_clflush;
26407 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26408 op0 = copy_to_mode_reg (Pmode, op0);
26410 emit_insn (gen_sse2_clflush (op0));
26413 case IX86_BUILTIN_MONITOR:
26414 arg0 = CALL_EXPR_ARG (exp, 0);
26415 arg1 = CALL_EXPR_ARG (exp, 1);
26416 arg2 = CALL_EXPR_ARG (exp, 2);
26417 op0 = expand_normal (arg0);
26418 op1 = expand_normal (arg1);
26419 op2 = expand_normal (arg2);
26421 op0 = copy_to_mode_reg (Pmode, op0);
26423 op1 = copy_to_mode_reg (SImode, op1);
26425 op2 = copy_to_mode_reg (SImode, op2);
26426 emit_insn (ix86_gen_monitor (op0, op1, op2));
26429 case IX86_BUILTIN_MWAIT:
26430 arg0 = CALL_EXPR_ARG (exp, 0);
26431 arg1 = CALL_EXPR_ARG (exp, 1);
26432 op0 = expand_normal (arg0);
26433 op1 = expand_normal (arg1);
26435 op0 = copy_to_mode_reg (SImode, op0);
26437 op1 = copy_to_mode_reg (SImode, op1);
26438 emit_insn (gen_sse3_mwait (op0, op1));
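/* Example (illustrative): pmmintrin.h exposes these two cases roughly as

     extern __inline void
     _mm_monitor (void const *__P, unsigned int __E, unsigned int __H)
     {
       __builtin_ia32_monitor (__P, __E, __H);
     }

   and similarly _mm_mwait (__E, __H) for the MWAIT case just above.  */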
26441 case IX86_BUILTIN_VEC_INIT_V2SI:
26442 case IX86_BUILTIN_VEC_INIT_V4HI:
26443 case IX86_BUILTIN_VEC_INIT_V8QI:
26444 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
26446 case IX86_BUILTIN_VEC_EXT_V2DF:
26447 case IX86_BUILTIN_VEC_EXT_V2DI:
26448 case IX86_BUILTIN_VEC_EXT_V4SF:
26449 case IX86_BUILTIN_VEC_EXT_V4SI:
26450 case IX86_BUILTIN_VEC_EXT_V8HI:
26451 case IX86_BUILTIN_VEC_EXT_V2SI:
26452 case IX86_BUILTIN_VEC_EXT_V4HI:
26453 case IX86_BUILTIN_VEC_EXT_V16QI:
26454 return ix86_expand_vec_ext_builtin (exp, target);
26456 case IX86_BUILTIN_VEC_SET_V2DI:
26457 case IX86_BUILTIN_VEC_SET_V4SF:
26458 case IX86_BUILTIN_VEC_SET_V4SI:
26459 case IX86_BUILTIN_VEC_SET_V8HI:
26460 case IX86_BUILTIN_VEC_SET_V4HI:
26461 case IX86_BUILTIN_VEC_SET_V16QI:
26462 return ix86_expand_vec_set_builtin (exp);
26464 case IX86_BUILTIN_VEC_PERM_V2DF:
26465 case IX86_BUILTIN_VEC_PERM_V4SF:
26466 case IX86_BUILTIN_VEC_PERM_V2DI:
26467 case IX86_BUILTIN_VEC_PERM_V4SI:
26468 case IX86_BUILTIN_VEC_PERM_V8HI:
26469 case IX86_BUILTIN_VEC_PERM_V16QI:
26470 case IX86_BUILTIN_VEC_PERM_V2DI_U:
26471 case IX86_BUILTIN_VEC_PERM_V4SI_U:
26472 case IX86_BUILTIN_VEC_PERM_V8HI_U:
26473 case IX86_BUILTIN_VEC_PERM_V16QI_U:
26474 case IX86_BUILTIN_VEC_PERM_V4DF:
26475 case IX86_BUILTIN_VEC_PERM_V8SF:
26476 return ix86_expand_vec_perm_builtin (exp);
26478 case IX86_BUILTIN_INFQ:
26479 case IX86_BUILTIN_HUGE_VALQ:
26481 REAL_VALUE_TYPE inf;
26485 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
26487 tmp = validize_mem (force_const_mem (mode, tmp));
26490 target = gen_reg_rtx (mode);
26492 emit_move_insn (target, tmp);
26496 case IX86_BUILTIN_LLWPCB:
26497 arg0 = CALL_EXPR_ARG (exp, 0);
26498 op0 = expand_normal (arg0);
26499 icode = CODE_FOR_lwp_llwpcb;
26500 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26501 op0 = copy_to_mode_reg (Pmode, op0);
26502 emit_insn (gen_lwp_llwpcb (op0));
26505 case IX86_BUILTIN_SLWPCB:
26506 icode = CODE_FOR_lwp_slwpcb;
26508 || !insn_data[icode].operand[0].predicate (target, Pmode))
26509 target = gen_reg_rtx (Pmode);
26510 emit_insn (gen_lwp_slwpcb (target));
26517 for (i = 0, d = bdesc_special_args;
26518 i < ARRAY_SIZE (bdesc_special_args);
26520 if (d->code == fcode)
26521 return ix86_expand_special_args_builtin (d, exp, target);
26523 for (i = 0, d = bdesc_args;
26524 i < ARRAY_SIZE (bdesc_args);
26526 if (d->code == fcode)
26529 case IX86_BUILTIN_FABSQ:
26530 case IX86_BUILTIN_COPYSIGNQ:
26532 /* Emit a normal call if SSE2 isn't available. */
26533 return expand_call (exp, target, ignore);
26535 return ix86_expand_args_builtin (d, exp, target);
26538 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26539 if (d->code == fcode)
26540 return ix86_expand_sse_comi (d, exp, target);
26542 for (i = 0, d = bdesc_pcmpestr;
26543 i < ARRAY_SIZE (bdesc_pcmpestr);
26545 if (d->code == fcode)
26546 return ix86_expand_sse_pcmpestr (d, exp, target);
26548 for (i = 0, d = bdesc_pcmpistr;
26549 i < ARRAY_SIZE (bdesc_pcmpistr);
26551 if (d->code == fcode)
26552 return ix86_expand_sse_pcmpistr (d, exp, target);
26554 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26555 if (d->code == fcode)
26556 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
26557 (enum ix86_builtin_func_type)
26558 d->flag, d->comparison);
26560 gcc_unreachable ();
26563 /* Returns a function decl for a vectorized version of the builtin function
26564 with builtin function code FN, result vector type TYPE_OUT and argument
26565 vector type TYPE_IN, or NULL_TREE if it is not available.  */
26568 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
26571 enum machine_mode in_mode, out_mode;
26573 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26575 if (TREE_CODE (type_out) != VECTOR_TYPE
26576 || TREE_CODE (type_in) != VECTOR_TYPE
26577 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
26580 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26581 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26582 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26583 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26587 case BUILT_IN_SQRT:
26588 if (out_mode == DFmode && in_mode == DFmode)
26590 if (out_n == 2 && in_n == 2)
26591 return ix86_builtins[IX86_BUILTIN_SQRTPD];
26592 else if (out_n == 4 && in_n == 4)
26593 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
26597 case BUILT_IN_SQRTF:
26598 if (out_mode == SFmode && in_mode == SFmode)
26600 if (out_n == 4 && in_n == 4)
26601 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
26602 else if (out_n == 8 && in_n == 8)
26603 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
26607 case BUILT_IN_LRINT:
26608 if (out_mode == SImode && out_n == 4
26609 && in_mode == DFmode && in_n == 2)
26610 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
26613 case BUILT_IN_LRINTF:
26614 if (out_mode == SImode && in_mode == SFmode)
26616 if (out_n == 4 && in_n == 4)
26617 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26618 else if (out_n == 8 && in_n == 8)
26619 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
26623 case BUILT_IN_COPYSIGN:
26624 if (out_mode == DFmode && in_mode == DFmode)
26626 if (out_n == 2 && in_n == 2)
26627 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26628 else if (out_n == 4 && in_n == 4)
26629 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
26633 case BUILT_IN_COPYSIGNF:
26634 if (out_mode == SFmode && in_mode == SFmode)
26636 if (out_n == 4 && in_n == 4)
26637 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26638 else if (out_n == 8 && in_n == 8)
26639 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
26644 if (out_mode == DFmode && in_mode == DFmode)
26646 if (out_n == 2 && in_n == 2)
26647 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
26648 if (out_n == 4 && in_n == 4)
26649 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
26653 case BUILT_IN_FMAF:
26654 if (out_mode == SFmode && in_mode == SFmode)
26656 if (out_n == 4 && in_n == 4)
26657 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
26658 if (out_n == 8 && in_n == 8)
26659 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
26667 /* Dispatch to a handler for a vectorization library. */
26668 if (ix86_veclib_handler)
26669 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26675 /* Handler for an SVML-style interface to
26676 a library with vectorized intrinsics. */
26679 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26682 tree fntype, new_fndecl, args;
26685 enum machine_mode el_mode, in_mode;
26688 /* SVML is suitable only for unsafe math.  */
26689 if (!flag_unsafe_math_optimizations)
26692 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26693 n = TYPE_VECTOR_SUBPARTS (type_out);
26694 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26695 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26696 if (el_mode != in_mode
26704 case BUILT_IN_LOG10:
26706 case BUILT_IN_TANH:
26708 case BUILT_IN_ATAN:
26709 case BUILT_IN_ATAN2:
26710 case BUILT_IN_ATANH:
26711 case BUILT_IN_CBRT:
26712 case BUILT_IN_SINH:
26714 case BUILT_IN_ASINH:
26715 case BUILT_IN_ASIN:
26716 case BUILT_IN_COSH:
26718 case BUILT_IN_ACOSH:
26719 case BUILT_IN_ACOS:
26720 if (el_mode != DFmode || n != 2)
26724 case BUILT_IN_EXPF:
26725 case BUILT_IN_LOGF:
26726 case BUILT_IN_LOG10F:
26727 case BUILT_IN_POWF:
26728 case BUILT_IN_TANHF:
26729 case BUILT_IN_TANF:
26730 case BUILT_IN_ATANF:
26731 case BUILT_IN_ATAN2F:
26732 case BUILT_IN_ATANHF:
26733 case BUILT_IN_CBRTF:
26734 case BUILT_IN_SINHF:
26735 case BUILT_IN_SINF:
26736 case BUILT_IN_ASINHF:
26737 case BUILT_IN_ASINF:
26738 case BUILT_IN_COSHF:
26739 case BUILT_IN_COSF:
26740 case BUILT_IN_ACOSHF:
26741 case BUILT_IN_ACOSF:
26742 if (el_mode != SFmode || n != 4)
26750 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26752 if (fn == BUILT_IN_LOGF)
26753 strcpy (name, "vmlsLn4");
26754 else if (fn == BUILT_IN_LOG)
26755 strcpy (name, "vmldLn2");
26758 sprintf (name, "vmls%s", bname+10);
26759 name[strlen (name)-1] = '4';
26762 sprintf (name, "vmld%s2", bname+10);
26764 /* Convert to uppercase. */
26768 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26769 args = TREE_CHAIN (args))
26773 fntype = build_function_type_list (type_out, type_in, NULL);
26775 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26777 /* Build a function declaration for the vectorized function. */
26778 new_fndecl = build_decl (BUILTINS_LOCATION,
26779 FUNCTION_DECL, get_identifier (name), fntype);
26780 TREE_PUBLIC (new_fndecl) = 1;
26781 DECL_EXTERNAL (new_fndecl) = 1;
26782 DECL_IS_NOVOPS (new_fndecl) = 1;
26783 TREE_READONLY (new_fndecl) = 1;
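/* Worked example (under the mangling above): for BUILT_IN_SINF, BNAME is
   "__builtin_sinf", so BNAME+10 is "sinf"; the sprintf yields "vmlssinf",
   the final 'f' is overwritten with '4', and the (elided) uppercase step
   capitalizes the first letter of the math function, giving "vmlsSin4".
   The DFmode path produces "vmldSin2" the same way.  */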
26788 /* Handler for an ACML-style interface to
26789 a library with vectorized intrinsics. */
26792 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26794 char name[20] = "__vr.._";
26795 tree fntype, new_fndecl, args;
26798 enum machine_mode el_mode, in_mode;
26801 /* The ACML is 64-bit only and suitable only for unsafe math, as
26802 it does not correctly support parts of IEEE with the required
26803 precision, such as denormals.  */
26805 || !flag_unsafe_math_optimizations)
26808 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26809 n = TYPE_VECTOR_SUBPARTS (type_out);
26810 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26811 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26812 if (el_mode != in_mode
26822 case BUILT_IN_LOG2:
26823 case BUILT_IN_LOG10:
26826 if (el_mode != DFmode
26831 case BUILT_IN_SINF:
26832 case BUILT_IN_COSF:
26833 case BUILT_IN_EXPF:
26834 case BUILT_IN_POWF:
26835 case BUILT_IN_LOGF:
26836 case BUILT_IN_LOG2F:
26837 case BUILT_IN_LOG10F:
26840 if (el_mode != SFmode
26849 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26850 sprintf (name + 7, "%s", bname+10);
26853 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26854 args = TREE_CHAIN (args))
26858 fntype = build_function_type_list (type_out, type_in, NULL);
26860 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26862 /* Build a function declaration for the vectorized function. */
26863 new_fndecl = build_decl (BUILTINS_LOCATION,
26864 FUNCTION_DECL, get_identifier (name), fntype);
26865 TREE_PUBLIC (new_fndecl) = 1;
26866 DECL_EXTERNAL (new_fndecl) = 1;
26867 DECL_IS_NOVOPS (new_fndecl) = 1;
26868 TREE_READONLY (new_fndecl) = 1;
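/* Worked example (under the mangling above): the elided switch arms fill
   in the ".." of "__vr.._" with "d2" for the DFmode cases and "s4" for
   the SFmode cases, so BUILT_IN_SIN becomes "__vrd2_sin" and
   BUILT_IN_SINF becomes "__vrs4_sinf".  */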
26874 /* Returns a decl of a function that implements conversion of an integer vector
26875 into a floating-point vector, or vice versa.  DEST_TYPE and SRC_TYPE
26876 are the types involved when converting according to CODE.
26877 Return NULL_TREE if it is not available. */
26880 ix86_vectorize_builtin_conversion (unsigned int code,
26881 tree dest_type, tree src_type)
26889 switch (TYPE_MODE (src_type))
26892 switch (TYPE_MODE (dest_type))
26895 return (TYPE_UNSIGNED (src_type)
26896 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26897 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26899 return (TYPE_UNSIGNED (src_type)
26901 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26907 switch (TYPE_MODE (dest_type))
26910 return (TYPE_UNSIGNED (src_type)
26912 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
26921 case FIX_TRUNC_EXPR:
26922 switch (TYPE_MODE (dest_type))
26925 switch (TYPE_MODE (src_type))
26928 return (TYPE_UNSIGNED (dest_type)
26930 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26932 return (TYPE_UNSIGNED (dest_type)
26934 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26941 switch (TYPE_MODE (src_type))
26944 return (TYPE_UNSIGNED (dest_type)
26946 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26963 /* Returns a code for a target-specific builtin that implements the
26964 reciprocal of the function, or NULL_TREE if not available.  */
26967 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26968 bool sqrt ATTRIBUTE_UNUSED)
26970 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26971 && flag_finite_math_only && !flag_trapping_math
26972 && flag_unsafe_math_optimizations))
26976 /* Machine dependent builtins. */
26979 /* Vectorized version of sqrt to rsqrt conversion. */
26980 case IX86_BUILTIN_SQRTPS_NR:
26981 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26983 case IX86_BUILTIN_SQRTPS_NR256:
26984 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
26990 /* Normal builtins. */
26993 /* Sqrt to rsqrt conversion. */
26994 case BUILT_IN_SQRTF:
26995 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27002 /* Helper for avx_vpermilps256_operand et al. This is also used by
27003 the expansion functions to turn the parallel back into a mask.
27004 The return value is 0 for no match and the imm8+1 for a match. */
27007 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27009 unsigned i, nelt = GET_MODE_NUNITS (mode);
27011 unsigned char ipar[8];
27013 if (XVECLEN (par, 0) != (int) nelt)
27016 /* Validate that all of the elements are constants, and not totally
27017 out of range. Copy the data into an integral array to make the
27018 subsequent checks easier. */
27019 for (i = 0; i < nelt; ++i)
27021 rtx er = XVECEXP (par, 0, i);
27022 unsigned HOST_WIDE_INT ei;
27024 if (!CONST_INT_P (er))
27035 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane.  */
27037 for (i = 0; i < 2; ++i)
27041 mask |= ipar[i] << i;
27043 for (i = 2; i < 4; ++i)
27047 mask |= (ipar[i] - 2) << i;
27052 /* In the 256-bit SFmode case, we have full freedom of movement
27053 within the low 128-bit lane, but the high 128-bit lane must
27054 mirror the exact same pattern. */
27055 for (i = 0; i < 4; ++i)
27056 if (ipar[i] + 4 != ipar[i + 4])
27063 /* In the 128-bit case, we have full freedom in the placement of
27064 the elements from the source operand.  */
27065 for (i = 0; i < nelt; ++i)
27066 mask |= ipar[i] << (i * (nelt / 2));
27070 gcc_unreachable ();
27073 /* Make sure success has a non-zero value by adding one. */
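/* Worked example: in the 128-bit V4SFmode case the identity selector
   (const_int 0 1 2 3) packs two bits per element, so
   mask = 0 | 1<<2 | 2<<4 | 3<<6 = 0xe4, and the function returns 0xe5
   (the vpermilps imm8 plus one).  */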
27077 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27078 the expansion functions to turn the parallel back into a mask.
27079 The return value is 0 for no match and the imm8+1 for a match. */
27082 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27084 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27086 unsigned char ipar[8];
27088 if (XVECLEN (par, 0) != (int) nelt)
27091 /* Validate that all of the elements are constants, and not totally
27092 out of range. Copy the data into an integral array to make the
27093 subsequent checks easier. */
27094 for (i = 0; i < nelt; ++i)
27096 rtx er = XVECEXP (par, 0, i);
27097 unsigned HOST_WIDE_INT ei;
27099 if (!CONST_INT_P (er))
27102 if (ei >= 2 * nelt)
27107 /* Validate that each half of the permute selects a run of consecutive elements.  */
27108 for (i = 0; i < nelt2 - 1; ++i)
27109 if (ipar[i] + 1 != ipar[i + 1])
27111 for (i = nelt2; i < nelt - 1; ++i)
27112 if (ipar[i] + 1 != ipar[i + 1])
27115 /* Reconstruct the mask. */
27116 for (i = 0; i < 2; ++i)
27118 unsigned e = ipar[i * nelt2];
27122 mask |= e << (i * 4);
27125 /* Make sure success has a non-zero value by adding one. */
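/* Worked example: for V4DF the selector (const_int 0 1 4 5) passes the
   consecutive-run checks; after the (elided) division by NELT2, the low
   result half starts at lane 0 (0 / 2 = 0) and the high half at lane 2
   (4 / 2 = 2), so mask = 0 | 2 << 4 = 0x20 and the function returns
   0x21 (the vperm2f128 imm8 plus one).  */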
27130 /* Store OPERAND to memory after reload is completed.  This means
27131 that we can't easily use assign_stack_local.  */
27133 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27137 gcc_assert (reload_completed);
27138 if (ix86_using_red_zone ())
27140 result = gen_rtx_MEM (mode,
27141 gen_rtx_PLUS (Pmode,
27143 GEN_INT (-RED_ZONE_SIZE)));
27144 emit_move_insn (result, operand);
27146 else if (TARGET_64BIT)
27152 operand = gen_lowpart (DImode, operand);
27156 gen_rtx_SET (VOIDmode,
27157 gen_rtx_MEM (DImode,
27158 gen_rtx_PRE_DEC (DImode,
27159 stack_pointer_rtx)),
27163 gcc_unreachable ();
27165 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27174 split_double_mode (mode, &operand, 1, operands, operands + 1);
27176 gen_rtx_SET (VOIDmode,
27177 gen_rtx_MEM (SImode,
27178 gen_rtx_PRE_DEC (Pmode,
27179 stack_pointer_rtx)),
27182 gen_rtx_SET (VOIDmode,
27183 gen_rtx_MEM (SImode,
27184 gen_rtx_PRE_DEC (Pmode,
27185 stack_pointer_rtx)),
27190 /* Store HImodes as SImodes. */
27191 operand = gen_lowpart (SImode, operand);
27195 gen_rtx_SET (VOIDmode,
27196 gen_rtx_MEM (GET_MODE (operand),
27197 gen_rtx_PRE_DEC (SImode,
27198 stack_pointer_rtx)),
27202 gcc_unreachable ();
27204 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27209 /* Free the operand from memory.  */
27211 ix86_free_from_memory (enum machine_mode mode)
27213 if (!ix86_using_red_zone ())
27217 if (mode == DImode || TARGET_64BIT)
27221 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
27222 to a pop or add instruction if registers are available.  */
27223 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27224 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
27229 /* Implement TARGET_IRA_COVER_CLASSES.  If -mfpmath=sse, we prefer
27230 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same.  */
27232 static const reg_class_t *
27233 i386_ira_cover_classes (void)
27235 static const reg_class_t sse_fpmath_classes[] = {
27236 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
27238 static const reg_class_t no_sse_fpmath_classes[] = {
27239 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
27242 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
27245 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
27247 Put float CONST_DOUBLE in the constant pool instead of fp regs.
27248 QImode must go into class Q_REGS.
27249 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
27250 movdf to do mem-to-mem moves through integer regs. */
27253 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
27255 enum machine_mode mode = GET_MODE (x);
27257 /* We're only allowed to return a subclass of CLASS. Many of the
27258 following checks fail for NO_REGS, so eliminate that early. */
27259 if (regclass == NO_REGS)
27262 /* All classes can load zeros. */
27263 if (x == CONST0_RTX (mode))
27266 /* Force constants into memory if we are loading a (nonzero) constant into
27267 an MMX or SSE register. This is because there are no MMX/SSE instructions
27268 to load from a constant. */
27270 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
27273 /* Prefer SSE regs only, if we can use them for math. */
27274 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
27275 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
27277 /* Floating-point constants need more complex checks. */
27278 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
27280 /* General regs can load everything. */
27281 if (reg_class_subset_p (regclass, GENERAL_REGS))
27284 /* Floats can load 0 and 1 plus some others. Note that we eliminated
27285 zero above. We only want to wind up preferring 80387 registers if
27286 we plan on doing computation with them. */
27288 && standard_80387_constant_p (x))
27290 /* Limit class to non-SSE.  */
27291 if (regclass == FLOAT_SSE_REGS)
27293 if (regclass == FP_TOP_SSE_REGS)
27295 if (regclass == FP_SECOND_SSE_REGS)
27296 return FP_SECOND_REG;
27297 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
27304 /* Generally when we see PLUS here, it's the function invariant
27305 (plus soft-fp const_int), which can only be computed into general regs.  */
27307 if (GET_CODE (x) == PLUS)
27308 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
27310 /* QImode constants are easy to load, but non-constant QImode data
27311 must go into Q_REGS. */
27312 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
27314 if (reg_class_subset_p (regclass, Q_REGS))
27316 if (reg_class_subset_p (Q_REGS, regclass))
27324 /* Discourage putting floating-point values in SSE registers unless
27325 SSE math is being used, and likewise for the 387 registers. */
27327 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
27329 enum machine_mode mode = GET_MODE (x);
27331 /* Restrict the output reload class to the register bank that we are doing
27332 math on.  If we would rather not return a subset of CLASS, reject this
27333 alternative by returning NO_REGS: if reload cannot do this, it will still use its choice.  */
27335 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
27336 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
27338 if (X87_FLOAT_MODE_P (mode))
27340 if (regclass == FP_TOP_SSE_REGS)
27342 else if (regclass == FP_SECOND_SSE_REGS)
27343 return FP_SECOND_REG;
27345 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
27352 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
27353 enum machine_mode mode,
27354 secondary_reload_info *sri ATTRIBUTE_UNUSED)
27356 /* QImode spills from non-QI registers require an
27357 intermediate register on 32-bit targets.  */
27358 if (!in_p && mode == QImode && !TARGET_64BIT
27359 && (rclass == GENERAL_REGS
27360 || rclass == LEGACY_REGS
27361 || rclass == INDEX_REGS))
27370 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
27371 regno = true_regnum (x);
27373 /* Return Q_REGS if the operand is in memory. */
27381 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
27384 ix86_class_likely_spilled_p (reg_class_t rclass)
27395 case SSE_FIRST_REG:
27397 case FP_SECOND_REG:
27407 /* If we are copying between general and FP registers, we need a memory
27408 location. The same is true for SSE and MMX registers.
27410 To optimize register_move_cost performance, allow the inline variant.
27412 The macro can't work reliably when one of the CLASSES is a class containing
27413 registers from multiple units (SSE, MMX, integer).  We avoid this by never
27414 combining those units in a single alternative in the machine description.
27415 Ensure that this constraint holds to avoid unexpected surprises.
27417 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
27418 enforce these sanity checks. */
27421 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
27422 enum machine_mode mode, int strict)
27424 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
27425 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
27426 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
27427 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
27428 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
27429 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
27431 gcc_assert (!strict);
27435 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
27438 /* ??? This is a lie. We do have moves between mmx/general, and for
27439 mmx/sse2. But by saying we need secondary memory we discourage the
27440 register allocator from using the mmx registers unless needed. */
27441 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
27444 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27446 /* SSE1 doesn't have any direct moves from other classes. */
27450 /* If the target says that inter-unit moves are more expensive
27451 than moving through memory, then don't generate them. */
27452 if (!TARGET_INTER_UNIT_MOVES)
27455 /* Between SSE and general, we have moves no larger than word size. */
27456 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
27464 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
27465 enum machine_mode mode, int strict)
27467 return inline_secondary_memory_needed (class1, class2, mode, strict);
27470 /* Return true if the registers in CLASS cannot represent the change from
27471 modes FROM to TO. */
27474 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
27475 enum reg_class regclass)
27480 /* x87 registers can't do subreg at all, as all values are reformatted
27481 to extended precision. */
27482 if (MAYBE_FLOAT_CLASS_P (regclass))
27485 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
27487 /* Vector registers do not support QI or HImode loads. If we don't
27488 disallow a change to these modes, reload will assume it's ok to
27489 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
27490 the vec_dupv4hi pattern. */
27491 if (GET_MODE_SIZE (from) < 4)
27494 /* Vector registers do not support subreg with nonzero offsets, which
27495 are otherwise valid for integer registers. Since we can't see
27496 whether we have a nonzero offset from here, prohibit all
27497 nonparadoxical subregs changing size. */
27498 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
27505 /* Return the cost of moving data of mode M between a
27506 register and memory. A value of 2 is the default; this cost is
27507 relative to those in `REGISTER_MOVE_COST'.
27509    This function is used extensively by register_move_cost, which is used to
27510    build tables at startup, so make it inline in this case.
27511    When IN is 2, return the maximum of the in and out move costs.
27513 If moving between registers and memory is more expensive than
27514    between two registers, you should define this macro to express the relative cost.
27517    Model also the increased cost of moving QImode registers in non-Q_REGS classes.  */
27521 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
27525 if (FLOAT_CLASS_P (regclass))
27543 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
27544 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
27546 if (SSE_CLASS_P (regclass))
27549 switch (GET_MODE_SIZE (mode))
27564 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
27565 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
27567 if (MMX_CLASS_P (regclass))
27570 switch (GET_MODE_SIZE (mode))
27582 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
27583 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
27585 switch (GET_MODE_SIZE (mode))
27588 if (Q_CLASS_P (regclass) || TARGET_64BIT)
27591 return ix86_cost->int_store[0];
27592 if (TARGET_PARTIAL_REG_DEPENDENCY
27593 && optimize_function_for_speed_p (cfun))
27594 cost = ix86_cost->movzbl_load;
27596 cost = ix86_cost->int_load[0];
27598 return MAX (cost, ix86_cost->int_store[0]);
27604 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
27606 return ix86_cost->movzbl_load;
27608 return ix86_cost->int_store[0] + 4;
27613 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
27614 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
27616 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
27617 if (mode == TFmode)
27620 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
27622 cost = ix86_cost->int_load[2];
27624 cost = ix86_cost->int_store[2];
27625 return (cost * (((int) GET_MODE_SIZE (mode)
27626 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
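
/* Illustrative sketch, not part of GCC: the expression above charges one
   word-sized move per word of the value, rounding the mode size up to a
   whole number of words.  The same arithmetic as a standalone helper
   (all names here are hypothetical): */

static int
multiword_move_cost_sketch (int mode_size, int units_per_word, int cost_per_word)
{
  int nwords = (mode_size + units_per_word - 1) / units_per_word;
  return cost_per_word * nwords;
}

/* For example, a 12-byte value with 4-byte words needs 3 word moves:
   multiword_move_cost_sketch (12, 4, c) == 3 * c.  */
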
27631 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
27634 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
27638 /* Return the cost of moving data from a register in class CLASS1 to
27639 one in class CLASS2.
27641 It is not required that the cost always equal 2 when FROM is the same as TO;
27642 on some machines it is expensive to move between registers if they are not
27643 general registers. */
27646 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
27647 reg_class_t class2_i)
27649 enum reg_class class1 = (enum reg_class) class1_i;
27650 enum reg_class class2 = (enum reg_class) class2_i;
27652  /* In case we require secondary memory, compute the cost of the store followed
27653     by the load.  To avoid bad register allocation choices, we need this to be
27654     *at least* as high as the symmetric MEMORY_MOVE_COST.  */
27656 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27660 cost += inline_memory_move_cost (mode, class1, 2);
27661 cost += inline_memory_move_cost (mode, class2, 2);
27663  /* When copying from a general purpose register we may emit multiple stores
27664     followed by a single load, causing a memory size mismatch stall.
27665     Count this as an arbitrarily high cost of 20.  */
27666 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27669 /* In the case of FP/MMX moves, the registers actually overlap, and we
27670 have to switch modes in order to treat them differently. */
27671 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27672 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27678 /* Moves between SSE/MMX and integer unit are expensive. */
27679 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27680 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27682      /* ??? By keeping the returned value relatively high, we limit the number
27683	 of moves between integer and MMX/SSE registers for all targets.
27684	 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
27685	 where integer modes in MMX/SSE registers are not tieable
27686	 because of missing QImode and HImode moves to, from or between
27687	 MMX/SSE registers.  */
27688 return MAX (8, ix86_cost->mmxsse_to_integer);
27690 if (MAYBE_FLOAT_CLASS_P (class1))
27691 return ix86_cost->fp_move;
27692 if (MAYBE_SSE_CLASS_P (class1))
27693 return ix86_cost->sse_move;
27694 if (MAYBE_MMX_CLASS_P (class1))
27695 return ix86_cost->mmx_move;
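
/* Illustrative sketch, not part of GCC: when secondary memory is needed,
   the cost above is composed as store cost plus load cost, with a
   surcharge when the source class needs more hard registers than the
   destination (multiple stores feeding one wide load stall).  A
   simplified standalone model of that composition: */

static int
cross_unit_move_cost_sketch (int store_cost, int load_cost,
			     int src_nregs, int dst_nregs)
{
  int cost = store_cost + load_cost;
  /* Mirror the "arbitrarily high cost of 20" size-mismatch penalty.  */
  if (src_nregs > dst_nregs)
    cost += 20;
  return cost;
}
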
27699 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27702 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27704   /* Only the flags registers can hold CCmode values, and they can hold nothing else.  */
27705 if (CC_REGNO_P (regno))
27706 return GET_MODE_CLASS (mode) == MODE_CC;
27707 if (GET_MODE_CLASS (mode) == MODE_CC
27708 || GET_MODE_CLASS (mode) == MODE_RANDOM
27709 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27711 if (FP_REGNO_P (regno))
27712 return VALID_FP_MODE_P (mode);
27713 if (SSE_REGNO_P (regno))
27715 /* We implement the move patterns for all vector modes into and
27716 out of SSE registers, even when no operation instructions
27717	 are available.  OImode moves are available only when AVX is enabled.  */
27719 return ((TARGET_AVX && mode == OImode)
27720 || VALID_AVX256_REG_MODE (mode)
27721 || VALID_SSE_REG_MODE (mode)
27722 || VALID_SSE2_REG_MODE (mode)
27723 || VALID_MMX_REG_MODE (mode)
27724 || VALID_MMX_REG_MODE_3DNOW (mode));
27726 if (MMX_REGNO_P (regno))
27728 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27729 so if the register is available at all, then we can move data of
27730 the given mode into or out of it. */
27731 return (VALID_MMX_REG_MODE (mode)
27732 || VALID_MMX_REG_MODE_3DNOW (mode));
27735 if (mode == QImode)
27737      /* Take care with QImode values - they can be in non-QI regs,
27738	 but then they cause partial register stalls.  */
27739 if (regno <= BX_REG || TARGET_64BIT)
27741 if (!TARGET_PARTIAL_REG_STALL)
27743 return reload_in_progress || reload_completed;
27745   /* We handle both integers and floats in the general purpose registers.  */
27746 else if (VALID_INT_MODE_P (mode))
27748 else if (VALID_FP_MODE_P (mode))
27750 else if (VALID_DFP_MODE_P (mode))
27752 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27753 on to use that value in smaller contexts, this can easily force a
27754 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27755 supporting DImode, allow it. */
27756 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27762 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27763 tieable integer mode. */
27766 ix86_tieable_integer_mode_p (enum machine_mode mode)
27775 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27778 return TARGET_64BIT;
27785 /* Return true if MODE1 is accessible in a register that can hold MODE2
27786 without copying. That is, all register classes that can hold MODE2
27787 can also hold MODE1. */
27790 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27792 if (mode1 == mode2)
27795 if (ix86_tieable_integer_mode_p (mode1)
27796 && ix86_tieable_integer_mode_p (mode2))
27799 /* MODE2 being XFmode implies fp stack or general regs, which means we
27800 can tie any smaller floating point modes to it. Note that we do not
27801 tie this with TFmode. */
27802 if (mode2 == XFmode)
27803 return mode1 == SFmode || mode1 == DFmode;
27805 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27806 that we can tie it with SFmode. */
27807 if (mode2 == DFmode)
27808 return mode1 == SFmode;
27810 /* If MODE2 is only appropriate for an SSE register, then tie with
27811 any other mode acceptable to SSE registers. */
27812 if (GET_MODE_SIZE (mode2) == 16
27813 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27814 return (GET_MODE_SIZE (mode1) == 16
27815 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27817 /* If MODE2 is appropriate for an MMX register, then tie
27818 with any other mode acceptable to MMX registers. */
27819 if (GET_MODE_SIZE (mode2) == 8
27820 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27821 return (GET_MODE_SIZE (mode1) == 8
27822 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
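
/* Illustrative sketch, not part of GCC: for the two vector cases above
   the rule collapses to "same register-file size class".  A standalone
   restatement for the 8-byte (MMX) and 16-byte (SSE) modes, eliding the
   ix86_hard_regno_mode_ok checks: */

static int
vector_modes_tieable_sketch (int size1, int size2)
{
  /* 16-byte modes tie only with other 16-byte modes, and 8-byte modes
     only with other 8-byte modes.  */
  return size1 == size2 && (size1 == 8 || size1 == 16);
}
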
27827 /* Compute a (partial) cost for rtx X. Return true if the complete
27828 cost has been computed, and false if subexpressions should be
27829 scanned. In either case, *TOTAL contains the cost result. */
27832 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27834 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27835 enum machine_mode mode = GET_MODE (x);
27836 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27844 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27846 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
27848 else if (flag_pic && SYMBOLIC_CONST (x)
27850		   || (GET_CODE (x) != LABEL_REF
27851 && (GET_CODE (x) != SYMBOL_REF
27852 || !SYMBOL_REF_LOCAL_P (x)))))
27859 if (mode == VOIDmode)
27862 switch (standard_80387_constant_p (x))
27867 default: /* Other constants */
27872 /* Start with (MEM (SYMBOL_REF)), since that's where
27873 it'll probably end up. Add a penalty for size. */
27874 *total = (COSTS_N_INSNS (1)
27875 + (flag_pic != 0 && !TARGET_64BIT)
27876 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
27882      /* The zero extension is often completely free on x86_64, so make
27883 it as cheap as possible. */
27884 if (TARGET_64BIT && mode == DImode
27885 && GET_MODE (XEXP (x, 0)) == SImode)
27887 else if (TARGET_ZERO_EXTEND_WITH_AND)
27888 *total = cost->add;
27890 *total = cost->movzx;
27894 *total = cost->movsx;
27898 if (CONST_INT_P (XEXP (x, 1))
27899 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27901 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27904 *total = cost->add;
27907 if ((value == 2 || value == 3)
27908 && cost->lea <= cost->shift_const)
27910 *total = cost->lea;
27920 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27922 if (CONST_INT_P (XEXP (x, 1)))
27924 if (INTVAL (XEXP (x, 1)) > 32)
27925 *total = cost->shift_const + COSTS_N_INSNS (2);
27927 *total = cost->shift_const * 2;
27931 if (GET_CODE (XEXP (x, 1)) == AND)
27932 *total = cost->shift_var * 2;
27934 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27939 if (CONST_INT_P (XEXP (x, 1)))
27940 *total = cost->shift_const;
27942 *total = cost->shift_var;
27947 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27949 /* ??? SSE scalar cost should be used here. */
27950 *total = cost->fmul;
27953 else if (X87_FLOAT_MODE_P (mode))
27955 *total = cost->fmul;
27958 else if (FLOAT_MODE_P (mode))
27960 /* ??? SSE vector cost should be used here. */
27961 *total = cost->fmul;
27966 rtx op0 = XEXP (x, 0);
27967 rtx op1 = XEXP (x, 1);
27969 if (CONST_INT_P (XEXP (x, 1)))
27971 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27972 for (nbits = 0; value != 0; value &= value - 1)
27976 /* This is arbitrary. */
27979 /* Compute costs correctly for widening multiplication. */
27980 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27981 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27982 == GET_MODE_SIZE (mode))
27984 int is_mulwiden = 0;
27985 enum machine_mode inner_mode = GET_MODE (op0);
27987 if (GET_CODE (op0) == GET_CODE (op1))
27988 is_mulwiden = 1, op1 = XEXP (op1, 0);
27989 else if (CONST_INT_P (op1))
27991 if (GET_CODE (op0) == SIGN_EXTEND)
27992 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27995 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27999 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28002 *total = (cost->mult_init[MODE_INDEX (mode)]
28003 + nbits * cost->mult_bit
28004 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
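
/* Illustrative sketch, not part of GCC: the nbits loop above is
   Kernighan's popcount (each iteration clears the lowest set bit), so
   the cost of multiplying by a constant scales with the number of
   1 bits in it.  In standalone form:

       static int
       popcount_sketch (unsigned long long value)
       {
         int nbits;
         for (nbits = 0; value != 0; value &= value - 1)
           nbits++;
         return nbits;
       }

   popcount_sketch (0x15) == 3, so a multiply by 0x15 is costed roughly
   as mult_init + 3 * mult_bit by the code above.  */
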
28013 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28014 /* ??? SSE cost should be used here. */
28015 *total = cost->fdiv;
28016 else if (X87_FLOAT_MODE_P (mode))
28017 *total = cost->fdiv;
28018 else if (FLOAT_MODE_P (mode))
28019 /* ??? SSE vector cost should be used here. */
28020 *total = cost->fdiv;
28022 *total = cost->divide[MODE_INDEX (mode)];
28026 if (GET_MODE_CLASS (mode) == MODE_INT
28027 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28029 if (GET_CODE (XEXP (x, 0)) == PLUS
28030 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28031 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28032 && CONSTANT_P (XEXP (x, 1)))
28034 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28035 if (val == 2 || val == 4 || val == 8)
28037 *total = cost->lea;
28038 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28039 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28040 outer_code, speed);
28041 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28045 else if (GET_CODE (XEXP (x, 0)) == MULT
28046 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28048 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28049 if (val == 2 || val == 4 || val == 8)
28051 *total = cost->lea;
28052 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28053 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28057 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28059 *total = cost->lea;
28060 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28061 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28062 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
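
/* Illustrative sketch, not part of GCC: the three branches above all ask
   whether the address computation matches the LEA form
   base + index * scale + displacement, where the encodable scale
   factors are 1, 2, 4 and 8 (1 being the no-MULT case).  The scale test
   in standalone form:

       static int
       lea_scale_ok_sketch (long long scale)
       {
         return scale == 2 || scale == 4 || scale == 8;
       }
 */
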
28069 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28071 /* ??? SSE cost should be used here. */
28072 *total = cost->fadd;
28075 else if (X87_FLOAT_MODE_P (mode))
28077 *total = cost->fadd;
28080 else if (FLOAT_MODE_P (mode))
28082 /* ??? SSE vector cost should be used here. */
28083 *total = cost->fadd;
28091 if (!TARGET_64BIT && mode == DImode)
28093 *total = (cost->add * 2
28094 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28095 << (GET_MODE (XEXP (x, 0)) != DImode))
28096 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28097 << (GET_MODE (XEXP (x, 1)) != DImode)));
28103 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28105 /* ??? SSE cost should be used here. */
28106 *total = cost->fchs;
28109 else if (X87_FLOAT_MODE_P (mode))
28111 *total = cost->fchs;
28114 else if (FLOAT_MODE_P (mode))
28116 /* ??? SSE vector cost should be used here. */
28117 *total = cost->fchs;
28123 if (!TARGET_64BIT && mode == DImode)
28124 *total = cost->add * 2;
28126 *total = cost->add;
28130 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28131 && XEXP (XEXP (x, 0), 1) == const1_rtx
28132 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28133 && XEXP (x, 1) == const0_rtx)
28135 /* This kind of construct is implemented using test[bwl].
28136 Treat it as if we had an AND. */
28137 *total = (cost->add
28138 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28139 + rtx_cost (const1_rtx, outer_code, speed));
28145 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28150 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28151 /* ??? SSE cost should be used here. */
28152 *total = cost->fabs;
28153 else if (X87_FLOAT_MODE_P (mode))
28154 *total = cost->fabs;
28155 else if (FLOAT_MODE_P (mode))
28156 /* ??? SSE vector cost should be used here. */
28157 *total = cost->fabs;
28161 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28162 /* ??? SSE cost should be used here. */
28163 *total = cost->fsqrt;
28164 else if (X87_FLOAT_MODE_P (mode))
28165 *total = cost->fsqrt;
28166 else if (FLOAT_MODE_P (mode))
28167 /* ??? SSE vector cost should be used here. */
28168 *total = cost->fsqrt;
28172 if (XINT (x, 1) == UNSPEC_TP)
28179 case VEC_DUPLICATE:
28180 /* ??? Assume all of these vector manipulation patterns are
28181       recognizable, in which case they all pretty much have the same cost.  */
28183 *total = COSTS_N_INSNS (1);
28193 static int current_machopic_label_num;
28195 /* Given a symbol name and its associated stub, write out the
28196 definition of the stub. */
28199 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28201 unsigned int length;
28202 char *binder_name, *symbol_name, lazy_ptr_name[32];
28203 int label = ++current_machopic_label_num;
28205 /* For 64-bit we shouldn't get here. */
28206 gcc_assert (!TARGET_64BIT);
28208 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28209 symb = targetm.strip_name_encoding (symb);
28211 length = strlen (stub);
28212 binder_name = XALLOCAVEC (char, length + 32);
28213 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
28215 length = strlen (symb);
28216 symbol_name = XALLOCAVEC (char, length + 32);
28217 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28219 sprintf (lazy_ptr_name, "L%d$lz", label);
28222 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
28224 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
28226 fprintf (file, "%s:\n", stub);
28227 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28231 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
28232 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
28233 fprintf (file, "\tjmp\t*%%edx\n");
28236 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
28238 fprintf (file, "%s:\n", binder_name);
28242 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
28243 fputs ("\tpushl\t%eax\n", file);
28246 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
28248 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
28250 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
28251 fprintf (file, "%s:\n", lazy_ptr_name);
28252 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28253 fprintf (file, ASM_LONG "%s\n", binder_name);
28255 #endif /* TARGET_MACHO */
28257 /* Order the registers for the register allocator.  */
28260 x86_order_regs_for_local_alloc (void)
28265 /* First allocate the local general purpose registers. */
28266 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28267 if (GENERAL_REGNO_P (i) && call_used_regs[i])
28268 reg_alloc_order [pos++] = i;
28270 /* Global general purpose registers. */
28271 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28272 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
28273 reg_alloc_order [pos++] = i;
28275    /* x87 registers come first in case we are doing FP math using them.  */
28277 if (!TARGET_SSE_MATH)
28278 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
28279 reg_alloc_order [pos++] = i;
28281 /* SSE registers. */
28282 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
28283 reg_alloc_order [pos++] = i;
28284 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
28285 reg_alloc_order [pos++] = i;
28287 /* x87 registers. */
28288 if (TARGET_SSE_MATH)
28289 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
28290 reg_alloc_order [pos++] = i;
28292 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
28293 reg_alloc_order [pos++] = i;
28295    /* Initialize the rest of the array, as we do not allocate some registers at all.  */
28297 while (pos < FIRST_PSEUDO_REGISTER)
28298 reg_alloc_order [pos++] = 0;
28301 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
28302 struct attribute_spec.handler. */
28304 ix86_handle_abi_attribute (tree *node, tree name,
28305 tree args ATTRIBUTE_UNUSED,
28306 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28308 if (TREE_CODE (*node) != FUNCTION_TYPE
28309 && TREE_CODE (*node) != METHOD_TYPE
28310 && TREE_CODE (*node) != FIELD_DECL
28311 && TREE_CODE (*node) != TYPE_DECL)
28313 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28315 *no_add_attrs = true;
28320 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
28322 *no_add_attrs = true;
28326 /* Can combine regparm with all attributes but fastcall. */
28327 if (is_attribute_p ("ms_abi", name))
28329 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
28331 error ("ms_abi and sysv_abi attributes are not compatible");
28336 else if (is_attribute_p ("sysv_abi", name))
28338 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
28340 error ("ms_abi and sysv_abi attributes are not compatible");
28349 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28350 struct attribute_spec.handler. */
28352 ix86_handle_struct_attribute (tree *node, tree name,
28353 tree args ATTRIBUTE_UNUSED,
28354 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28357 if (DECL_P (*node))
28359 if (TREE_CODE (*node) == TYPE_DECL)
28360 type = &TREE_TYPE (*node);
28365 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28366 || TREE_CODE (*type) == UNION_TYPE)))
28368 warning (OPT_Wattributes, "%qE attribute ignored",
28370 *no_add_attrs = true;
28373 else if ((is_attribute_p ("ms_struct", name)
28374 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28375 || ((is_attribute_p ("gcc_struct", name)
28376 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28378 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28380 *no_add_attrs = true;
28387 ix86_handle_fndecl_attribute (tree *node, tree name,
28388 tree args ATTRIBUTE_UNUSED,
28389 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28391 if (TREE_CODE (*node) != FUNCTION_DECL)
28393 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28395 *no_add_attrs = true;
28401 ix86_ms_bitfield_layout_p (const_tree record_type)
28403 return ((TARGET_MS_BITFIELD_LAYOUT
28404 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28405 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
28408 /* Returns an expression indicating where the this parameter is
28409 located on entry to the FUNCTION. */
28412 x86_this_parameter (tree function)
28414 tree type = TREE_TYPE (function);
28415 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
28420 const int *parm_regs;
28422 if (ix86_function_type_abi (type) == MS_ABI)
28423 parm_regs = x86_64_ms_abi_int_parameter_registers;
28425 parm_regs = x86_64_int_parameter_registers;
28426 return gen_rtx_REG (DImode, parm_regs[aggr]);
28429 nregs = ix86_function_regparm (type, function);
28431 if (nregs > 0 && !stdarg_p (type))
28435 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
28436 regno = aggr ? DX_REG : CX_REG;
28437 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
28441 return gen_rtx_MEM (SImode,
28442 plus_constant (stack_pointer_rtx, 4));
28451 return gen_rtx_MEM (SImode,
28452 plus_constant (stack_pointer_rtx, 4));
28455 return gen_rtx_REG (SImode, regno);
28458 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
28461 /* Determine whether x86_output_mi_thunk can succeed. */
28464 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
28465 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
28466 HOST_WIDE_INT vcall_offset, const_tree function)
28468 /* 64-bit can handle anything. */
28472 /* For 32-bit, everything's fine if we have one free register. */
28473 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
28476 /* Need a free register for vcall_offset. */
28480 /* Need a free register for GOT references. */
28481 if (flag_pic && !targetm.binds_local_p (function))
28484 /* Otherwise ok. */
28488 /* Output the assembler code for a thunk function. THUNK_DECL is the
28489 declaration for the thunk function itself, FUNCTION is the decl for
28490 the target function. DELTA is an immediate constant offset to be
28491 added to THIS. If VCALL_OFFSET is nonzero, the word at
28492 *(*this + vcall_offset) should be added to THIS. */
28495 x86_output_mi_thunk (FILE *file,
28496 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
28497 HOST_WIDE_INT vcall_offset, tree function)
28500 rtx this_param = x86_this_parameter (function);
28503 /* Make sure unwind info is emitted for the thunk if needed. */
28504 final_start_function (emit_barrier (), file, 1);
28506 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
28507 pull it in now and let DELTA benefit. */
28508 if (REG_P (this_param))
28509 this_reg = this_param;
28510 else if (vcall_offset)
28512 /* Put the this parameter into %eax. */
28513 xops[0] = this_param;
28514 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
28515 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28518 this_reg = NULL_RTX;
28520 /* Adjust the this parameter by a fixed constant. */
28523 xops[0] = GEN_INT (delta);
28524 xops[1] = this_reg ? this_reg : this_param;
28527 if (!x86_64_general_operand (xops[0], DImode))
28529 tmp = gen_rtx_REG (DImode, R10_REG);
28531 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
28533 xops[1] = this_param;
28535 if (x86_maybe_negate_const_int (&xops[0], DImode))
28536 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
28538 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
28540 else if (x86_maybe_negate_const_int (&xops[0], SImode))
28541 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
28543 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
28546 /* Adjust the this parameter by a value stored in the vtable. */
28550 tmp = gen_rtx_REG (DImode, R10_REG);
28553 int tmp_regno = CX_REG;
28554 if (lookup_attribute ("fastcall",
28555 TYPE_ATTRIBUTES (TREE_TYPE (function)))
28556 || lookup_attribute ("thiscall",
28557 TYPE_ATTRIBUTES (TREE_TYPE (function))))
28558 tmp_regno = AX_REG;
28559 tmp = gen_rtx_REG (SImode, tmp_regno);
28562 xops[0] = gen_rtx_MEM (Pmode, this_reg);
28564 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28566 /* Adjust the this parameter. */
28567 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
28568 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
28570 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
28571 xops[0] = GEN_INT (vcall_offset);
28573 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
28574 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
28576 xops[1] = this_reg;
28577 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
28580 /* If necessary, drop THIS back to its stack slot. */
28581 if (this_reg && this_reg != this_param)
28583 xops[0] = this_reg;
28584 xops[1] = this_param;
28585 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28588 xops[0] = XEXP (DECL_RTL (function), 0);
28591 if (!flag_pic || targetm.binds_local_p (function))
28592 output_asm_insn ("jmp\t%P0", xops);
28593 /* All thunks should be in the same object as their target,
28594 and thus binds_local_p should be true. */
28595 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
28596 gcc_unreachable ();
28599 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
28600 tmp = gen_rtx_CONST (Pmode, tmp);
28601 tmp = gen_rtx_MEM (QImode, tmp);
28603 output_asm_insn ("jmp\t%A0", xops);
28608 if (!flag_pic || targetm.binds_local_p (function))
28609 output_asm_insn ("jmp\t%P0", xops);
28614 rtx sym_ref = XEXP (DECL_RTL (function), 0);
28615 if (TARGET_MACHO_BRANCH_ISLANDS)
28616 sym_ref = (gen_rtx_SYMBOL_REF
28618 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
28619 tmp = gen_rtx_MEM (QImode, sym_ref);
28621 output_asm_insn ("jmp\t%0", xops);
28624 #endif /* TARGET_MACHO */
28626 tmp = gen_rtx_REG (SImode, CX_REG);
28627 output_set_got (tmp, NULL_RTX);
28630 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
28631 output_asm_insn ("jmp\t{*}%1", xops);
28634 final_end_function ();
28638 x86_file_start (void)
28640 default_file_start ();
28642 darwin_file_start ();
28644 if (X86_FILE_START_VERSION_DIRECTIVE)
28645 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28646 if (X86_FILE_START_FLTUSED)
28647 fputs ("\t.global\t__fltused\n", asm_out_file);
28648 if (ix86_asm_dialect == ASM_INTEL)
28649 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28653 x86_field_alignment (tree field, int computed)
28655 enum machine_mode mode;
28656 tree type = TREE_TYPE (field);
28658 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28660 mode = TYPE_MODE (strip_array_types (type));
28661 if (mode == DFmode || mode == DCmode
28662 || GET_MODE_CLASS (mode) == MODE_INT
28663 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28664 return MIN (32, computed);
28668 /* Output assembler code to FILE to increment profiler label # LABELNO
28669 for profiling a function entry. */
28671 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28673 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28678 #ifndef NO_PROFILE_COUNTERS
28679 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28682 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28683 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28685 fprintf (file, "\tcall\t%s\n", mcount_name);
28689 #ifndef NO_PROFILE_COUNTERS
28690 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28693 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28697 #ifndef NO_PROFILE_COUNTERS
28698 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28701 fprintf (file, "\tcall\t%s\n", mcount_name);
28705 /* We don't have exact information about the insn sizes, but we may assume
28706 quite safely that we are informed about all 1 byte insns and memory
28707    address sizes.  This is enough to eliminate unnecessary padding in most cases.  */
28711 min_insn_size (rtx insn)
28715 if (!INSN_P (insn) || !active_insn_p (insn))
28718   /* Discard alignments we've emitted, and jump table data.  */
28719 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28720 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28722 if (JUMP_TABLE_DATA_P (insn))
28725 /* Important case - calls are always 5 bytes.
28726      It is common to have many calls in a row.  */
28728 && symbolic_reference_mentioned_p (PATTERN (insn))
28729 && !SIBLING_CALL_P (insn))
28731 len = get_attr_length (insn);
28735 /* For normal instructions we rely on get_attr_length being exact,
28736 with a few exceptions. */
28737 if (!JUMP_P (insn))
28739 enum attr_type type = get_attr_type (insn);
28744 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28745 || asm_noperands (PATTERN (insn)) >= 0)
28752 /* Otherwise trust get_attr_length. */
28756 l = get_attr_length_address (insn);
28757 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28766 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28768 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window.  */
28772 ix86_avoid_jump_mispredicts (void)
28774 rtx insn, start = get_insns ();
28775 int nbytes = 0, njumps = 0;
28778 /* Look for all minimal intervals of instructions containing 4 jumps.
28779 The intervals are bounded by START and INSN. NBYTES is the total
28780 size of instructions in the interval including INSN and not including
28781      START.  When NBYTES is smaller than 16 bytes, it is possible
28782      that the ends of START and INSN land in the same 16-byte page.
28784      The smallest offset in the page at which INSN can start is the case where
28785      START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
28786      We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
28788 for (insn = start; insn; insn = NEXT_INSN (insn))
28792 if (LABEL_P (insn))
28794 int align = label_to_alignment (insn);
28795 int max_skip = label_to_max_skip (insn);
28799 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
28800 already in the current 16 byte page, because otherwise
28801 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28802 bytes to reach 16 byte boundary. */
28804 || (align <= 3 && max_skip != (1 << align) - 1))
28807 fprintf (dump_file, "Label %i with max_skip %i\n",
28808 INSN_UID (insn), max_skip);
28811 while (nbytes + max_skip >= 16)
28813 start = NEXT_INSN (start);
28814 if ((JUMP_P (start)
28815 && GET_CODE (PATTERN (start)) != ADDR_VEC
28816 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28818 njumps--, isjump = 1;
28821 nbytes -= min_insn_size (start);
28827 min_size = min_insn_size (insn);
28828 nbytes += min_size;
28830 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28831 INSN_UID (insn), min_size);
28833 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28834 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28842 start = NEXT_INSN (start);
28843 if ((JUMP_P (start)
28844 && GET_CODE (PATTERN (start)) != ADDR_VEC
28845 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28847 njumps--, isjump = 1;
28850 nbytes -= min_insn_size (start);
28852 gcc_assert (njumps >= 0);
28854 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28855 INSN_UID (start), INSN_UID (insn), nbytes);
28857 if (njumps == 3 && isjump && nbytes < 16)
28859 int padsize = 15 - nbytes + min_insn_size (insn);
28862 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28863 INSN_UID (insn), padsize);
28864 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
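
/* Illustrative sketch, not part of GCC: the padsize computed above is
   the worst-case distance needed to push INSN past the current 16-byte
   window.  If START ends at offset 0, the window already holds NBYTES
   bytes (including INSN), so padding by 15 - NBYTES + SIZE bytes
   guarantees the fourth jump starts in the next window.  Standalone
   form: */

static int
four_jump_padsize_sketch (int nbytes, int insn_size)
{
  return 15 - nbytes + insn_size;
}

/* E.g. with NBYTES == 12 (including the final 2-byte jump),
   four_jump_padsize_sketch (12, 2) == 5 bytes of padding.  */
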
28870 /* AMD Athlon works faster
28871    when RET is not the destination of a conditional jump or directly preceded
28872    by another jump instruction.  We avoid the penalty by inserting a NOP just
28873    before the RET instructions in such cases.  */
28875 ix86_pad_returns (void)
28880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28882 basic_block bb = e->src;
28883 rtx ret = BB_END (bb);
28885 bool replace = false;
28887 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28888 || optimize_bb_for_size_p (bb))
28890 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28891 if (active_insn_p (prev) || LABEL_P (prev))
28893 if (prev && LABEL_P (prev))
28898 FOR_EACH_EDGE (e, ei, bb->preds)
28899 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28900 && !(e->flags & EDGE_FALLTHRU))
28905 prev = prev_active_insn (ret);
28907 && ((JUMP_P (prev) && any_condjump_p (prev))
28910      /* Empty functions get a branch mispredict even when the jump destination
28911	 is not visible to us.  */
28912 if (!prev && !optimize_function_for_size_p (cfun))
28917 emit_jump_insn_before (gen_return_internal_long (), ret);
28923 /* Count the minimum number of instructions in BB. Return 4 if the
28924 number of instructions >= 4. */
28927 ix86_count_insn_bb (basic_block bb)
28930 int insn_count = 0;
28932 /* Count number of instructions in this block. Return 4 if the number
28933 of instructions >= 4. */
28934 FOR_BB_INSNS (bb, insn)
28936      /* Only happens in exit blocks.  */
28938 && GET_CODE (PATTERN (insn)) == RETURN)
28941 if (NONDEBUG_INSN_P (insn)
28942 && GET_CODE (PATTERN (insn)) != USE
28943 && GET_CODE (PATTERN (insn)) != CLOBBER)
28946 if (insn_count >= 4)
28955 /* Count the minimum number of instructions in the code path in BB.
28956    Return 4 if the number of instructions >= 4.  */
28959 ix86_count_insn (basic_block bb)
28963 int min_prev_count;
28965 /* Only bother counting instructions along paths with no
28966 more than 2 basic blocks between entry and exit. Given
28967 that BB has an edge to exit, determine if a predecessor
28968 of BB has an edge from entry. If so, compute the number
28969 of instructions in the predecessor block. If there
28970 happen to be multiple such blocks, compute the minimum. */
28971 min_prev_count = 4;
28972 FOR_EACH_EDGE (e, ei, bb->preds)
28975 edge_iterator prev_ei;
28977 if (e->src == ENTRY_BLOCK_PTR)
28979 min_prev_count = 0;
28982 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28984 if (prev_e->src == ENTRY_BLOCK_PTR)
28986 int count = ix86_count_insn_bb (e->src);
28987 if (count < min_prev_count)
28988 min_prev_count = count;
28994 if (min_prev_count < 4)
28995 min_prev_count += ix86_count_insn_bb (bb);
28997 return min_prev_count;
29000 /* Pad short function to 4 instructions.   */
29003 ix86_pad_short_function (void)
29008 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29010 rtx ret = BB_END (e->src);
29011 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29013 int insn_count = ix86_count_insn (e->src);
29015 /* Pad short function. */
29016 if (insn_count < 4)
29020 /* Find epilogue. */
29023 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29024 insn = PREV_INSN (insn);
29029 /* Two NOPs are counted as one instruction. */
29030 insn_count = 2 * (4 - insn_count);
29031 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
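
/* Illustrative sketch, not part of GCC: since two NOPs are counted as
   one instruction, a function body of INSN_COUNT < 4 instructions is
   padded with 2 * (4 - INSN_COUNT) NOPs.  Standalone form: */

static int
short_function_nops_sketch (int insn_count)
{
  return insn_count < 4 ? 2 * (4 - insn_count) : 0;
}

/* A one-instruction function thus receives
   short_function_nops_sketch (1) == 6 NOPs before its epilogue.  */
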
29037 /* Implement machine specific optimizations.  We implement padding of returns
29038    for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
29042 if (optimize && optimize_function_for_speed_p (cfun))
29044 if (TARGET_PAD_SHORT_FUNCTION)
29045 ix86_pad_short_function ();
29046 else if (TARGET_PAD_RETURNS)
29047 ix86_pad_returns ();
29048 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29049 if (TARGET_FOUR_JUMP_LIMIT)
29050 ix86_avoid_jump_mispredicts ();
29054 /* Run the vzeroupper optimization if needed. */
29055 if (cfun->machine->use_vzeroupper_p)
29056 move_or_delete_vzeroupper ();
29059 /* Return nonzero when a QImode register that must be represented via a REX prefix is used.  */
29062 x86_extended_QIreg_mentioned_p (rtx insn)
29065 extract_insn_cached (insn);
29066 for (i = 0; i < recog_data.n_operands; i++)
29067 if (REG_P (recog_data.operand[i])
29068 && REGNO (recog_data.operand[i]) > BX_REG)
29073 /* Return nonzero when P points to a register encoded via a REX prefix.
29074    Called via for_each_rtx.  */
29076 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
29078 unsigned int regno;
29081 regno = REGNO (*p);
29082 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
29085 /* Return true when INSN mentions a register that must be encoded using a REX prefix.  */
29088 x86_extended_reg_mentioned_p (rtx insn)
29090 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
29091 extended_reg_mentioned_1, NULL);
29094 /* If profitable, negate (without causing overflow) the integer constant
29095    of mode MODE at location LOC.  Return true in this case.  */
29097 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
29101 if (!CONST_INT_P (*loc))
29107 /* DImode x86_64 constants must fit in 32 bits. */
29108 gcc_assert (x86_64_immediate_operand (*loc, mode));
29119 gcc_unreachable ();
29122 /* Avoid overflows. */
29123 if (mode_signbit_p (mode, *loc))
29126 val = INTVAL (*loc);
29128 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
29129 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
29130 if ((val < 0 && val != -128)
29133 *loc = GEN_INT (-val);
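
/* Illustrative sketch, not part of GCC: the transformation above prefers
   `subl $4,%eax' over `addl $-4,%eax', with -128 left alone (and, per
   the comment above, 128 swapped to -128) because -128 fits in a
   sign-extended byte while +128 does not.  A standalone decision
   function, eliding the mode and sign-bit overflow checks: */

static int
swap_sign_and_op_sketch (long long val)
{
  /* Negate negative constants so ADD becomes SUB, except -128; negate
     128 so it can be encoded as the byte -128.  */
  return (val < 0 && val != -128) || val == 128;
}
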
29140 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
29141 optabs would emit if we didn't have TFmode patterns. */
29144 x86_emit_floatuns (rtx operands[2])
29146 rtx neglab, donelab, i0, i1, f0, in, out;
29147 enum machine_mode mode, inmode;
29149 inmode = GET_MODE (operands[1]);
29150 gcc_assert (inmode == SImode || inmode == DImode);
29153 in = force_reg (inmode, operands[1]);
29154 mode = GET_MODE (out);
29155 neglab = gen_label_rtx ();
29156 donelab = gen_label_rtx ();
29157 f0 = gen_reg_rtx (mode);
29159 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
29161 expand_float (out, in, 0);
29163 emit_jump_insn (gen_jump (donelab));
29166 emit_label (neglab);
29168 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
29170 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
29172 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
29174 expand_float (f0, i0, 0);
29176 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
29178 emit_label (donelab);
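
/* Illustrative sketch, not part of GCC: the expansion above is the
   classic unsigned-to-float sequence.  Nonnegative inputs convert
   directly; otherwise the value is halved with the low bit folded back
   in (so rounding stays correct) and the converted result is doubled.
   A plain C model of the same data flow: */

static double
floatuns_sketch (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;	/* fits the signed range */
  else
    {
      /* Halve, folding the shifted-out bit into bit 0 (round to odd).  */
      unsigned long long half = (u >> 1) | (u & 1);
      double f = (double) (long long) half;
      return f + f;			/* undo the halving */
    }
}
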
29181 /* AVX does not support 32-byte integer vector operations,
29182 thus the longest vector we are faced with is V16QImode. */
29183 #define MAX_VECT_LEN 16
29185 struct expand_vec_perm_d
29187 rtx target, op0, op1;
29188 unsigned char perm[MAX_VECT_LEN];
29189 enum machine_mode vmode;
29190 unsigned char nelt;
29194 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
29195 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
29197 /* Get a vector mode of the same size as the original but with elements
29198 twice as wide. This is only guaranteed to apply to integral vectors. */
29200 static inline enum machine_mode
29201 get_mode_wider_vector (enum machine_mode o)
29203 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
29204 enum machine_mode n = GET_MODE_WIDER_MODE (o);
29205 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
29206 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
29210 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29211 with all elements equal to VAR. Return true if successful. */
29214 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
29215 rtx target, rtx val)
29238 /* First attempt to recognize VAL as-is. */
29239 dup = gen_rtx_VEC_DUPLICATE (mode, val);
29240 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
29241 if (recog_memoized (insn) < 0)
29244 /* If that fails, force VAL into a register. */
29247 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
29248 seq = get_insns ();
29251 emit_insn_before (seq, insn);
29253 ok = recog_memoized (insn) >= 0;
29262 if (TARGET_SSE || TARGET_3DNOW_A)
29266 val = gen_lowpart (SImode, val);
29267 x = gen_rtx_TRUNCATE (HImode, val);
29268 x = gen_rtx_VEC_DUPLICATE (mode, x);
29269 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29282 struct expand_vec_perm_d dperm;
29286 memset (&dperm, 0, sizeof (dperm));
29287 dperm.target = target;
29288 dperm.vmode = mode;
29289 dperm.nelt = GET_MODE_NUNITS (mode);
29290 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
29292 /* Extend to SImode using a paradoxical SUBREG. */
29293 tmp1 = gen_reg_rtx (SImode);
29294 emit_move_insn (tmp1, gen_lowpart (SImode, val));
29296 /* Insert the SImode value as low element of a V4SImode vector. */
29297 tmp2 = gen_lowpart (V4SImode, dperm.op0);
29298 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
29300 ok = (expand_vec_perm_1 (&dperm)
29301 || expand_vec_perm_broadcast_1 (&dperm));
29313 /* Replicate the value once into the next wider mode and recurse. */
29315 enum machine_mode smode, wsmode, wvmode;
29318 smode = GET_MODE_INNER (mode);
29319 wvmode = get_mode_wider_vector (mode);
29320 wsmode = GET_MODE_INNER (wvmode);
29322 val = convert_modes (wsmode, smode, val, true);
29323 x = expand_simple_binop (wsmode, ASHIFT, val,
29324 GEN_INT (GET_MODE_BITSIZE (smode)),
29325 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29326 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
29328 x = gen_lowpart (wvmode, target);
29329 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
29337 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
29338 rtx x = gen_reg_rtx (hvmode);
29340 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
29343 x = gen_rtx_VEC_CONCAT (mode, x, x);
29344 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29353 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29354    whose ONE_VAR element is VAR, and other elements are zero.  Return true if successful.  */
29358 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
29359 rtx target, rtx var, int one_var)
29361 enum machine_mode vsimode;
29364 bool use_vector_set = false;
29369 /* For SSE4.1, we normally use vector set. But if the second
29370	 element is zero and inter-unit moves are OK, we use movq instead.  */
29372 use_vector_set = (TARGET_64BIT
29374 && !(TARGET_INTER_UNIT_MOVES
29380 use_vector_set = TARGET_SSE4_1;
29383 use_vector_set = TARGET_SSE2;
29386 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
29393 use_vector_set = TARGET_AVX;
29396 /* Use ix86_expand_vector_set in 64bit mode only. */
29397 use_vector_set = TARGET_AVX && TARGET_64BIT;
29403 if (use_vector_set)
29405 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
29406 var = force_reg (GET_MODE_INNER (mode), var);
29407 ix86_expand_vector_set (mmx_ok, target, var, one_var);
29423 var = force_reg (GET_MODE_INNER (mode), var);
29424 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
29425 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29430 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
29431 new_target = gen_reg_rtx (mode);
29433 new_target = target;
29434 var = force_reg (GET_MODE_INNER (mode), var);
29435 x = gen_rtx_VEC_DUPLICATE (mode, var);
29436 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
29437 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
29440 /* We need to shuffle the value to the correct position, so
29441 create a new pseudo to store the intermediate result. */
29443 /* With SSE2, we can use the integer shuffle insns. */
29444 if (mode != V4SFmode && TARGET_SSE2)
29446 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
29448 GEN_INT (one_var == 1 ? 0 : 1),
29449 GEN_INT (one_var == 2 ? 0 : 1),
29450 GEN_INT (one_var == 3 ? 0 : 1)));
29451 if (target != new_target)
29452 emit_move_insn (target, new_target);
29456 /* Otherwise convert the intermediate result to V4SFmode and
29457 use the SSE1 shuffle instructions. */
29458 if (mode != V4SFmode)
29460 tmp = gen_reg_rtx (V4SFmode);
29461 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
29466 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
29468 GEN_INT (one_var == 1 ? 0 : 1),
29469 GEN_INT (one_var == 2 ? 0+4 : 1+4),
29470 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
29472 if (mode != V4SFmode)
29473 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
29474 else if (tmp != target)
29475 emit_move_insn (target, tmp);
29477 else if (target != new_target)
29478 emit_move_insn (target, new_target);
29483 vsimode = V4SImode;
29489 vsimode = V2SImode;
29495 /* Zero extend the variable element to SImode and recurse. */
29496 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
29498 x = gen_reg_rtx (vsimode);
29499 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
29501 gcc_unreachable ();
29503 emit_move_insn (target, gen_lowpart (mode, x));
29511 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29512 consisting of the values in VALS. It is known that all elements
29513 except ONE_VAR are constants. Return true if successful. */
29516 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
29517 rtx target, rtx vals, int one_var)
29519 rtx var = XVECEXP (vals, 0, one_var);
29520 enum machine_mode wmode;
29523 const_vec = copy_rtx (vals);
29524 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
29525 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
29533 /* For the two element vectors, it's just as easy to use
29534 the general case. */
29538 /* Use ix86_expand_vector_set in 64bit mode only. */
29561 /* There's no way to set one QImode entry easily. Combine
29562 the variable value with its adjacent constant value, and
29563 promote to an HImode set. */
29564 x = XVECEXP (vals, 0, one_var ^ 1);
29567 var = convert_modes (HImode, QImode, var, true);
29568 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
29569 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29570 x = GEN_INT (INTVAL (x) & 0xff);
29574 var = convert_modes (HImode, QImode, var, true);
29575 x = gen_int_mode (INTVAL (x) << 8, HImode);
29577 if (x != const0_rtx)
29578 var = expand_simple_binop (HImode, IOR, var, x, var,
29579 1, OPTAB_LIB_WIDEN);
29581 x = gen_reg_rtx (wmode);
29582 emit_move_insn (x, gen_lowpart (wmode, const_vec));
29583 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
29585 emit_move_insn (target, gen_lowpart (mode, x));
29592 emit_move_insn (target, const_vec);
29593 ix86_expand_vector_set (mmx_ok, target, var, one_var);
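
/* Illustrative sketch, not part of GCC: the QImode case above fuses the
   variable byte with its constant neighbour into a single 16-bit value,
   placing the odd-indexed element in the high byte (little-endian
   element order).  Standalone form with hypothetical names: */

static unsigned short
pack_qi_pair_sketch (unsigned char var, unsigned char neighbour,
		     int var_index_is_odd)
{
  if (var_index_is_odd)
    /* Variable byte high, constant neighbour low.  */
    return (unsigned short) ((var << 8) | neighbour);
  /* Variable byte low, constant neighbour high.  */
  return (unsigned short) ((neighbour << 8) | var);
}
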
29597 /* A subroutine of ix86_expand_vector_init_general. Use vector
29598 concatenate to handle the most general case: all values variable,
29599 and none identical. */
29602 ix86_expand_vector_init_concat (enum machine_mode mode,
29603 rtx target, rtx *ops, int n)
29605 enum machine_mode cmode, hmode = VOIDmode;
29606 rtx first[8], second[4];
29646 gcc_unreachable ();
29649 if (!register_operand (ops[1], cmode))
29650 ops[1] = force_reg (cmode, ops[1]);
29651 if (!register_operand (ops[0], cmode))
29652 ops[0] = force_reg (cmode, ops[0]);
29653 emit_insn (gen_rtx_SET (VOIDmode, target,
29654 gen_rtx_VEC_CONCAT (mode, ops[0],
29674 gcc_unreachable ();
29690 gcc_unreachable ();
29695 /* FIXME: We process inputs backward to help RA. PR 36222. */
29698 for (; i > 0; i -= 2, j--)
29700 first[j] = gen_reg_rtx (cmode);
29701 v = gen_rtvec (2, ops[i - 1], ops[i]);
29702 ix86_expand_vector_init (false, first[j],
29703 gen_rtx_PARALLEL (cmode, v));
29709 gcc_assert (hmode != VOIDmode);
29710 for (i = j = 0; i < n; i += 2, j++)
29712 second[j] = gen_reg_rtx (hmode);
29713 ix86_expand_vector_init_concat (hmode, second [j],
29717 ix86_expand_vector_init_concat (mode, target, second, n);
29720 ix86_expand_vector_init_concat (mode, target, first, n);
29724 gcc_unreachable ();
29728 /* A subroutine of ix86_expand_vector_init_general. Use vector
29729 interleave to handle the most general case: all values variable,
29730 and none identical. */
29733 ix86_expand_vector_init_interleave (enum machine_mode mode,
29734 rtx target, rtx *ops, int n)
29736 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29739 rtx (*gen_load_even) (rtx, rtx, rtx);
29740 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29741 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29746 gen_load_even = gen_vec_setv8hi;
29747 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29748 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29749 inner_mode = HImode;
29750 first_imode = V4SImode;
29751 second_imode = V2DImode;
29752 third_imode = VOIDmode;
29755 gen_load_even = gen_vec_setv16qi;
29756 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29757 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29758 inner_mode = QImode;
29759 first_imode = V8HImode;
29760 second_imode = V4SImode;
29761 third_imode = V2DImode;
29764 gcc_unreachable ();
29767 for (i = 0; i < n; i++)
29769       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
29770 op0 = gen_reg_rtx (SImode);
29771 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29773 /* Insert the SImode value as low element of V4SImode vector. */
29774 op1 = gen_reg_rtx (V4SImode);
29775 op0 = gen_rtx_VEC_MERGE (V4SImode,
29776 gen_rtx_VEC_DUPLICATE (V4SImode,
29778 CONST0_RTX (V4SImode),
29780 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
29782       /* Cast the V4SImode vector back to a vector in the original mode.  */
29783 op0 = gen_reg_rtx (mode);
29784 emit_move_insn (op0, gen_lowpart (mode, op1));
29786       /* Load even elements into the second position.  */
29787 emit_insn (gen_load_even (op0,
29788 force_reg (inner_mode,
29792 /* Cast vector to FIRST_IMODE vector. */
29793 ops[i] = gen_reg_rtx (first_imode);
29794 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29797 /* Interleave low FIRST_IMODE vectors. */
29798 for (i = j = 0; i < n; i += 2, j++)
29800 op0 = gen_reg_rtx (first_imode);
29801 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29803 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29804 ops[j] = gen_reg_rtx (second_imode);
29805 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29808 /* Interleave low SECOND_IMODE vectors. */
29809 switch (second_imode)
29812 for (i = j = 0; i < n / 2; i += 2, j++)
29814 op0 = gen_reg_rtx (second_imode);
29815 emit_insn (gen_interleave_second_low (op0, ops[i],
29818	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
29820 ops[j] = gen_reg_rtx (third_imode);
29821 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29823 second_imode = V2DImode;
29824 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29828 op0 = gen_reg_rtx (second_imode);
29829 emit_insn (gen_interleave_second_low (op0, ops[0],
29832       /* Cast the SECOND_IMODE vector back to a vector in the original mode.  */
29834 emit_insn (gen_rtx_SET (VOIDmode, target,
29835 gen_lowpart (mode, op0)));
29839 gcc_unreachable ();
29843 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29844 all values variable, and none identical. */
29847 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29848 rtx target, rtx vals)
29850 rtx ops[32], op0, op1;
29851 enum machine_mode half_mode = VOIDmode;
29858 if (!mmx_ok && !TARGET_SSE)
29870 n = GET_MODE_NUNITS (mode);
29871 for (i = 0; i < n; i++)
29872 ops[i] = XVECEXP (vals, 0, i);
29873 ix86_expand_vector_init_concat (mode, target, ops, n);
29877 half_mode = V16QImode;
29881 half_mode = V8HImode;
29885 n = GET_MODE_NUNITS (mode);
29886 for (i = 0; i < n; i++)
29887 ops[i] = XVECEXP (vals, 0, i);
29888 op0 = gen_reg_rtx (half_mode);
29889 op1 = gen_reg_rtx (half_mode);
29890 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29892 ix86_expand_vector_init_interleave (half_mode, op1,
29893 &ops [n >> 1], n >> 2);
29894 emit_insn (gen_rtx_SET (VOIDmode, target,
29895 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29899 if (!TARGET_SSE4_1)
29907 /* Don't use ix86_expand_vector_init_interleave if we can't
29908 move from GPR to SSE register directly. */
29909 if (!TARGET_INTER_UNIT_MOVES)
29912 n = GET_MODE_NUNITS (mode);
29913 for (i = 0; i < n; i++)
29914 ops[i] = XVECEXP (vals, 0, i);
29915 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29923 gcc_unreachable ();
29927 int i, j, n_elts, n_words, n_elt_per_word;
29928 enum machine_mode inner_mode;
29929 rtx words[4], shift;
29931 inner_mode = GET_MODE_INNER (mode);
29932 n_elts = GET_MODE_NUNITS (mode);
29933 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29934 n_elt_per_word = n_elts / n_words;
29935 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29937 for (i = 0; i < n_words; ++i)
29939 rtx word = NULL_RTX;
29941 for (j = 0; j < n_elt_per_word; ++j)
29943 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29944 elt = convert_modes (word_mode, inner_mode, elt, true);
29950 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29951 word, 1, OPTAB_LIB_WIDEN);
29952 word = expand_simple_binop (word_mode, IOR, word, elt,
29953 word, 1, OPTAB_LIB_WIDEN);
29961 emit_move_insn (target, gen_lowpart (mode, words[0]));
29962 else if (n_words == 2)
29964 rtx tmp = gen_reg_rtx (mode);
29965 emit_clobber (tmp);
29966 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29967 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29968 emit_move_insn (target, tmp);
29970 else if (n_words == 4)
29972 rtx tmp = gen_reg_rtx (V4SImode);
29973 gcc_assert (word_mode == SImode);
29974 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29975 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29976 emit_move_insn (target, gen_lowpart (mode, tmp));
29979 gcc_unreachable ();
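
/* Illustrative sketch, not part of GCC: the word-building fallback above
   assembles each target word in an integer register, visiting elements
   from the highest index down so that shift-and-or leaves the lowest
   index in the least significant bits.  Standalone form for packing N
   elements of EBITS bits each into one word: */

static unsigned long long
pack_word_sketch (const unsigned long long *elts, int n, int ebits)
{
  unsigned long long word = 0;
  int j;
  for (j = 0; j < n; j++)
    /* Highest-indexed element first, as in the loop above.  */
    word = (word << ebits) | elts[n - j - 1];
  return word;
}
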
29983 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29984 instructions unless MMX_OK is true. */
29987 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29989 enum machine_mode mode = GET_MODE (target);
29990 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29991 int n_elts = GET_MODE_NUNITS (mode);
29992 int n_var = 0, one_var = -1;
29993 bool all_same = true, all_const_zero = true;
29997 for (i = 0; i < n_elts; ++i)
29999 x = XVECEXP (vals, 0, i);
30000 if (!(CONST_INT_P (x)
30001 || GET_CODE (x) == CONST_DOUBLE
30002 || GET_CODE (x) == CONST_FIXED))
30003 n_var++, one_var = i;
30004 else if (x != CONST0_RTX (inner_mode))
30005 all_const_zero = false;
30006 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30010 /* Constants are best loaded from the constant pool. */
30013 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30017 /* If all values are identical, broadcast the value. */
30019 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30020 XVECEXP (vals, 0, 0)))
30023 /* Values where only one field is non-constant are best loaded from
30024 the pool and overwritten via move later. */
30028 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30029 XVECEXP (vals, 0, one_var),
30033 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30037 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30041 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30043 enum machine_mode mode = GET_MODE (target);
30044 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30045 enum machine_mode half_mode;
30046 bool use_vec_merge = false;
30048 static rtx (*gen_extract[6][2]) (rtx, rtx)
30050 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30051 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30052 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30053 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30054 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
30055 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
30057 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
30059 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
30060 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
30061 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
30062 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
30063 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
30064 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
30074 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30075 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
30077 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30079 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30080 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30086 use_vec_merge = TARGET_SSE4_1;
30094 /* For two-element vectors, we implement a VEC_CONCAT with
30095 the extraction of the other element. */
30097 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
30098 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
30101 op0 = val, op1 = tmp;
30103 op0 = tmp, op1 = val;
30105 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
30106 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30111 use_vec_merge = TARGET_SSE4_1;
30118 use_vec_merge = true;
30122 /* tmp = target = A B C D */
30123 tmp = copy_to_reg (target);
30124 /* target = A A B B */
30125 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
30126 /* target = X A B B */
30127 ix86_expand_vector_set (false, target, val, 0);
30128 /* target = A X C D */
30129 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30130 const1_rtx, const0_rtx,
30131 GEN_INT (2+4), GEN_INT (3+4)));
30135 /* tmp = target = A B C D */
30136 tmp = copy_to_reg (target);
30137 /* tmp = X B C D */
30138 ix86_expand_vector_set (false, tmp, val, 0);
30139 /* target = A B X D */
30140 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30141 const0_rtx, const1_rtx,
30142 GEN_INT (0+4), GEN_INT (3+4)));
30146 /* tmp = target = A B C D */
30147 tmp = copy_to_reg (target);
30148 /* tmp = X B C D */
30149 ix86_expand_vector_set (false, tmp, val, 0);
30150 /* target = A B C X */
30151 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30152 const0_rtx, const1_rtx,
30153 GEN_INT (2+4), GEN_INT (0+4)));
30157 gcc_unreachable ();
30162 use_vec_merge = TARGET_SSE4_1;
30166 /* Element 0 handled by vec_merge below. */
30169 use_vec_merge = true;
30175 /* With SSE2, use integer shuffles to swap element 0 and ELT,
30176 store into element 0, then shuffle them back. */
30180 order[0] = GEN_INT (elt);
30181 order[1] = const1_rtx;
30182 order[2] = const2_rtx;
30183 order[3] = GEN_INT (3);
30184 order[elt] = const0_rtx;
30186 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30187 order[1], order[2], order[3]));
30189 ix86_expand_vector_set (false, target, val, 0);
30191 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30192 order[1], order[2], order[3]));
30196 /* For SSE1, we have to reuse the V4SF code. */
30197 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
30198 gen_lowpart (SFmode, val), elt);
30203 use_vec_merge = TARGET_SSE2;
30206 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
30210 use_vec_merge = TARGET_SSE4_1;
30217 half_mode = V16QImode;
30223 half_mode = V8HImode;
30229 half_mode = V4SImode;
30235 half_mode = V2DImode;
30241 half_mode = V4SFmode;
30247 half_mode = V2DFmode;
30253 /* Compute offset. */
30257 gcc_assert (i <= 1);
30259 /* Extract the half. */
30260 tmp = gen_reg_rtx (half_mode);
30261 emit_insn (gen_extract[j][i] (tmp, target));
30263 /* Put val in tmp at elt. */
30264 ix86_expand_vector_set (false, tmp, val, elt);
30267 emit_insn (gen_insert[j][i] (target, target, tmp));
30276 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
30277 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
30278 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30282 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
30284 emit_move_insn (mem, target);
30286 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
30287 emit_move_insn (tmp, val);
30289 emit_move_insn (target, mem);
30294 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
30296 enum machine_mode mode = GET_MODE (vec);
30297 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30298 bool use_vec_extr = false;
30311 use_vec_extr = true;
30315 use_vec_extr = TARGET_SSE4_1;
30327 tmp = gen_reg_rtx (mode);
30328 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
30329 GEN_INT (elt), GEN_INT (elt),
30330 GEN_INT (elt+4), GEN_INT (elt+4)));
30334 tmp = gen_reg_rtx (mode);
30335 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
30339 gcc_unreachable ();
30342 use_vec_extr = true;
30347 use_vec_extr = TARGET_SSE4_1;
30361 tmp = gen_reg_rtx (mode);
30362 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
30363 GEN_INT (elt), GEN_INT (elt),
30364 GEN_INT (elt), GEN_INT (elt)));
30368 tmp = gen_reg_rtx (mode);
30369 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
30373 gcc_unreachable ();
30376 use_vec_extr = true;
30381 /* For SSE1, we have to reuse the V4SF code. */
30382 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
30383 gen_lowpart (V4SFmode, vec), elt);
30389 use_vec_extr = TARGET_SSE2;
30392 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
30396 use_vec_extr = TARGET_SSE4_1;
30400 /* ??? Could extract the appropriate HImode element and shift. */
30407 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
30408 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
30410 /* Let the rtl optimizers know about the zero extension performed. */
30411 if (inner_mode == QImode || inner_mode == HImode)
30413 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
30414 target = gen_lowpart (SImode, target);
30417 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30421 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
30423 emit_move_insn (mem, vec);
30425 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
30426 emit_move_insn (target, tmp);
30430 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
30431 pattern to reduce; DEST is the destination; IN is the input vector. */
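/* A dataflow sketch (illustrative; writing IN = { a, b, c, d } and F
   for the elementwise binary op FN):
     tmp1 = movhlps (in, in)        -> { c, d, c, d }
     tmp2 = F (tmp1, in)            -> { F(c,a), F(d,b), .., .. }
     tmp3 = shufps (tmp2, tmp2, 1)  -> tmp2[1] in every element
     dest = F (tmp2, tmp3)          -> F(F(c,a), F(d,b)) in element 0.  */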
30434 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
30436 rtx tmp1, tmp2, tmp3;
30438 tmp1 = gen_reg_rtx (V4SFmode);
30439 tmp2 = gen_reg_rtx (V4SFmode);
30440 tmp3 = gen_reg_rtx (V4SFmode);
30442 emit_insn (gen_sse_movhlps (tmp1, in, in));
30443 emit_insn (fn (tmp2, tmp1, in));
30445 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
30446 const1_rtx, const1_rtx,
30447 GEN_INT (1+4), GEN_INT (1+4)));
30448 emit_insn (fn (dest, tmp2, tmp3));
30451 /* Target hook for scalar_mode_supported_p. */
30453 ix86_scalar_mode_supported_p (enum machine_mode mode)
30455 if (DECIMAL_FLOAT_MODE_P (mode))
30456 return default_decimal_float_supported_p ();
30457 else if (mode == TFmode)
30460 return default_scalar_mode_supported_p (mode);
30463 /* Implements target hook vector_mode_supported_p. */
30465 ix86_vector_mode_supported_p (enum machine_mode mode)
30467 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
30469 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
30471 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
30473 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
30475 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
30480 /* Target hook for c_mode_for_suffix. */
30481 static enum machine_mode
30482 ix86_c_mode_for_suffix (char suffix)
30492 /* Worker function for TARGET_MD_ASM_CLOBBERS.
30494 We do this in the new i386 backend to maintain source compatibility
30495 with the old cc0-based compiler. */
30498 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
30499 tree inputs ATTRIBUTE_UNUSED,
30502 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
30504 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
30509 /* Implements the target hook targetm.asm.encode_section_info. This
30510 is not used by NetWare. */
30512 static void ATTRIBUTE_UNUSED
30513 ix86_encode_section_info (tree decl, rtx rtl, int first)
30515 default_encode_section_info (decl, rtl, first);
30517 if (TREE_CODE (decl) == VAR_DECL
30518 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
30519 && ix86_in_large_data_p (decl))
30520 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
30523 /* Worker function for REVERSE_CONDITION. */
30526 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
30528 return (mode != CCFPmode && mode != CCFPUmode
30529 ? reverse_condition (code)
30530 : reverse_condition_maybe_unordered (code));
30533 /* Output code to perform an x87 FP register move, from OPERANDS[1]
30537 output_387_reg_move (rtx insn, rtx *operands)
30539 if (REG_P (operands[0]))
30541 if (REG_P (operands[1])
30542 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
30544 if (REGNO (operands[0]) == FIRST_STACK_REG)
30545 return output_387_ffreep (operands, 0);
30546 return "fstp\t%y0";
30548 if (STACK_TOP_P (operands[0]))
30549 return "fld%Z1\t%y1";
30552 else if (MEM_P (operands[0]))
30554 gcc_assert (REG_P (operands[1]));
30555 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
30556 return "fstp%Z0\t%y0";
30559 /* There is no non-popping store to memory for XFmode.
30560 So if we need one, follow the store with a load. */
30561 if (GET_MODE (operands[0]) == XFmode)
30562 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
30564 return "fst%Z0\t%y0";
30571 /* Output code to perform a conditional jump to LABEL if the C2 flag
30572 in the FP status register is set. */
30575 ix86_emit_fp_unordered_jump (rtx label)
30577 rtx reg = gen_reg_rtx (HImode);
30580 emit_insn (gen_x86_fnstsw_1 (reg));
30582 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
30584 emit_insn (gen_x86_sahf_1 (reg));
30586 temp = gen_rtx_REG (CCmode, FLAGS_REG);
30587 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
30591 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
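/* (The immediate 0x04 tests bit 2 of the status word's high byte,
   i.e. the C2 flag, bit 10 of the x87 status word.)  */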
30593 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
30594 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
30597 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
30598 gen_rtx_LABEL_REF (VOIDmode, label),
30600 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
30602 emit_jump_insn (temp);
30603 predict_jump (REG_BR_PROB_BASE * 10 / 100);
30606 /* Output code to perform a log1p XFmode calculation. */
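/* (Why the split below: fyl2xp1 is only specified for |x| less than
   1 - sqrt(2)/2, approximately 0.29289; larger magnitudes are routed
   through fyl2x on 1 + x instead.)  */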
30608 void ix86_emit_i387_log1p (rtx op0, rtx op1)
30610 rtx label1 = gen_label_rtx ();
30611 rtx label2 = gen_label_rtx ();
30613 rtx tmp = gen_reg_rtx (XFmode);
30614 rtx tmp2 = gen_reg_rtx (XFmode);
30617 emit_insn (gen_absxf2 (tmp, op1));
30618 test = gen_rtx_GE (VOIDmode, tmp,
30619 CONST_DOUBLE_FROM_REAL_VALUE (
30620 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
30622 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
30624 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30625 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
30626 emit_jump (label2);
30628 emit_label (label1);
30629 emit_move_insn (tmp, CONST1_RTX (XFmode));
30630 emit_insn (gen_addxf3 (tmp, op1, tmp));
30631 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30632 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
30634 emit_label (label2);
30637 /* Output code to perform a Newton-Raphson approximation of a single precision
30638 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
30640 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
30642 rtx x0, x1, e0, e1, two;
30644 x0 = gen_reg_rtx (mode);
30645 e0 = gen_reg_rtx (mode);
30646 e1 = gen_reg_rtx (mode);
30647 x1 = gen_reg_rtx (mode);
30649 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30651 if (VECTOR_MODE_P (mode))
30652 two = ix86_build_const_vector (mode, true, two);
30654 two = force_reg (mode, two);
30656 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
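/* Derivation sketch: one Newton-Raphson step for f(x) = 1/x - b is
     x1 = x0 - f(x0)/f'(x0) = x0 * (2.0 - b * x0),
   which roughly doubles the number of correct bits of the ~12-bit
   hardware rcp estimate, giving nearly full SFmode precision.  */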
30658 /* x0 = rcp(b) estimate */
30659 emit_insn (gen_rtx_SET (VOIDmode, x0,
30660 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30663 emit_insn (gen_rtx_SET (VOIDmode, e0,
30664 gen_rtx_MULT (mode, x0, a)));
30666 emit_insn (gen_rtx_SET (VOIDmode, e1,
30667 gen_rtx_MULT (mode, x0, b)));
30669 emit_insn (gen_rtx_SET (VOIDmode, x1,
30670 gen_rtx_MINUS (mode, two, e1)));
30671 /* res = e0 * x1 */
30672 emit_insn (gen_rtx_SET (VOIDmode, res,
30673 gen_rtx_MULT (mode, e0, x1)));
30676 /* Output code to perform a Newton-Raphson approximation of a
30677 single precision floating point [reciprocal] square root. */
30679 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
30682 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30685 x0 = gen_reg_rtx (mode);
30686 e0 = gen_reg_rtx (mode);
30687 e1 = gen_reg_rtx (mode);
30688 e2 = gen_reg_rtx (mode);
30689 e3 = gen_reg_rtx (mode);
30691 real_from_integer (&r, VOIDmode, -3, -1, 0);
30692 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30694 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30695 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30697 if (VECTOR_MODE_P (mode))
30699 mthree = ix86_build_const_vector (mode, true, mthree);
30700 mhalf = ix86_build_const_vector (mode, true, mhalf);
30703 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30704 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
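/* Derivation sketch: one Newton-Raphson step for f(x) = 1/x**2 - a is
     x1 = x0 * (3.0 - a * x0 * x0) / 2.0
        = -0.5 * x0 * (a * x0 * x0 - 3.0),
   and multiplying the refined reciprocal root by a yields sqrt(a).  */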
30706 /* x0 = rsqrt(a) estimate */
30707 emit_insn (gen_rtx_SET (VOIDmode, x0,
30708 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
30711 /* If a == 0.0, zero the infinite rsqrt estimate so that sqrt(0.0) does not produce a NaN from 0 * Inf. */
30716 zero = gen_reg_rtx (mode);
30717 mask = gen_reg_rtx (mode);
30719 zero = force_reg (mode, CONST0_RTX(mode));
30720 emit_insn (gen_rtx_SET (VOIDmode, mask,
30721 gen_rtx_NE (mode, zero, a)));
30723 emit_insn (gen_rtx_SET (VOIDmode, x0,
30724 gen_rtx_AND (mode, x0, mask)));
30728 emit_insn (gen_rtx_SET (VOIDmode, e0,
30729 gen_rtx_MULT (mode, x0, a)));
30731 emit_insn (gen_rtx_SET (VOIDmode, e1,
30732 gen_rtx_MULT (mode, e0, x0)));
30735 mthree = force_reg (mode, mthree);
30736 emit_insn (gen_rtx_SET (VOIDmode, e2,
30737 gen_rtx_PLUS (mode, e1, mthree)));
30739 mhalf = force_reg (mode, mhalf);
30741 /* e3 = -.5 * x0 */
30742 emit_insn (gen_rtx_SET (VOIDmode, e3,
30743 gen_rtx_MULT (mode, x0, mhalf)));
30745 /* e3 = -.5 * e0 */
30746 emit_insn (gen_rtx_SET (VOIDmode, e3,
30747 gen_rtx_MULT (mode, e0, mhalf)));
30748 /* ret = e2 * e3 */
30749 emit_insn (gen_rtx_SET (VOIDmode, res,
30750 gen_rtx_MULT (mode, e2, e3)));
30753 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30755 static void ATTRIBUTE_UNUSED
30756 i386_solaris_elf_named_section (const char *name, unsigned int flags,
30759 /* With Binutils 2.15, the "@unwind" marker must be specified on
30760 every occurrence of the ".eh_frame" section, not just the first
30763 && strcmp (name, ".eh_frame") == 0)
30765 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30766 flags & SECTION_WRITE ? "aw" : "a");
30769 default_elf_asm_named_section (name, flags, decl);
30772 /* Return the mangling of TYPE if it is an extended fundamental type. */
30774 static const char *
30775 ix86_mangle_type (const_tree type)
30777 type = TYPE_MAIN_VARIANT (type);
30779 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30780 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30783 switch (TYPE_MODE (type))
30786 /* __float128 is "g". */
30789 /* "long double" or __float80 is "e". */
30796 /* For 32-bit code we can save PIC register setup by using
30797 __stack_chk_fail_local hidden function instead of calling
30798 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
30799 register, so it is better to call __stack_chk_fail directly. */
30802 ix86_stack_protect_fail (void)
30804 return TARGET_64BIT
30805 ? default_external_stack_protect_fail ()
30806 : default_hidden_stack_protect_fail ();
30809 /* Select a format to encode pointers in exception handling data. CODE
30810 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30811 true if the symbol may be affected by dynamic relocations.
30813 ??? All x86 object file formats are capable of representing this.
30814 After all, the relocation needed is the same as for the call insn.
30815 Whether or not a particular assembler allows us to enter such, I
30816 guess we'll have to see. */
30818 asm_preferred_eh_data_format (int code, int global)
30822 int type = DW_EH_PE_sdata8;
30824 || ix86_cmodel == CM_SMALL_PIC
30825 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30826 type = DW_EH_PE_sdata4;
30827 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30829 if (ix86_cmodel == CM_SMALL
30830 || (ix86_cmodel == CM_MEDIUM && code))
30831 return DW_EH_PE_udata4;
30832 return DW_EH_PE_absptr;
30835 /* Expand copysign from SIGN to the positive value ABS_VALUE
30836 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
30839 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30841 enum machine_mode mode = GET_MODE (sign);
30842 rtx sgn = gen_reg_rtx (mode);
30843 if (mask == NULL_RTX)
30845 enum machine_mode vmode;
30847 if (mode == SFmode)
30849 else if (mode == DFmode)
30854 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
30855 if (!VECTOR_MODE_P (mode))
30857 /* We need to generate a scalar mode mask in this case. */
30858 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30859 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30860 mask = gen_reg_rtx (mode);
30861 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30865 mask = gen_rtx_NOT (mode, mask);
30866 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30867 gen_rtx_AND (mode, mask, sign)));
30868 emit_insn (gen_rtx_SET (VOIDmode, result,
30869 gen_rtx_IOR (mode, abs_value, sgn)));
30872 /* Expand fabs (OP0) and return a new rtx that holds the result. The
30873 mask for masking out the sign-bit is stored in *SMASK, if that is
30876 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30878 enum machine_mode vmode, mode = GET_MODE (op0);
30881 xa = gen_reg_rtx (mode);
30882 if (mode == SFmode)
30884 else if (mode == DFmode)
30888 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
30889 if (!VECTOR_MODE_P (mode))
30891 /* We need to generate a scalar mode mask in this case. */
30892 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30893 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30894 mask = gen_reg_rtx (mode);
30895 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30897 emit_insn (gen_rtx_SET (VOIDmode, xa,
30898 gen_rtx_AND (mode, op0, mask)));
30906 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30907 swapping the operands if SWAP_OPERANDS is true. The expanded
30908 code is a forward jump to a newly created label in case the
30909 comparison is true. The generated label rtx is returned. */
30911 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30912 bool swap_operands)
30923 label = gen_label_rtx ();
30924 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30925 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30926 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30927 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30928 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30929 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30930 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30931 JUMP_LABEL (tmp) = label;
30936 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30937 using comparison code CODE. Operands are swapped for the comparison if
30938 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30940 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30941 bool swap_operands)
30943 enum machine_mode mode = GET_MODE (op0);
30944 rtx mask = gen_reg_rtx (mode);
30953 if (mode == DFmode)
30954 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30955 gen_rtx_fmt_ee (code, mode, op0, op1)));
30957 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30958 gen_rtx_fmt_ee (code, mode, op0, op1)));
30963 /* Generate and return a rtx of mode MODE for 2**n where n is the number
30964 of explicitly stored mantissa bits of MODE, which must be DFmode or SFmode. */
30966 ix86_gen_TWO52 (enum machine_mode mode)
30968 REAL_VALUE_TYPE TWO52r;
30971 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30972 TWO52 = const_double_from_real_value (TWO52r, mode);
30973 TWO52 = force_reg (mode, TWO52);
30978 /* Expand SSE sequence for computing lround from OP1 storing
30981 ix86_expand_lround (rtx op0, rtx op1)
30983 /* C code for the stuff we're doing below:
30984 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
30987 enum machine_mode mode = GET_MODE (op1);
30988 const struct real_format *fmt;
30989 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30992 /* load nextafter (0.5, 0.0) */
30993 fmt = REAL_MODE_FORMAT (mode);
30994 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30995 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
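/* pred_half is the largest representable value below 0.5; adding it
   instead of 0.5 keeps inputs just under one half (e.g. the DFmode
   value 0.49999999999999994) from rounding up to 1.0 in the addition.  */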
30997 /* adj = copysign (0.5, op1) */
30998 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30999 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31001 /* adj = op1 + adj */
31002 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31004 /* op0 = (imode)adj */
31005 expand_fix (op0, adj, 0);
31008 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
31011 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31013 /* C code for the stuff we're doing below (for do_floor):
31015 xi -= (double)xi > op1 ? 1 : 0;
31018 enum machine_mode fmode = GET_MODE (op1);
31019 enum machine_mode imode = GET_MODE (op0);
31020 rtx ireg, freg, label, tmp;
31022 /* reg = (long)op1 */
31023 ireg = gen_reg_rtx (imode);
31024 expand_fix (ireg, op1, 0);
31026 /* freg = (double)reg */
31027 freg = gen_reg_rtx (fmode);
31028 expand_float (freg, ireg, 0);
31030 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31031 label = ix86_expand_sse_compare_and_jump (UNLE,
31032 freg, op1, !do_floor);
31033 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31034 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31035 emit_move_insn (ireg, tmp);
31037 emit_label (label);
31038 LABEL_NUSES (label) = 1;
31040 emit_move_insn (op0, ireg);
31043 /* Expand rint (round OPERAND1 to integral in the current rounding mode) storing the
31044 result in OPERAND0. */
31046 ix86_expand_rint (rtx operand0, rtx operand1)
31048 /* C code for the stuff we're doing below:
31049 xa = fabs (operand1);
31050 if (!isless (xa, 2**52))
31052 xa = xa + 2**52 - 2**52;
31053 return copysign (xa, operand1);
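     The 2**52 trick works because DFmode stores 52 explicit mantissa
     bits: for 0 <= xa < 2**52, the addition xa + 2**52 itself rounds
     the fraction away in the current rounding mode, and subtracting
     2**52 recovers the rounded integer; e.g. 3.7 + 2**52 rounds to
     4503599627370500.0, giving 4.0 after the subtraction.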
31055 enum machine_mode mode = GET_MODE (operand0);
31056 rtx res, xa, label, TWO52, mask;
31058 res = gen_reg_rtx (mode);
31059 emit_move_insn (res, operand1);
31061 /* xa = abs (operand1) */
31062 xa = ix86_expand_sse_fabs (res, &mask);
31064 /* if (!isless (xa, TWO52)) goto label; */
31065 TWO52 = ix86_gen_TWO52 (mode);
31066 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31068 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31069 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31071 ix86_sse_copysign_to_positive (res, xa, res, mask);
31073 emit_label (label);
31074 LABEL_NUSES (label) = 1;
31076 emit_move_insn (operand0, res);
31079 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31082 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
31084 /* C code for the stuff we expand below.
31085 double xa = fabs (x), x2;
31086 if (!isless (xa, TWO52))
31088 xa = xa + TWO52 - TWO52;
31089 x2 = copysign (xa, x);
31098 enum machine_mode mode = GET_MODE (operand0);
31099 rtx xa, TWO52, tmp, label, one, res, mask;
31101 TWO52 = ix86_gen_TWO52 (mode);
31103 /* Temporary for holding the result, initialized to the input
31104 operand to ease control flow. */
31105 res = gen_reg_rtx (mode);
31106 emit_move_insn (res, operand1);
31108 /* xa = abs (operand1) */
31109 xa = ix86_expand_sse_fabs (res, &mask);
31111 /* if (!isless (xa, TWO52)) goto label; */
31112 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31114 /* xa = xa + TWO52 - TWO52; */
31115 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31116 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31118 /* xa = copysign (xa, operand1) */
31119 ix86_sse_copysign_to_positive (xa, xa, res, mask);
31121 /* generate 1.0 or -1.0 */
31122 one = force_reg (mode,
31123 const_double_from_real_value (do_floor
31124 ? dconst1 : dconstm1, mode));
31126 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31127 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31128 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31129 gen_rtx_AND (mode, one, tmp)));
31130 /* We always need to subtract here to preserve signed zero. */
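/* (Under round-to-nearest, -0.0 - 0.0 is -0.0 while -0.0 + 0.0 is
   +0.0; and because ONE is -1.0 in the ceil case, subtracting it
   still adds the compensation.)  */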
31131 tmp = expand_simple_binop (mode, MINUS,
31132 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31133 emit_move_insn (res, tmp);
31135 emit_label (label);
31136 LABEL_NUSES (label) = 1;
31138 emit_move_insn (operand0, res);
31141 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31144 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
31146 /* C code for the stuff we expand below.
31147 double xa = fabs (x), x2;
31148 if (!isless (xa, TWO52))
31150 x2 = (double)(long)x;
31157 if (HONOR_SIGNED_ZEROS (mode))
31158 return copysign (x2, x);
31161 enum machine_mode mode = GET_MODE (operand0);
31162 rtx xa, xi, TWO52, tmp, label, one, res, mask;
31164 TWO52 = ix86_gen_TWO52 (mode);
31166 /* Temporary for holding the result, initialized to the input
31167 operand to ease control flow. */
31168 res = gen_reg_rtx (mode);
31169 emit_move_insn (res, operand1);
31171 /* xa = abs (operand1) */
31172 xa = ix86_expand_sse_fabs (res, &mask);
31174 /* if (!isless (xa, TWO52)) goto label; */
31175 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31177 /* xa = (double)(long)x */
31178 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31179 expand_fix (xi, res, 0);
31180 expand_float (xa, xi, 0);
31183 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
31185 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31186 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31187 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31188 gen_rtx_AND (mode, one, tmp)));
31189 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
31190 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31191 emit_move_insn (res, tmp);
31193 if (HONOR_SIGNED_ZEROS (mode))
31194 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
31196 emit_label (label);
31197 LABEL_NUSES (label) = 1;
31199 emit_move_insn (operand0, res);
31202 /* Expand SSE sequence for computing round from OPERAND1 storing
31203 into OPERAND0. Sequence that works without relying on DImode truncation
31204 via cvttsd2siq that is only available on 64bit targets. */
31206 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
31208 /* C code for the stuff we expand below.
31209 double xa = fabs (x), xa2, x2;
31210 if (!isless (xa, TWO52))
31212 Using the absolute value and copying back sign makes
31213 -0.0 -> -0.0 correct.
31214 xa2 = xa + TWO52 - TWO52;
31219 else if (dxa > 0.5)
31221 x2 = copysign (xa2, x);
31224 enum machine_mode mode = GET_MODE (operand0);
31225 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
31227 TWO52 = ix86_gen_TWO52 (mode);
31229 /* Temporary for holding the result, initialized to the input
31230 operand to ease control flow. */
31231 res = gen_reg_rtx (mode);
31232 emit_move_insn (res, operand1);
31234 /* xa = abs (operand1) */
31235 xa = ix86_expand_sse_fabs (res, &mask);
31237 /* if (!isless (xa, TWO52)) goto label; */
31238 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31240 /* xa2 = xa + TWO52 - TWO52; */
31241 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31242 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
31244 /* dxa = xa2 - xa; */
31245 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
31247 /* generate 0.5, 1.0 and -0.5 */
31248 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
31249 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
31250 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
31254 tmp = gen_reg_rtx (mode);
31255 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
31256 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
31257 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31258 gen_rtx_AND (mode, one, tmp)));
31259 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31260 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
31261 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
31262 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31263 gen_rtx_AND (mode, one, tmp)));
31264 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31266 /* res = copysign (xa2, operand1) */
31267 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
31269 emit_label (label);
31270 LABEL_NUSES (label) = 1;
31272 emit_move_insn (operand0, res);
31275 /* Expand SSE sequence for computing trunc from OPERAND1 storing
31278 ix86_expand_trunc (rtx operand0, rtx operand1)
31280 /* C code for SSE variant we expand below.
31281 double xa = fabs (x), x2;
31282 if (!isless (xa, TWO52))
31284 x2 = (double)(long)x;
31285 if (HONOR_SIGNED_ZEROS (mode))
31286 return copysign (x2, x);
31289 enum machine_mode mode = GET_MODE (operand0);
31290 rtx xa, xi, TWO52, label, res, mask;
31292 TWO52 = ix86_gen_TWO52 (mode);
31294 /* Temporary for holding the result, initialized to the input
31295 operand to ease control flow. */
31296 res = gen_reg_rtx (mode);
31297 emit_move_insn (res, operand1);
31299 /* xa = abs (operand1) */
31300 xa = ix86_expand_sse_fabs (res, &mask);
31302 /* if (!isless (xa, TWO52)) goto label; */
31303 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31305 /* x = (double)(long)x */
31306 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31307 expand_fix (xi, res, 0);
31308 expand_float (res, xi, 0);
31310 if (HONOR_SIGNED_ZEROS (mode))
31311 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
31313 emit_label (label);
31314 LABEL_NUSES (label) = 1;
31316 emit_move_insn (operand0, res);
31319 /* Expand SSE sequence for computing trunc from OPERAND1 storing
31322 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
31324 enum machine_mode mode = GET_MODE (operand0);
31325 rtx xa, mask, TWO52, label, one, res, smask, tmp;
31327 /* C code for SSE variant we expand below.
31328 double xa = fabs (x), x2;
31329 if (!isless (xa, TWO52))
31331 xa2 = xa + TWO52 - TWO52;
31335 x2 = copysign (xa2, x);
31339 TWO52 = ix86_gen_TWO52 (mode);
31341 /* Temporary for holding the result, initialized to the input
31342 operand to ease control flow. */
31343 res = gen_reg_rtx (mode);
31344 emit_move_insn (res, operand1);
31346 /* xa = abs (operand1) */
31347 xa = ix86_expand_sse_fabs (res, &smask);
31349 /* if (!isless (xa, TWO52)) goto label; */
31350 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31352 /* res = xa + TWO52 - TWO52; */
31353 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31354 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
31355 emit_move_insn (res, tmp);
31358 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
31360 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
31361 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
31362 emit_insn (gen_rtx_SET (VOIDmode, mask,
31363 gen_rtx_AND (mode, mask, one)));
31364 tmp = expand_simple_binop (mode, MINUS,
31365 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
31366 emit_move_insn (res, tmp);
31368 /* res = copysign (res, operand1) */
31369 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
31371 emit_label (label);
31372 LABEL_NUSES (label) = 1;
31374 emit_move_insn (operand0, res);
31377 /* Expand SSE sequence for computing round from OPERAND1 storing
31380 ix86_expand_round (rtx operand0, rtx operand1)
31382 /* C code for the stuff we're doing below:
31383 double xa = fabs (x);
31384 if (!isless (xa, TWO52))
31386 xa = (double)(long)(xa + nextafter (0.5, 0.0));
31387 return copysign (xa, x);
31389 enum machine_mode mode = GET_MODE (operand0);
31390 rtx res, TWO52, xa, label, xi, half, mask;
31391 const struct real_format *fmt;
31392 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31394 /* Temporary for holding the result, initialized to the input
31395 operand to ease control flow. */
31396 res = gen_reg_rtx (mode);
31397 emit_move_insn (res, operand1);
31399 TWO52 = ix86_gen_TWO52 (mode);
31400 xa = ix86_expand_sse_fabs (res, &mask);
31401 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31403 /* load nextafter (0.5, 0.0) */
31404 fmt = REAL_MODE_FORMAT (mode);
31405 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31406 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31408 /* xa = xa + 0.5 */
31409 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
31410 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
31412 /* xa = (double)(int64_t)xa */
31413 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31414 expand_fix (xi, xa, 0);
31415 expand_float (xa, xi, 0);
31417 /* res = copysign (xa, operand1) */
31418 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
31420 emit_label (label);
31421 LABEL_NUSES (label) = 1;
31423 emit_move_insn (operand0, res);
31427 /* Table of valid machine attributes. */
31428 static const struct attribute_spec ix86_attribute_table[] =
31430 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
31431 /* Stdcall attribute says callee is responsible for popping arguments
31432 if they are not variable. */
31433 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31434 /* Fastcall attribute says callee is responsible for popping arguments
31435 if they are not variable. */
31436 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31437 /* Thiscall attribute says callee is responsible for popping arguments
31438 if they are not variable. */
31439 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31440 /* Cdecl attribute says the callee is a normal C declaration. */
31441 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31442 /* Regparm attribute specifies how many integer arguments are to be
31443 passed in registers. */
31444 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
31445 /* Sseregparm attribute says we are using x86_64 calling conventions
31446 for FP arguments. */
31447 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31448 /* force_align_arg_pointer says this function realigns the stack at entry. */
31449 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
31450 false, true, true, ix86_handle_cconv_attribute },
31451 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31452 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
31453 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
31454 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
31456 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
31457 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
31458 #ifdef SUBTARGET_ATTRIBUTE_TABLE
31459 SUBTARGET_ATTRIBUTE_TABLE,
31461 /* ms_abi and sysv_abi calling convention function attributes. */
31462 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
31463 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
31464 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
31466 { NULL, 0, 0, false, false, false, NULL }
31469 /* Implement targetm.vectorize.builtin_vectorization_cost. */
31471 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
31472 tree vectype ATTRIBUTE_UNUSED,
31473 int misalign ATTRIBUTE_UNUSED)
31475 switch (type_of_cost)
31478 return ix86_cost->scalar_stmt_cost;
31481 return ix86_cost->scalar_load_cost;
31484 return ix86_cost->scalar_store_cost;
31487 return ix86_cost->vec_stmt_cost;
31490 return ix86_cost->vec_align_load_cost;
31493 return ix86_cost->vec_store_cost;
31495 case vec_to_scalar:
31496 return ix86_cost->vec_to_scalar_cost;
31498 case scalar_to_vec:
31499 return ix86_cost->scalar_to_vec_cost;
31501 case unaligned_load:
31502 case unaligned_store:
31503 return ix86_cost->vec_unalign_load_cost;
31505 case cond_branch_taken:
31506 return ix86_cost->cond_taken_branch_cost;
31508 case cond_branch_not_taken:
31509 return ix86_cost->cond_not_taken_branch_cost;
31515 gcc_unreachable ();
31520 /* Implement targetm.vectorize.builtin_vec_perm. */
31523 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
31525 tree itype = TREE_TYPE (vec_type);
31526 bool u = TYPE_UNSIGNED (itype);
31527 enum machine_mode vmode = TYPE_MODE (vec_type);
31528 enum ix86_builtins fcode;
31529 bool ok = TARGET_SSE2;
31535 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
31538 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
31540 itype = ix86_get_builtin_type (IX86_BT_DI);
31545 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
31549 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
31551 itype = ix86_get_builtin_type (IX86_BT_SI);
31555 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
31558 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
31561 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
31564 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
31574 *mask_type = itype;
31575 return ix86_builtins[(int) fcode];
31578 /* Return a vector mode with twice as many elements as VMODE. */
31579 /* ??? Consider moving this to a table generated by genmodes.c. */
31581 static enum machine_mode
31582 doublesize_vector_mode (enum machine_mode vmode)
31586 case V2SFmode: return V4SFmode;
31587 case V1DImode: return V2DImode;
31588 case V2SImode: return V4SImode;
31589 case V4HImode: return V8HImode;
31590 case V8QImode: return V16QImode;
31592 case V2DFmode: return V4DFmode;
31593 case V4SFmode: return V8SFmode;
31594 case V2DImode: return V4DImode;
31595 case V4SImode: return V8SImode;
31596 case V8HImode: return V16HImode;
31597 case V16QImode: return V32QImode;
31599 case V4DFmode: return V8DFmode;
31600 case V8SFmode: return V16SFmode;
31601 case V4DImode: return V8DImode;
31602 case V8SImode: return V16SImode;
31603 case V16HImode: return V32HImode;
31604 case V32QImode: return V64QImode;
31607 gcc_unreachable ();
31611 /* Construct (set target (vec_select op0 (parallel perm))) and
31612 return true if that's a valid instruction in the active ISA. */
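/* An illustrative call (not from the original code):
     static const unsigned char perm[4] = { 1, 0, 3, 2 };
     ok = expand_vselect (target, op0, perm, 4);
   on a V4SFmode target this tries to emit a single vec_select insn;
   if no pattern in sse.md recognizes it, the insn is discarded and
   false is returned.  */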
31615 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
31617 rtx rperm[MAX_VECT_LEN], x;
31620 for (i = 0; i < nelt; ++i)
31621 rperm[i] = GEN_INT (perm[i]);
31623 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
31624 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
31625 x = gen_rtx_SET (VOIDmode, target, x);
31628 if (recog_memoized (x) < 0)
31636 /* Similar, but generate a vec_concat from op0 and op1 as well. */
31639 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
31640 const unsigned char *perm, unsigned nelt)
31642 enum machine_mode v2mode;
31645 v2mode = doublesize_vector_mode (GET_MODE (op0));
31646 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
31647 return expand_vselect (target, x, perm, nelt);
31650 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31651 in terms of blendp[sd] / pblendw / pblendvb. */
31654 expand_vec_perm_blend (struct expand_vec_perm_d *d)
31656 enum machine_mode vmode = d->vmode;
31657 unsigned i, mask, nelt = d->nelt;
31658 rtx target, op0, op1, x;
31660 if (!TARGET_SSE4_1 || d->op0 == d->op1)
31662 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
31665 /* This is a blend, not a permute. Elements must stay in their
31666 respective lanes. */
31667 for (i = 0; i < nelt; ++i)
31669 unsigned e = d->perm[i];
31670 if (!(e == i || e == i + nelt))
31677 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31678 decision should be extracted elsewhere, so that we only try that
31679 sequence once all budget==3 options have been tried. */
31681 /* For bytes, see if bytes move in pairs so we can use pblendw with
31682 an immediate argument, rather than pblendvb with a vector argument. */
31683 if (vmode == V16QImode)
31685 bool pblendw_ok = true;
31686 for (i = 0; i < 16 && pblendw_ok; i += 2)
31687 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31691 rtx rperm[16], vperm;
31693 for (i = 0; i < nelt; ++i)
31694 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31696 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31697 vperm = force_reg (V16QImode, vperm);
31699 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31704 target = d->target;
31716 for (i = 0; i < nelt; ++i)
31717 mask |= (d->perm[i] >= nelt) << i;
31721 for (i = 0; i < 2; ++i)
31722 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31726 for (i = 0; i < 4; ++i)
31727 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31731 for (i = 0; i < 8; ++i)
31732 mask |= (d->perm[i * 2] >= 16) << i;
31736 target = gen_lowpart (vmode, target);
31737 op0 = gen_lowpart (vmode, op0);
31738 op1 = gen_lowpart (vmode, op1);
31742 gcc_unreachable ();
31745 /* This matches five different patterns, depending on the mode. */
31746 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31747 x = gen_rtx_SET (VOIDmode, target, x);
31753 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31754 in terms of the variable form of vpermilps.
31756 Note that we will have already failed the immediate input vpermilps,
31757 which requires that the high and low part shuffle be identical; the
31758 variable form doesn't require that. */
31761 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31763 rtx rperm[8], vperm;
31766 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31769 /* We can only permute within the 128-bit lane. */
31770 for (i = 0; i < 8; ++i)
31772 unsigned e = d->perm[i];
31773 if (i < 4 ? e >= 4 : e < 4)
31780 for (i = 0; i < 8; ++i)
31782 unsigned e = d->perm[i];
31784 /* Within each 128-bit lane, the elements of op0 are numbered
31785 from 0 and the elements of op1 are numbered from 4. */
31791 rperm[i] = GEN_INT (e);
31794 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31795 vperm = force_reg (V8SImode, vperm);
31796 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31801 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31802 in terms of pshufb or vpperm. */
31805 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31807 unsigned i, nelt, eltsz;
31808 rtx rperm[16], vperm, target, op0, op1;
31810 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31812 if (GET_MODE_SIZE (d->vmode) != 16)
31819 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31821 for (i = 0; i < nelt; ++i)
31823 unsigned j, e = d->perm[i];
31824 for (j = 0; j < eltsz; ++j)
31825 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31828 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31829 vperm = force_reg (V16QImode, vperm);
31831 target = gen_lowpart (V16QImode, d->target);
31832 op0 = gen_lowpart (V16QImode, d->op0);
31833 if (d->op0 == d->op1)
31834 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31837 op1 = gen_lowpart (V16QImode, d->op1);
31838 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
31844 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31845 in a single instruction. */
31848 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31850 unsigned i, nelt = d->nelt;
31851 unsigned char perm2[MAX_VECT_LEN];
31853 /* Check plain VEC_SELECT first, because AVX has instructions that could
31854 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31855 input where SEL+CONCAT may not. */
31856 if (d->op0 == d->op1)
31858 int mask = nelt - 1;
31860 for (i = 0; i < nelt; i++)
31861 perm2[i] = d->perm[i] & mask;
31863 if (expand_vselect (d->target, d->op0, perm2, nelt))
31866 /* There are plenty of patterns in sse.md that are written for
31867 SEL+CONCAT and are not replicated for a single op. Perhaps
31868 that should be changed, to avoid the nastiness here. */
31870 /* Recognize interleave style patterns, which means incrementing
31871 every other permutation operand. */
31872 for (i = 0; i < nelt; i += 2)
31874 perm2[i] = d->perm[i] & mask;
31875 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31877 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31880 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31883 for (i = 0; i < nelt; i += 4)
31885 perm2[i + 0] = d->perm[i + 0] & mask;
31886 perm2[i + 1] = d->perm[i + 1] & mask;
31887 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31888 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31891 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31896 /* Finally, try the fully general two operand permute. */
31897 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31900 /* Recognize interleave style patterns with reversed operands. */
31901 if (d->op0 != d->op1)
31903 for (i = 0; i < nelt; ++i)
31905 unsigned e = d->perm[i];
31913 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31917 /* Try the SSE4.1 blend variable merge instructions. */
31918 if (expand_vec_perm_blend (d))
31921 /* Try one of the AVX vpermil variable permutations. */
31922 if (expand_vec_perm_vpermil (d))
31925 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31926 if (expand_vec_perm_pshufb (d))
31932 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31933 in terms of a pair of pshuflw + pshufhw instructions. */
31936 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31938 unsigned char perm2[MAX_VECT_LEN];
31942 if (d->vmode != V8HImode || d->op0 != d->op1)
31945 /* The two permutations only operate in 64-bit lanes. */
31946 for (i = 0; i < 4; ++i)
31947 if (d->perm[i] >= 4)
31949 for (i = 4; i < 8; ++i)
31950 if (d->perm[i] < 4)
31956 /* Emit the pshuflw. */
31957 memcpy (perm2, d->perm, 4);
31958 for (i = 4; i < 8; ++i)
31960 ok = expand_vselect (d->target, d->op0, perm2, 8);
31963 /* Emit the pshufhw. */
31964 memcpy (perm2 + 4, d->perm + 4, 4);
31965 for (i = 0; i < 4; ++i)
31967 ok = expand_vselect (d->target, d->target, perm2, 8);
31973 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31974 the permutation using the SSSE3 palignr instruction. This succeeds
31975 when all of the elements in PERM fit within one vector and we merely
31976 need to shift them down so that a single vector permutation has a
31977 chance to succeed. */
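/* Worked example (hypothetical V8HImode case): for PERM = { 3 4 5 6 7
   8 9 10 }, min is 3, so palignr shifts the op1:op0 pair down by three
   elements and the residual permutation becomes the in-order
   { 0 1 2 3 4 5 6 7 }, the degenerate case tested below.  */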
31980 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31982 unsigned i, nelt = d->nelt;
31987 /* Even with AVX, palignr only operates on 128-bit vectors. */
31988 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31991 min = nelt, max = 0;
31992 for (i = 0; i < nelt; ++i)
31994 unsigned e = d->perm[i];
32000 if (min == 0 || max - min >= nelt)
32003 /* Given that we have SSSE3, we know we'll be able to implement the
32004 single operand permutation after the palignr with pshufb. */
32008 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32009 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32010 gen_lowpart (TImode, d->op1),
32011 gen_lowpart (TImode, d->op0), shift));
32013 d->op0 = d->op1 = d->target;
32016 for (i = 0; i < nelt; ++i)
32018 unsigned e = d->perm[i] - min;
32024 /* Test for the degenerate case where the alignment by itself
32025 produces the desired permutation. */
32029 ok = expand_vec_perm_1 (d);
32035 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32036 a two vector permutation into a single vector permutation by using
32037 an interleave operation to merge the vectors. */
32040 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32042 struct expand_vec_perm_d dremap, dfinal;
32043 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32044 unsigned contents, h1, h2, h3, h4;
32045 unsigned char remap[2 * MAX_VECT_LEN];
32049 if (d->op0 == d->op1)
32052 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32053 lanes. We can use similar techniques with the vperm2f128 instruction,
32054 but it requires slightly different logic. */
32055 if (GET_MODE_SIZE (d->vmode) != 16)
32058 /* Examine from whence the elements come. */
32060 for (i = 0; i < nelt; ++i)
32061 contents |= 1u << d->perm[i];
32063 /* Split the two input vectors into 4 halves. */
32064 h1 = (1u << nelt2) - 1;
32069 memset (remap, 0xff, sizeof (remap));
32072 /* If the elements all come from the low halves, use interleave low;
32073 similarly for interleave high. If the elements are from mis-matched halves, we
32074 can use shufps for V4SF/V4SI or do a DImode shuffle. */
32075 if ((contents & (h1 | h3)) == contents)
32077 for (i = 0; i < nelt2; ++i)
32080 remap[i + nelt] = i * 2 + 1;
32081 dremap.perm[i * 2] = i;
32082 dremap.perm[i * 2 + 1] = i + nelt;
32085 else if ((contents & (h2 | h4)) == contents)
32087 for (i = 0; i < nelt2; ++i)
32089 remap[i + nelt2] = i * 2;
32090 remap[i + nelt + nelt2] = i * 2 + 1;
32091 dremap.perm[i * 2] = i + nelt2;
32092 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
32095 else if ((contents & (h1 | h4)) == contents)
32097 for (i = 0; i < nelt2; ++i)
32100 remap[i + nelt + nelt2] = i + nelt2;
32101 dremap.perm[i] = i;
32102 dremap.perm[i + nelt2] = i + nelt + nelt2;
32106 dremap.vmode = V2DImode;
32108 dremap.perm[0] = 0;
32109 dremap.perm[1] = 3;
32112 else if ((contents & (h2 | h3)) == contents)
32114 for (i = 0; i < nelt2; ++i)
32116 remap[i + nelt2] = i;
32117 remap[i + nelt] = i + nelt2;
32118 dremap.perm[i] = i + nelt2;
32119 dremap.perm[i + nelt2] = i + nelt;
32123 dremap.vmode = V2DImode;
32125 dremap.perm[0] = 1;
32126 dremap.perm[1] = 2;
32132 /* Use the remapping array set up above to move the elements from their
32133 swizzled locations into their final destinations. */
32135 for (i = 0; i < nelt; ++i)
32137 unsigned e = remap[d->perm[i]];
32138 gcc_assert (e < nelt);
32139 dfinal.perm[i] = e;
32141 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
32142 dfinal.op1 = dfinal.op0;
32143 dremap.target = dfinal.op0;
32145 /* Test if the final remap can be done with a single insn. For V4SFmode or
32146 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
32148 ok = expand_vec_perm_1 (&dfinal);
32149 seq = get_insns ();
32155 if (dremap.vmode != dfinal.vmode)
32157 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
32158 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
32159 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
32162 ok = expand_vec_perm_1 (&dremap);
32169 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
32170 permutation with two pshufb insns and an ior. We should have already
32171 failed all two instruction sequences. */
32174 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
32176 rtx rperm[2][16], vperm, l, h, op, m128;
32177 unsigned int i, nelt, eltsz;
32179 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32181 gcc_assert (d->op0 != d->op1);
32184 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32186 /* Generate two permutation masks. If the required element is within
32187 the given vector it is shuffled into the proper lane. If the required
32188 element is in the other vector, force a zero into the lane by setting
32189 bit 7 in the permutation mask. */
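/* Sketch for a hypothetical V8HImode input (eltsz == 2): if d->perm[0]
   is 9, element 0 comes from word 1 of the second vector, so the OP1
   mask gets bytes { 2, 3 } in byte lanes 0-1 while the OP0 mask gets
   -128 there; the final IOR below merges the two half results.  */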
32190 m128 = GEN_INT (-128);
32191 for (i = 0; i < nelt; ++i)
32193 unsigned j, e = d->perm[i];
32194 unsigned which = (e >= nelt);
32198 for (j = 0; j < eltsz; ++j)
32200 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
32201 rperm[1-which][i*eltsz + j] = m128;
32205 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
32206 vperm = force_reg (V16QImode, vperm);
32208 l = gen_reg_rtx (V16QImode);
32209 op = gen_lowpart (V16QImode, d->op0);
32210 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
32212 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
32213 vperm = force_reg (V16QImode, vperm);
32215 h = gen_reg_rtx (V16QImode);
32216 op = gen_lowpart (V16QImode, d->op1);
32217 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
32219 op = gen_lowpart (V16QImode, d->target);
32220 emit_insn (gen_iorv16qi3 (op, l, h));
32225 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
32226 and extract-odd permutations. */
32229 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
32236 t1 = gen_reg_rtx (V4DFmode);
32237 t2 = gen_reg_rtx (V4DFmode);
32239 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
32240 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
32241 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
32243 /* Now an unpck[lh]pd will produce the result required. */
32245 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
32247 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
32253 int mask = odd ? 0xdd : 0x88;
32255 t1 = gen_reg_rtx (V8SFmode);
32256 t2 = gen_reg_rtx (V8SFmode);
32257 t3 = gen_reg_rtx (V8SFmode);
32259 /* Shuffle within the 128-bit lanes to produce:
32260 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
32261 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
32264 /* Shuffle the lanes around to produce:
32265 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
32266 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
32269 /* Shuffle within the 128-bit lanes to produce:
32270 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
32271 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
32273 /* Shuffle within the 128-bit lanes to produce:
32274 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
32275 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
32277 /* Shuffle the lanes around to produce:
32278 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
32279 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
32288 /* These are always directly implementable by expand_vec_perm_1. */
32289 gcc_unreachable ();
32293 return expand_vec_perm_pshufb2 (d);
32296 /* We need 2*log2(N)-1 operations to achieve odd/even
32297 with interleave. */
32298 t1 = gen_reg_rtx (V8HImode);
32299 t2 = gen_reg_rtx (V8HImode);
32300 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
32301 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
32302 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
32303 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
32305 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
32307 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
32314 return expand_vec_perm_pshufb2 (d);
32317 t1 = gen_reg_rtx (V16QImode);
32318 t2 = gen_reg_rtx (V16QImode);
32319 t3 = gen_reg_rtx (V16QImode);
32320 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
32321 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
32322 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
32323 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
32324 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
32325 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
32327 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
32329 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
32335 gcc_unreachable ();
32341 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
32342 extract-even and extract-odd permutations. */
32345 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
32347 unsigned i, odd, nelt = d->nelt;
32350 if (odd != 0 && odd != 1)
32353 for (i = 1; i < nelt; ++i)
32354 if (d->perm[i] != 2 * i + odd)
32357 return expand_vec_perm_even_odd_1 (d, odd);
32360 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
32361 permutations. We assume that expand_vec_perm_1 has already failed. */
32364 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
32366 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
32367 enum machine_mode vmode = d->vmode;
32368 unsigned char perm2[4];
32376 /* These are special-cased in sse.md so that we can optionally
32377 use the vbroadcast instruction. They expand to two insns
32378 if the input happens to be in a register. */
32379 gcc_unreachable ();
32385 /* These are always implementable using standard shuffle patterns. */
32386 gcc_unreachable ();
32390 /* These can be implemented via interleave. We save one insn by
32391 stopping once we have promoted to V4SImode and then use pshufd. */
32394 optab otab = vec_interleave_low_optab;
32398 otab = vec_interleave_high_optab;
32403 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
32404 vmode = get_mode_wider_vector (vmode);
32405 op0 = gen_lowpart (vmode, op0);
32407 while (vmode != V4SImode);
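/* E.g. broadcasting a V16QImode element: one interleave (low or high
   half, chosen from ELT) widens the units to V8HImode, a second to
   V4SImode, and the pshufd emitted below replicates the selected
   SImode element.  */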
32409 memset (perm2, elt, 4);
32410 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
32415 gcc_unreachable ();
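/* Illustrative sketch (hypothetical helper, plain C): the widening loop
   above is the vector analogue of splatting a byte by repeated
   self-interleaving until a 32-bit lane is uniform, after which a single
   pshufd replicates that lane across the vector. */
#if 0
static unsigned int
model_splat_byte (unsigned char b)
{
  unsigned int x = b;
  x |= x << 8;   /* like punpcklbw x,x: each 16-bit lane becomes B:B */
  x |= x << 16;  /* like punpcklwd x,x: the 32-bit lane is now uniform */
  return x;      /* pshufd then copies this lane into every element */
}
#endif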
32419 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
32420 broadcast permutations. */
32423 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
32425 unsigned i, elt, nelt = d->nelt;
32427 if (d->op0 != d->op1)
32431 for (i = 1; i < nelt; ++i)
32432 if (d->perm[i] != elt)
32435 return expand_vec_perm_broadcast_1 (d);
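/* Worked example: for V8HImode the mask {3,3,3,3,3,3,3,3}, with both
   operands identical, passes the checks above and reaches
   expand_vec_perm_broadcast_1 with elt == 3. */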
32438 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
32439 With all of the interface bits taken care of, perform the expansion
32440 in D and return true on success. */
32443 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
32445 /* Try a single instruction expansion. */
32446 if (expand_vec_perm_1 (d))
32449 /* Try sequences of two instructions. */
32451 if (expand_vec_perm_pshuflw_pshufhw (d))
32454 if (expand_vec_perm_palignr (d))
32457 if (expand_vec_perm_interleave2 (d))
32460 if (expand_vec_perm_broadcast (d))
32463 /* Try sequences of three instructions. */
32465 if (expand_vec_perm_pshufb2 (d))
32468 /* ??? Look for narrow permutations whose element orderings would
32469 allow the promotion to a wider mode. */
32471 /* ??? Look for sequences of interleave or a wider permute that place
32472 the data into the correct lanes for a half-vector shuffle like
32473 pshuf[lh]w or vpermilps. */
32475 /* ??? Look for sequences of interleave that produce the desired results.
32476 The combinatorics of punpck[lh] get pretty ugly... */
32478 if (expand_vec_perm_even_odd (d))
32484 /* Extract the values from the vector CST into the permutation array in D.
32485 Return 0 on error, 1 if all values from the permutation come from the
32486 first vector, 2 if all values from the second vector, and 3 otherwise. */
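/* Worked example: for V4SImode the mask {0,1,2,3} yields 1 (only the
   first vector is referenced); {4,5,6,7} yields 2 and is folded in place
   to {0,1,2,3}; {0,5,2,7} references both vectors and yields 3. */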
32489 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
32491 tree list = TREE_VECTOR_CST_ELTS (cst);
32492 unsigned i, nelt = d->nelt;
32495 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
32497 unsigned HOST_WIDE_INT e;
32499 if (!host_integerp (TREE_VALUE (list), 1))
32501 e = tree_low_cst (TREE_VALUE (list), 1);
32505 ret |= (e < nelt ? 1 : 2);
32508 gcc_assert (list == NULL);
32510 /* If all elements come from the second vector, fold them to refer to the first. */
32512 for (i = 0; i < nelt; ++i)
32513 d->perm[i] -= nelt;
32519 ix86_expand_vec_perm_builtin (tree exp)
32521 struct expand_vec_perm_d d;
32522 tree arg0, arg1, arg2;
32524 arg0 = CALL_EXPR_ARG (exp, 0);
32525 arg1 = CALL_EXPR_ARG (exp, 1);
32526 arg2 = CALL_EXPR_ARG (exp, 2);
32528 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
32529 d.nelt = GET_MODE_NUNITS (d.vmode);
32530 d.testing_p = false;
32531 gcc_assert (VECTOR_MODE_P (d.vmode));
32533 if (TREE_CODE (arg2) != VECTOR_CST)
32535 error_at (EXPR_LOCATION (exp),
32536 "vector permutation requires vector constant");
32540 switch (extract_vec_perm_cst (&d, arg2))
32546 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
32550 if (!operand_equal_p (arg0, arg1, 0))
32552 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
32553 d.op0 = force_reg (d.vmode, d.op0);
32554 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
32555 d.op1 = force_reg (d.vmode, d.op1);
32559 /* The elements of PERM do not suggest that only the first operand
32560 is used, but both operands are identical. Allow easier matching
32561 of the permutation by folding the permutation into the single operand. */
32564 unsigned i, nelt = d.nelt;
32565 for (i = 0; i < nelt; ++i)
32566 if (d.perm[i] >= nelt)
32572 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
32573 d.op0 = force_reg (d.vmode, d.op0);
32578 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
32579 d.op0 = force_reg (d.vmode, d.op0);
32584 d.target = gen_reg_rtx (d.vmode);
32585 if (ix86_expand_vec_perm_builtin_1 (&d))
32588 /* For compiler generated permutations, we should never get here, because
32589 the compiler should also be checking the ok hook. But since this is a
32590 builtin the user has access to, so don't abort. */
32594 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
32597 sorry ("vector permutation (%d %d %d %d)",
32598 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
32601 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
32602 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32603 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
32606 sorry ("vector permutation "
32607 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
32608 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32609 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
32610 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
32611 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
32614 gcc_unreachable ();
32617 return CONST0_RTX (d.vmode);
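/* Usage sketch (illustrative only; the builtin name and its selector
   type are assumptions modeled on the vec_perm builtins registered
   elsewhere in this file): */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
typedef int v4si __attribute__ ((vector_size (16)));

static v4sf
blend_even_a_odd_b (v4sf a, v4sf b)
{
  /* Indices 0..3 select from A, 4..7 select from B; the selector must
     be a vector constant, as enforced above. */
  const v4si mask = { 0, 5, 2, 7 };
  return __builtin_ia32_vec_perm_v4sf (a, b, mask);
}
#endif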
32620 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
32623 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
32625 struct expand_vec_perm_d d;
32629 d.vmode = TYPE_MODE (vec_type);
32630 d.nelt = GET_MODE_NUNITS (d.vmode);
32631 d.testing_p = true;
32633 /* Given sufficient ISA support we can just return true here
32634 for selected vector modes. */
32635 if (GET_MODE_SIZE (d.vmode) == 16)
32637 /* All implementable with a single vpperm insn. */
32640 /* All implementable with 2 pshufb + 1 ior. */
32643 /* All implementable with shufpd or unpck[lh]pd. */
32648 vec_mask = extract_vec_perm_cst (&d, mask);
32650 /* This hook cannot be called in response to something that the
32651 user does (unlike the builtin expander), so we should never see
32652 an error generated from the extract. */
32653 gcc_assert (vec_mask > 0 && vec_mask <= 3);
32654 one_vec = (vec_mask != 3);
32656 /* Implementable with shufps or pshufd. */
32657 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
32660 /* Otherwise we have to go through the motions and see if we can
32661 figure out how to generate the requested permutation. */
32662 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32663 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32665 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32668 ret = ix86_expand_vec_perm_builtin_1 (&d);
32675 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32677 struct expand_vec_perm_d d;
32683 d.vmode = GET_MODE (targ);
32684 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32685 d.testing_p = false;
32687 for (i = 0; i < nelt; ++i)
32688 d.perm[i] = i * 2 + odd;
32690 /* We'll either be able to implement the permutation directly... */
32691 if (expand_vec_perm_1 (&d))
32694 /* ... or we use the special-case patterns. */
32695 expand_vec_perm_even_odd_1 (&d, odd);
32698 /* This function returns the calling-ABI-specific va_list type node.
32699 It returns the FNDECL-specific va_list type. */
32702 ix86_fn_abi_va_list (tree fndecl)
32705 return va_list_type_node;
32706 gcc_assert (fndecl != NULL_TREE);
32708 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32709 return ms_va_list_type_node;
32711 return sysv_va_list_type_node;
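/* Usage sketch: a SysV compilation unit can still contain MS-ABI
   functions, which is why va_list must be chosen per FNDECL. The
   ms_abi attribute and __builtin_ms_va_list match the builtin exported
   below; the exact spelling of the start/end builtins is an assumption. */
#if 0
static int __attribute__ ((ms_abi))
ms_sum (int n, ...)
{
  __builtin_ms_va_list ap;
  int i, s = 0;
  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; ++i)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}
#endif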
32714 /* Returns the canonical va_list type specified by TYPE. If there
32715 is no valid TYPE provided, it returns NULL_TREE. */
32718 ix86_canonical_va_list_type (tree type)
32722 /* Resolve references and pointers to va_list type. */
32723 if (TREE_CODE (type) == MEM_REF)
32724 type = TREE_TYPE (type);
32725 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32726 type = TREE_TYPE (type);
32727 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32728 type = TREE_TYPE (type);
32732 wtype = va_list_type_node;
32733 gcc_assert (wtype != NULL_TREE);
32735 if (TREE_CODE (wtype) == ARRAY_TYPE)
32737 /* If va_list is an array type, the argument may have decayed
32738 to a pointer type, e.g. by being passed to another function.
32739 In that case, unwrap both types so that we can compare the
32740 underlying records. */
32741 if (TREE_CODE (htype) == ARRAY_TYPE
32742 || POINTER_TYPE_P (htype))
32744 wtype = TREE_TYPE (wtype);
32745 htype = TREE_TYPE (htype);
32748 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32749 return va_list_type_node;
32750 wtype = sysv_va_list_type_node;
32751 gcc_assert (wtype != NULL_TREE);
32753 if (TREE_CODE (wtype) == ARRAY_TYPE)
32755 /* If va_list is an array type, the argument may have decayed
32756 to a pointer type, e.g. by being passed to another function.
32757 In that case, unwrap both types so that we can compare the
32758 underlying records. */
32759 if (TREE_CODE (htype) == ARRAY_TYPE
32760 || POINTER_TYPE_P (htype))
32762 wtype = TREE_TYPE (wtype);
32763 htype = TREE_TYPE (htype);
32766 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32767 return sysv_va_list_type_node;
32768 wtype = ms_va_list_type_node;
32769 gcc_assert (wtype != NULL_TREE);
32771 if (TREE_CODE (wtype) == ARRAY_TYPE)
32773 /* If va_list is an array type, the argument may have decayed
32774 to a pointer type, e.g. by being passed to another function.
32775 In that case, unwrap both types so that we can compare the
32776 underlying records. */
32777 if (TREE_CODE (htype) == ARRAY_TYPE
32778 || POINTER_TYPE_P (htype))
32780 wtype = TREE_TYPE (wtype);
32781 htype = TREE_TYPE (htype);
32784 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32785 return ms_va_list_type_node;
32788 return std_canonical_va_list_type (type);
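/* The three comparison blocks above differ only in the builtin type
   tested against; a possible consolidation (untested sketch with a
   hypothetical helper name): */
#if 0
static bool
va_list_variant_matches (tree htype, tree wtype)
{
  /* If va_list is an array type, the argument may have decayed to a
     pointer type; unwrap both so the underlying records compare. */
  if (TREE_CODE (wtype) == ARRAY_TYPE
      && (TREE_CODE (htype) == ARRAY_TYPE || POINTER_TYPE_P (htype)))
    {
      wtype = TREE_TYPE (wtype);
      htype = TREE_TYPE (htype);
    }
  return TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype);
}
#endif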
32791 /* Iterate through the target-specific builtin types for va_list.
32792 IDX denotes the iterator, *PTREE is set to the result type of
32793 the va_list builtin, and *PNAME to its internal type.
32794 Returns zero if there is no element for this index, otherwise
32795 IDX should be increased upon the next call.
32796 Note: do not iterate over a base builtin's name such as __builtin_va_list.
32797 Used from c_common_nodes_and_builtins. */
32800 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32810 *ptree = ms_va_list_type_node;
32811 *pname = "__builtin_ms_va_list";
32815 *ptree = sysv_va_list_type_node;
32816 *pname = "__builtin_sysv_va_list";
32824 #undef TARGET_SCHED_DISPATCH
32825 #define TARGET_SCHED_DISPATCH has_dispatch
32826 #undef TARGET_SCHED_DISPATCH_DO
32827 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32829 /* The size of the dispatch window is the total number of bytes of
32830 object code allowed in a window. */
32831 #define DISPATCH_WINDOW_SIZE 16
32833 /* Number of dispatch windows considered for scheduling. */
32834 #define MAX_DISPATCH_WINDOWS 3
32836 /* Maximum number of instructions in a window. */
32839 /* Maximum number of immediate operands in a window. */
32842 /* Maximum number of immediate bits allowed in a window. */
32843 #define MAX_IMM_SIZE 128
32845 /* Maximum number of 32 bit immediates allowed in a window. */
32846 #define MAX_IMM_32 4
32848 /* Maximum number of 64 bit immediates allowed in a window. */
32849 #define MAX_IMM_64 2
32851 /* Maximum total of loads or prefetches allowed in a window. */
32854 /* Maximum total of stores allowed in a window. */
32855 #define MAX_STORE 1
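/* Worked arithmetic: the immediate limits above are mutually consistent;
   four 32 bit immediates (4 * 32 == 128) or two 64 bit immediates
   (2 * 64 == 128) exhaust the MAX_IMM_SIZE budget, and
   count_num_restricted below charges each 64 bit immediate as two
   32 bit slots when the two kinds are mixed. */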
32861 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
32862 enum dispatch_group {
32877 /* Number of allowable groups in a dispatch window. It is an array
32878 indexed by dispatch_group enum. 100 is used as a big number,
32879 because the number of these kinds of operations does not have any
32880 effect in a dispatch window, but we need them for other reasons in the table. */
32882 static unsigned int num_allowable_groups[disp_last] = {
32883 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32886 char group_name[disp_last + 1][16] = {
32887 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32888 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32889 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
32892 /* Instruction path. */
32895 path_single, /* Single micro op. */
32896 path_double, /* Double micro op. */
32897 path_multi, /* Instructions with more than 2 micro ops. */
32901 /* sched_insn_info defines a window to the instructions scheduled in
32902 the basic block. It contains a pointer to the insn_info table and
32903 the instruction scheduled.
32905 Windows are allocated for each basic block and are linked with each other. */
32907 typedef struct sched_insn_info_s {
32909 enum dispatch_group group;
32910 enum insn_path path;
32915 /* Linked list of dispatch windows. This is a two-way list of
32916 dispatch windows of a basic block. It contains information about
32917 the number of uops in the window and the total number of
32918 instructions and of bytes in the object code for this dispatch window. */
32920 typedef struct dispatch_windows_s {
32921 int num_insn; /* Number of insn in the window. */
32922 int num_uops; /* Number of uops in the window. */
32923 int window_size; /* Number of bytes in the window. */
32924 int window_num; /* Window number, either 0 or 1. */
32925 int num_imm; /* Number of immediates in the window. */
32926 int num_imm_32; /* Number of 32 bit immediates in the window. */
32927 int num_imm_64; /* Number of 64 bit immediates in the window. */
32928 int imm_size; /* Total byte size of immediates in the window. */
32929 int num_loads; /* Total memory loads in the window. */
32930 int num_stores; /* Total memory stores in the window. */
32931 int violation; /* Violation exists in window. */
32932 sched_insn_info *window; /* Pointer to the window. */
32933 struct dispatch_windows_s *next;
32934 struct dispatch_windows_s *prev;
32935 } dispatch_windows;
32937 /* Immediate values used in an insn. */
32938 typedef struct imm_info_s
32945 static dispatch_windows *dispatch_window_list;
32946 static dispatch_windows *dispatch_window_list1;
32948 /* Get dispatch group of insn. */
32950 static enum dispatch_group
32951 get_mem_group (rtx insn)
32953 enum attr_memory memory;
32955 if (INSN_CODE (insn) < 0)
32956 return disp_no_group;
32957 memory = get_attr_memory (insn);
32958 if (memory == MEMORY_STORE)
32961 if (memory == MEMORY_LOAD)
32964 if (memory == MEMORY_BOTH)
32965 return disp_load_store;
32967 return disp_no_group;
32970 /* Return true if insn is a compare instruction. */
32975 enum attr_type type;
32977 type = get_attr_type (insn);
32978 return (type == TYPE_TEST
32979 || type == TYPE_ICMP
32980 || type == TYPE_FCMP
32981 || GET_CODE (PATTERN (insn)) == COMPARE);
32984 /* Return true if a dispatch violation was encountered. */
32987 dispatch_violation (void)
32989 if (dispatch_window_list->next)
32990 return dispatch_window_list->next->violation;
32991 return dispatch_window_list->violation;
32994 /* Return true if insn is a branch instruction. */
32997 is_branch (rtx insn)
32999 return (CALL_P (insn) || JUMP_P (insn));
33002 /* Return true if insn is a prefetch instruction. */
33005 is_prefetch (rtx insn)
33007 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33010 /* This function initializes a dispatch window and the list container holding a
33011 pointer to the window. */
33014 init_window (int window_num)
33017 dispatch_windows *new_list;
33019 if (window_num == 0)
33020 new_list = dispatch_window_list;
33022 new_list = dispatch_window_list1;
33024 new_list->num_insn = 0;
33025 new_list->num_uops = 0;
33026 new_list->window_size = 0;
33027 new_list->next = NULL;
33028 new_list->prev = NULL;
33029 new_list->window_num = window_num;
33030 new_list->num_imm = 0;
33031 new_list->num_imm_32 = 0;
33032 new_list->num_imm_64 = 0;
33033 new_list->imm_size = 0;
33034 new_list->num_loads = 0;
33035 new_list->num_stores = 0;
33036 new_list->violation = false;
33038 for (i = 0; i < MAX_INSN; i++)
33040 new_list->window[i].insn = NULL;
33041 new_list->window[i].group = disp_no_group;
33042 new_list->window[i].path = no_path;
33043 new_list->window[i].byte_len = 0;
33044 new_list->window[i].imm_bytes = 0;
33049 /* This function allocates and initializes a dispatch window and the
33050 list container holding a pointer to the window. */
33052 static dispatch_windows *
33053 allocate_window (void)
33055 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
33056 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
33061 /* This routine initializes the dispatch scheduling information. It
33062 initiates building dispatch scheduler tables and constructs the
33063 first dispatch window. */
33066 init_dispatch_sched (void)
33068 /* Allocate a dispatch list and a window. */
33069 dispatch_window_list = allocate_window ();
33070 dispatch_window_list1 = allocate_window ();
33075 /* This function returns true if a branch is detected. The end of a basic
33076 block does not have to be a branch, but here we assume only branches end a window. */
33080 is_end_basic_block (enum dispatch_group group)
33082 return group == disp_branch;
33085 /* This function is called when the end of a window processing is reached. */
33088 process_end_window (void)
33090 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
33091 if (dispatch_window_list->next)
33093 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
33094 gcc_assert (dispatch_window_list->window_size
33095 + dispatch_window_list1->window_size <= 48);
33101 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
33102 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
33103 for 48 bytes of instructions. Note that these windows are not dispatch
33104 windows whose sizes are DISPATCH_WINDOW_SIZE. */
33106 static dispatch_windows *
33107 allocate_next_window (int window_num)
33109 if (window_num == 0)
33111 if (dispatch_window_list->next)
33114 return dispatch_window_list;
33117 dispatch_window_list->next = dispatch_window_list1;
33118 dispatch_window_list1->prev = dispatch_window_list;
33120 return dispatch_window_list1;
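/* Worked arithmetic: the 48 byte cap enforced here and in
   process_end_window corresponds to MAX_DISPATCH_WINDOWS (3) windows of
   DISPATCH_WINDOW_SIZE (16) bytes each: 3 * 16 == 48. */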
33123 /* Increment the number of immediate operands of an instruction. */
33126 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
33131 switch (GET_CODE (*in_rtx))
33136 (imm_values->imm)++;
33137 if (x86_64_immediate_operand (*in_rtx, SImode))
33138 (imm_values->imm32)++;
33140 (imm_values->imm64)++;
33144 (imm_values->imm)++;
33145 (imm_values->imm64)++;
33149 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
33151 (imm_values->imm)++;
33152 (imm_values->imm32)++;
33163 /* Compute number of immediate operands of an instruction. */
33166 find_constant (rtx in_rtx, imm_info *imm_values)
33168 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
33169 (rtx_function) find_constant_1, (void *) imm_values);
33172 /* Return total size of immediate operands of an instruction along with number
33173 of corresponding immediate operands. It initializes its parameters to zero
33174 before calling FIND_CONSTANT.
33175 INSN is the input instruction. IMM is the total count of immediates.
33176 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64 bit immediates. */
33180 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
33182 imm_info imm_values = {0, 0, 0};
33184 find_constant (insn, &imm_values);
33185 *imm = imm_values.imm;
33186 *imm32 = imm_values.imm32;
33187 *imm64 = imm_values.imm64;
33188 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
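/* Worked example: for an insn such as  mov $0x12345678, %eax  the
   tallies are *IMM == 1, *IMM32 == 1, *IMM64 == 0, and the returned
   size is 1 * 4 + 0 * 8 == 4 bytes. */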
33191 /* This function indicates if an operand of an instruction is an immediate. */
33195 has_immediate (rtx insn)
33197 int num_imm_operand;
33198 int num_imm32_operand;
33199 int num_imm64_operand;
33202 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33203 &num_imm64_operand);
33207 /* Return single or double path for instructions. */
33209 static enum insn_path
33210 get_insn_path (rtx insn)
33212 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
33214 if ((int) path == 0)
33215 return path_single;
33217 if ((int) path == 1)
33218 return path_double;
33223 /* Return insn dispatch group. */
33225 static enum dispatch_group
33226 get_insn_group (rtx insn)
33228 enum dispatch_group group = get_mem_group (insn);
33232 if (is_branch (insn))
33233 return disp_branch;
33238 if (has_immediate (insn))
33241 if (is_prefetch (insn))
33242 return disp_prefetch;
33244 return disp_no_group;
33247 /* Count number of GROUP restricted instructions in a dispatch
33248 window WINDOW_LIST. */
33251 count_num_restricted (rtx insn, dispatch_windows *window_list)
33253 enum dispatch_group group = get_insn_group (insn);
33255 int num_imm_operand;
33256 int num_imm32_operand;
33257 int num_imm64_operand;
33259 if (group == disp_no_group)
33262 if (group == disp_imm)
33264 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33265 &num_imm64_operand);
33266 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
33267 || num_imm_operand + window_list->num_imm > MAX_IMM
33268 || (num_imm32_operand > 0
33269 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
33270 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
33271 || (num_imm64_operand > 0
33272 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
33273 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
33274 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
33275 && num_imm64_operand > 0
33276 && ((window_list->num_imm_64 > 0
33277 && window_list->num_insn >= 2)
33278 || window_list->num_insn >= 3)))
33284 if ((group == disp_load_store
33285 && (window_list->num_loads >= MAX_LOAD
33286 || window_list->num_stores >= MAX_STORE))
33287 || ((group == disp_load
33288 || group == disp_prefetch)
33289 && window_list->num_loads >= MAX_LOAD)
33290 || (group == disp_store
33291 && window_list->num_stores >= MAX_STORE))
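/* Conservative paraphrase of the immediate clauses above (untested
   sketch, hypothetical helper; a 64 bit immediate consumes two 32 bit
   slots): */
#if 0
static int
imm_budget_ok (dispatch_windows *w, int imm_size, int n32, int n64)
{
  return (w->imm_size + imm_size <= MAX_IMM_SIZE
          && w->num_imm_32 + n32 + 2 * (w->num_imm_64 + n64) <= MAX_IMM_32
          && w->num_imm_64 + n64 <= MAX_IMM_64);
}
#endif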
33297 /* This function returns true if insn satisfies dispatch rules on the
33298 last window scheduled. */
33301 fits_dispatch_window (rtx insn)
33303 dispatch_windows *window_list = dispatch_window_list;
33304 dispatch_windows *window_list_next = dispatch_window_list->next;
33305 unsigned int num_restrict;
33306 enum dispatch_group group = get_insn_group (insn);
33307 enum insn_path path = get_insn_path (insn);
33310 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
33311 instructions should be given the lowest priority in the
33312 scheduling process in the Haifa scheduler so that they will be
33313 scheduled in the same dispatch window as the reference to them. */
33314 if (group == disp_jcc || group == disp_cmp)
33317 /* Check nonrestricted. */
33318 if (group == disp_no_group || group == disp_branch)
33321 /* Get last dispatch window. */
33322 if (window_list_next)
33323 window_list = window_list_next;
33325 if (window_list->window_num == 1)
33327 sum = window_list->prev->window_size + window_list->window_size;
33330 || (min_insn_size (insn) + sum) >= 48)
33331 /* Window 1 is full. Go to the next window. */
33335 num_restrict = count_num_restricted (insn, window_list);
33337 if (num_restrict > num_allowable_groups[group])
33340 /* See if it fits in the first window. */
33341 if (window_list->window_num == 0)
33343 /* The first window should have only single and double path uops. */
33345 if (path == path_double
33346 && (window_list->num_uops + 2) > MAX_INSN)
33348 else if (path != path_single)
33354 /* Add an instruction INSN with NUM_UOPS micro-operations to the
33355 dispatch window WINDOW_LIST. */
33358 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
33360 int byte_len = min_insn_size (insn);
33361 int num_insn = window_list->num_insn;
33363 sched_insn_info *window = window_list->window;
33364 enum dispatch_group group = get_insn_group (insn);
33365 enum insn_path path = get_insn_path (insn);
33366 int num_imm_operand;
33367 int num_imm32_operand;
33368 int num_imm64_operand;
33370 if (!window_list->violation && group != disp_cmp
33371 && !fits_dispatch_window (insn))
33372 window_list->violation = true;
33374 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33375 &num_imm64_operand);
33377 /* Initialize window with new instruction. */
33378 window[num_insn].insn = insn;
33379 window[num_insn].byte_len = byte_len;
33380 window[num_insn].group = group;
33381 window[num_insn].path = path;
33382 window[num_insn].imm_bytes = imm_size;
33384 window_list->window_size += byte_len;
33385 window_list->num_insn = num_insn + 1;
33386 window_list->num_uops = window_list->num_uops + num_uops;
33387 window_list->imm_size += imm_size;
33388 window_list->num_imm += num_imm_operand;
33389 window_list->num_imm_32 += num_imm32_operand;
33390 window_list->num_imm_64 += num_imm64_operand;
33392 if (group == disp_store)
33393 window_list->num_stores += 1;
33394 else if (group == disp_load
33395 || group == disp_prefetch)
33396 window_list->num_loads += 1;
33397 else if (group == disp_load_store)
33399 window_list->num_stores += 1;
33400 window_list->num_loads += 1;
33404 /* Adds a scheduled instruction, INSN, to the current dispatch window.
33405 If the total bytes of instructions or the number of instructions in
33406 the window exceeds the allowable limit, it allocates a new window. */
33409 add_to_dispatch_window (rtx insn)
33412 dispatch_windows *window_list;
33413 dispatch_windows *next_list;
33414 dispatch_windows *window0_list;
33415 enum insn_path path;
33416 enum dispatch_group insn_group;
33424 if (INSN_CODE (insn) < 0)
33427 byte_len = min_insn_size (insn);
33428 window_list = dispatch_window_list;
33429 next_list = window_list->next;
33430 path = get_insn_path (insn);
33431 insn_group = get_insn_group (insn);
33433 /* Get the last dispatch window. */
33435 window_list = dispatch_window_list->next;
33437 if (path == path_single)
33439 else if (path == path_double)
33442 insn_num_uops = (int) path;
33444 /* If the current window is full, get a new window.
33445 Window number zero is full if MAX_INSN uops are scheduled in it.
33446 Window number one is full if window zero's bytes plus window
33447 one's bytes equal 32, or if the bytes of the new instruction added
33448 to the total make it greater than 48, or if it already has MAX_INSN
33449 instructions in it. */
33450 num_insn = window_list->num_insn;
33451 num_uops = window_list->num_uops;
33452 window_num = window_list->window_num;
33453 insn_fits = fits_dispatch_window (insn);
33455 if (num_insn >= MAX_INSN
33456 || num_uops + insn_num_uops > MAX_INSN
33459 window_num = ~window_num & 1;
33460 window_list = allocate_next_window (window_num);
33463 if (window_num == 0)
33465 add_insn_window (insn, window_list, insn_num_uops);
33466 if (window_list->num_insn >= MAX_INSN
33467 && insn_group == disp_branch)
33469 process_end_window ();
33473 else if (window_num == 1)
33475 window0_list = window_list->prev;
33476 sum = window0_list->window_size + window_list->window_size;
33478 || (byte_len + sum) >= 48)
33480 process_end_window ();
33481 window_list = dispatch_window_list;
33484 add_insn_window (insn, window_list, insn_num_uops);
33487 gcc_unreachable ();
33489 if (is_end_basic_block (insn_group))
33491 /* The end of the basic block is reached; do end-of-basic-block processing. */
33492 process_end_window ();
33497 /* Print the dispatch window, WINDOW_NUM, to FILE. */
33499 DEBUG_FUNCTION static void
33500 debug_dispatch_window_file (FILE *file, int window_num)
33502 dispatch_windows *list;
33505 if (window_num == 0)
33506 list = dispatch_window_list;
33508 list = dispatch_window_list1;
33510 fprintf (file, "Window #%d:\n", list->window_num);
33511 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
33512 list->num_insn, list->num_uops, list->window_size);
33513 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
33514 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
33516 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
33518 fprintf (file, " insn info:\n");
33520 for (i = 0; i < MAX_INSN; i++)
33522 if (!list->window[i].insn)
33524 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
33525 i, group_name[list->window[i].group],
33526 i, (void *)list->window[i].insn,
33527 i, list->window[i].path,
33528 i, list->window[i].byte_len,
33529 i, list->window[i].imm_bytes);
33533 /* Print to stdout a dispatch window. */
33535 DEBUG_FUNCTION void
33536 debug_dispatch_window (int window_num)
33538 debug_dispatch_window_file (stdout, window_num);
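/* Usage note: DEBUG_FUNCTION marks this for interactive use, e.g. from
   the debugger:  (gdb) call debug_dispatch_window (0)  */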
33541 /* Print INSN dispatch information to FILE. */
33543 DEBUG_FUNCTION static void
33544 debug_insn_dispatch_info_file (FILE *file, rtx insn)
33547 enum insn_path path;
33548 enum dispatch_group group;
33550 int num_imm_operand;
33551 int num_imm32_operand;
33552 int num_imm64_operand;
33554 if (INSN_CODE (insn) < 0)
33557 byte_len = min_insn_size (insn);
33558 path = get_insn_path (insn);
33559 group = get_insn_group (insn);
33560 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33561 &num_imm64_operand);
33563 fprintf (file, " insn info:\n");
33564 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
33565 group_name[group], path, byte_len);
33566 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
33567 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
33570 /* Print to stdout the status of the ready list with respect to
33571 dispatch windows. */
33573 DEBUG_FUNCTION void
33574 debug_ready_dispatch (void)
33577 int no_ready = number_in_ready ();
33579 fprintf (stdout, "Number of ready: %d\n", no_ready);
33581 for (i = 0; i < no_ready; i++)
33582 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
33585 /* This routine is the driver of the dispatch scheduler. */
33588 do_dispatch (rtx insn, int mode)
33590 if (mode == DISPATCH_INIT)
33591 init_dispatch_sched ();
33592 else if (mode == ADD_TO_DISPATCH_WINDOW)
33593 add_to_dispatch_window (insn);
33596 /* Return TRUE if dispatch scheduling is supported. */
33599 has_dispatch (rtx insn, int action)
33601 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
33607 case IS_DISPATCH_ON:
33612 return is_cmp (insn);
33614 case DISPATCH_VIOLATION:
33615 return dispatch_violation ();
33617 case FITS_DISPATCH_WINDOW:
33618 return fits_dispatch_window (insn);
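/* Note: the dispatch hooks above therefore take effect only when tuning
   for PROCESSOR_BDVER1 with flag_dispatch_scheduler set (presumably via
   the -mdispatch-scheduler option). */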
33624 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
33625 place emms and femms instructions. */
33627 static enum machine_mode
33628 ix86_preferred_simd_mode (enum machine_mode mode)
33630 /* Disable double precision vectorizer if needed. */
33631 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
33634 if (!TARGET_AVX && !TARGET_SSE)
33640 return TARGET_AVX ? V8SFmode : V4SFmode;
33642 return TARGET_AVX ? V4DFmode : V2DFmode;
33658 /* If AVX is enabled then try vectorizing with both 256bit and 128bit vectors. */
33661 static unsigned int
33662 ix86_autovectorize_vector_sizes (void)
33664 return TARGET_AVX ? 32 | 16 : 0;
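/* Illustrative decoding of the returned bitmask (try_vector_size is a
   hypothetical stand-in for the vectorizer's consumer): */
#if 0
static void
choose_vector_sizes (void)
{
  unsigned int sizes = ix86_autovectorize_vector_sizes ();
  if (sizes & 32)
    try_vector_size (32);  /* try 256 bit vectors first when AVX is on */
  if (sizes & 16)
    try_vector_size (16);  /* then fall back to 128 bit vectors */
}
#endif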
33667 /* Initialize the GCC target structure. */
33668 #undef TARGET_RETURN_IN_MEMORY
33669 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
33671 #undef TARGET_LEGITIMIZE_ADDRESS
33672 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
33674 #undef TARGET_ATTRIBUTE_TABLE
33675 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
33676 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33677 # undef TARGET_MERGE_DECL_ATTRIBUTES
33678 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
33681 #undef TARGET_COMP_TYPE_ATTRIBUTES
33682 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
33684 #undef TARGET_INIT_BUILTINS
33685 #define TARGET_INIT_BUILTINS ix86_init_builtins
33686 #undef TARGET_BUILTIN_DECL
33687 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33688 #undef TARGET_EXPAND_BUILTIN
33689 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33691 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33692 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33693 ix86_builtin_vectorized_function
33695 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33696 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33698 #undef TARGET_BUILTIN_RECIPROCAL
33699 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33701 #undef TARGET_ASM_FUNCTION_EPILOGUE
33702 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33704 #undef TARGET_ENCODE_SECTION_INFO
33705 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33706 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33708 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
33711 #undef TARGET_ASM_OPEN_PAREN
33712 #define TARGET_ASM_OPEN_PAREN ""
33713 #undef TARGET_ASM_CLOSE_PAREN
33714 #define TARGET_ASM_CLOSE_PAREN ""
33716 #undef TARGET_ASM_BYTE_OP
33717 #define TARGET_ASM_BYTE_OP ASM_BYTE
33719 #undef TARGET_ASM_ALIGNED_HI_OP
33720 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33721 #undef TARGET_ASM_ALIGNED_SI_OP
33722 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
33724 #undef TARGET_ASM_ALIGNED_DI_OP
33725 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
33728 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33729 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33731 #undef TARGET_ASM_UNALIGNED_HI_OP
33732 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33733 #undef TARGET_ASM_UNALIGNED_SI_OP
33734 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33735 #undef TARGET_ASM_UNALIGNED_DI_OP
33736 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33738 #undef TARGET_PRINT_OPERAND
33739 #define TARGET_PRINT_OPERAND ix86_print_operand
33740 #undef TARGET_PRINT_OPERAND_ADDRESS
33741 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33742 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33743 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33744 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33745 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33747 #undef TARGET_SCHED_ADJUST_COST
33748 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33749 #undef TARGET_SCHED_ISSUE_RATE
33750 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33751 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33752 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33753 ia32_multipass_dfa_lookahead
33755 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33756 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
33759 #undef TARGET_HAVE_TLS
33760 #define TARGET_HAVE_TLS true
33762 #undef TARGET_CANNOT_FORCE_CONST_MEM
33763 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33764 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33765 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33767 #undef TARGET_DELEGITIMIZE_ADDRESS
33768 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33770 #undef TARGET_MS_BITFIELD_LAYOUT_P
33771 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
33774 #undef TARGET_BINDS_LOCAL_P
33775 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
33777 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33778 #undef TARGET_BINDS_LOCAL_P
33779 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
33782 #undef TARGET_ASM_OUTPUT_MI_THUNK
33783 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33784 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33785 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33787 #undef TARGET_ASM_FILE_START
33788 #define TARGET_ASM_FILE_START x86_file_start
33790 #undef TARGET_DEFAULT_TARGET_FLAGS
33791 #define TARGET_DEFAULT_TARGET_FLAGS \
33793 | TARGET_SUBTARGET_DEFAULT \
33794 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
33797 #undef TARGET_HANDLE_OPTION
33798 #define TARGET_HANDLE_OPTION ix86_handle_option
33800 #undef TARGET_OPTION_OVERRIDE
33801 #define TARGET_OPTION_OVERRIDE ix86_option_override
33802 #undef TARGET_OPTION_OPTIMIZATION_TABLE
33803 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
33804 #undef TARGET_OPTION_INIT_STRUCT
33805 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
33807 #undef TARGET_REGISTER_MOVE_COST
33808 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33809 #undef TARGET_MEMORY_MOVE_COST
33810 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33811 #undef TARGET_RTX_COSTS
33812 #define TARGET_RTX_COSTS ix86_rtx_costs
33813 #undef TARGET_ADDRESS_COST
33814 #define TARGET_ADDRESS_COST ix86_address_cost
33816 #undef TARGET_FIXED_CONDITION_CODE_REGS
33817 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33818 #undef TARGET_CC_MODES_COMPATIBLE
33819 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33821 #undef TARGET_MACHINE_DEPENDENT_REORG
33822 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33824 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33825 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33827 #undef TARGET_BUILD_BUILTIN_VA_LIST
33828 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33830 #undef TARGET_ENUM_VA_LIST_P
33831 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33833 #undef TARGET_FN_ABI_VA_LIST
33834 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33836 #undef TARGET_CANONICAL_VA_LIST_TYPE
33837 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33839 #undef TARGET_EXPAND_BUILTIN_VA_START
33840 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33842 #undef TARGET_MD_ASM_CLOBBERS
33843 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33845 #undef TARGET_PROMOTE_PROTOTYPES
33846 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33847 #undef TARGET_STRUCT_VALUE_RTX
33848 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33849 #undef TARGET_SETUP_INCOMING_VARARGS
33850 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33851 #undef TARGET_MUST_PASS_IN_STACK
33852 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33853 #undef TARGET_FUNCTION_ARG_ADVANCE
33854 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33855 #undef TARGET_FUNCTION_ARG
33856 #define TARGET_FUNCTION_ARG ix86_function_arg
33857 #undef TARGET_PASS_BY_REFERENCE
33858 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33859 #undef TARGET_INTERNAL_ARG_POINTER
33860 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33861 #undef TARGET_UPDATE_STACK_BOUNDARY
33862 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33863 #undef TARGET_GET_DRAP_RTX
33864 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33865 #undef TARGET_STRICT_ARGUMENT_NAMING
33866 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33867 #undef TARGET_STATIC_CHAIN
33868 #define TARGET_STATIC_CHAIN ix86_static_chain
33869 #undef TARGET_TRAMPOLINE_INIT
33870 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33871 #undef TARGET_RETURN_POPS_ARGS
33872 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33874 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33875 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33877 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33878 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33880 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33881 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33883 #undef TARGET_C_MODE_FOR_SUFFIX
33884 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
33887 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
33888 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
33891 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33892 #undef TARGET_INSERT_ATTRIBUTES
33893 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
33896 #undef TARGET_MANGLE_TYPE
33897 #define TARGET_MANGLE_TYPE ix86_mangle_type
33899 #undef TARGET_STACK_PROTECT_FAIL
33900 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33902 #undef TARGET_SUPPORTS_SPLIT_STACK
33903 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33905 #undef TARGET_FUNCTION_VALUE
33906 #define TARGET_FUNCTION_VALUE ix86_function_value
33908 #undef TARGET_FUNCTION_VALUE_REGNO_P
33909 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33911 #undef TARGET_SECONDARY_RELOAD
33912 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33914 #undef TARGET_PREFERRED_RELOAD_CLASS
33915 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
33916 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
33917 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
33918 #undef TARGET_CLASS_LIKELY_SPILLED_P
33919 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33921 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33922 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33923 ix86_builtin_vectorization_cost
33924 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33925 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33926 ix86_vectorize_builtin_vec_perm
33927 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33928 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33929 ix86_vectorize_builtin_vec_perm_ok
33930 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
33931 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
33932 ix86_preferred_simd_mode
33933 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
33934 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
33935 ix86_autovectorize_vector_sizes
33937 #undef TARGET_SET_CURRENT_FUNCTION
33938 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33940 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33941 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33943 #undef TARGET_OPTION_SAVE
33944 #define TARGET_OPTION_SAVE ix86_function_specific_save
33946 #undef TARGET_OPTION_RESTORE
33947 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33949 #undef TARGET_OPTION_PRINT
33950 #define TARGET_OPTION_PRINT ix86_function_specific_print
33952 #undef TARGET_CAN_INLINE_P
33953 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33955 #undef TARGET_EXPAND_TO_RTL_HOOK
33956 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33958 #undef TARGET_LEGITIMATE_ADDRESS_P
33959 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33961 #undef TARGET_IRA_COVER_CLASSES
33962 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33964 #undef TARGET_FRAME_POINTER_REQUIRED
33965 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33967 #undef TARGET_CAN_ELIMINATE
33968 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33970 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33971 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33973 #undef TARGET_ASM_CODE_END
33974 #define TARGET_ASM_CODE_END ix86_code_end
33976 struct gcc_target targetm = TARGET_INITIALIZER;
33978 #include "gt-i386.h"