/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
typedef struct block_info_def
{
  /* TRUE if the upper 128bits of any AVX registers are live at exit.  */
  bool upper_128bits_set;
  /* TRUE if block has been processed.  */
  bool done;
} *block_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
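
/* Each bb->aux is sized for a block_info_def by alloc_aux_for_blocks
   (see move_or_delete_vzeroupper below), so per-block state is read
   and written through the accessor, for example:

     if (!BLOCK_INFO (bb)->done)
       BLOCK_INFO (bb)->upper_128bits_set = true;  */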

enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};

/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
	  && REG_P (SET_SRC (set))
	  && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      bool *upper_128bits_set = (bool *) data;
      *upper_128bits_set = true;
    }
}
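
/* For example, the walk in move_or_delete_vzeroupper_2 below scans one
   insn pattern for 256bit AVX stores with:

     bool live = false;
     note_stores (PATTERN (curr_insn), check_avx256_stores, &live);

   leaving LIVE true roughly when the insn touches a register in a
   256bit vector mode.  */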

/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   UPPER_128BITS_SET is TRUE if the upper 128bits of any AVX registers
   are live at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
{
  rtx curr_insn, next_insn, prev_insn, insn;

  if (dump_file)
    fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
	     bb->index, upper_128bits_set);

  for (curr_insn = BB_HEAD (bb);
       curr_insn && curr_insn != NEXT_INSN (BB_END (bb));
       curr_insn = next_insn)
    {
      int avx256;

      next_insn = NEXT_INSN (curr_insn);

      if (!NONDEBUG_INSN_P (curr_insn))
	continue;

      /* Search for vzeroupper.  */
      insn = PATTERN (curr_insn);
      if (GET_CODE (insn) == UNSPEC_VOLATILE
	  && XINT (insn, 1) == UNSPECV_VZEROUPPER)
	{
	  /* Found vzeroupper.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Found vzeroupper:\n");
	      print_rtl_single (dump_file, curr_insn);
	    }
	}
      else
	{
	  /* Check vzeroall intrinsic.  */
	  if (GET_CODE (insn) == PARALLEL
	      && GET_CODE (XVECEXP (insn, 0, 0)) == UNSPEC_VOLATILE
	      && XINT (XVECEXP (insn, 0, 0), 1) == UNSPECV_VZEROALL)
	    upper_128bits_set = false;
	  else if (!upper_128bits_set)
	    {
	      /* Check if upper 128bits of AVX registers are used.  */
	      note_stores (insn, check_avx256_stores,
			   &upper_128bits_set);
	    }
	  continue;
	}

      avx256 = INTVAL (XVECEXP (insn, 0, 0));

      if (!upper_128bits_set)
	{
	  /* Since the upper 128bits are cleared, callee must not pass
	     256bit AVX register.  We only need to check if callee
	     returns 256bit AVX register.  */
	  upper_128bits_set = avx256 == callee_return_avx256;

	  /* Remove unnecessary vzeroupper since upper 128bits are
	     cleared.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete redundant vzeroupper:\n");
	      print_rtl_single (dump_file, curr_insn);
	    }
	  delete_insn (curr_insn);
	  continue;
	}
      else if (avx256 == callee_return_pass_avx256
	       || avx256 == callee_pass_avx256)
	{
	  /* Callee passes 256bit AVX register.  Check if callee
	     returns 256bit AVX register.  */
	  upper_128bits_set = avx256 == callee_return_pass_avx256;

	  /* Must remove vzeroupper since callee passes 256bit AVX
	     register.  */
	  if (dump_file)
	    {
	      fprintf (dump_file, "Delete callee pass vzeroupper:\n");
	      print_rtl_single (dump_file, curr_insn);
	    }
	  delete_insn (curr_insn);
	  continue;
	}

      /* Find the jump after vzeroupper.  */
      prev_insn = curr_insn;
      if (avx256 == vzeroupper_intrinsic)
	{
	  /* For vzeroupper intrinsic, check if there is another
	     vzeroupper.  */
	  insn = NEXT_INSN (curr_insn);
	  while (insn)
	    {
	      if (NONJUMP_INSN_P (insn)
		  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_VZEROUPPER)
		{
		  if (dump_file)
		    {
		      fprintf (dump_file,
			       "Delete redundant vzeroupper intrinsic:\n");
		      print_rtl_single (dump_file, curr_insn);
		    }
		  delete_insn (curr_insn);
		  curr_insn = NULL;
		  break;
		}

	      if (JUMP_P (insn) || CALL_P (insn))
		break;

	      prev_insn = insn;
	      insn = NEXT_INSN (insn);
	      if (insn == NEXT_INSN (BB_END (bb)))
		break;
	    }

	  /* Continue if redundant vzeroupper intrinsic is deleted.  */
	  if (!curr_insn)
	    continue;
	}
      else
	{
	  /* Find the next jump/call.  */
	  insn = NEXT_INSN (curr_insn);
	  while (insn)
	    {
	      if (JUMP_P (insn) || CALL_P (insn))
		break;

	      prev_insn = insn;
	      insn = NEXT_INSN (insn);
	      if (insn == NEXT_INSN (BB_END (bb)))
		break;
	    }
	}

      gcc_assert (insn);

      /* Keep vzeroupper.  */
      upper_128bits_set = false;

      /* Also allow label as the next instruction.  */
      if (insn == NEXT_INSN (BB_END (bb)) && !LABEL_P (insn))
	gcc_unreachable ();

      /* Move vzeroupper before jump/call if needed.  */
      if (curr_insn != prev_insn)
	{
	  reorder_insns_nobb (curr_insn, curr_insn, prev_insn);

	  if (dump_file)
	    {
	      fprintf (dump_file, "Move vzeroupper after:\n");
	      print_rtl_single (dump_file, prev_insn);
	      fprintf (dump_file, "before:\n");
	      print_rtl_single (dump_file, insn);
	    }
	}

      next_insn = NEXT_INSN (insn);
    }

  BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;

  if (dump_file)
    fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n",
	     bb->index, upper_128bits_set);
}
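
/* As an illustration, for a block that ends in a call:

	vmovaps	%ymm0, (%rdi)
	vzeroupper
	movl	$1, %esi
	call	bar

   the vzeroupper is reordered to sit immediately before the call,
   while in a block where the upper 128bits are already clear on entry
   and the callee neither receives nor returns a 256bit value, the
   vzeroupper is deleted outright.  */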

/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and its predecessor blocks recursively.  */

static void
move_or_delete_vzeroupper_1 (basic_block block)
{
  edge e;
  edge_iterator ei;
  bool upper_128bits_set;

  if (dump_file)
    fprintf (dump_file, " Process BB [%i]: status: %d\n",
	     block->index, BLOCK_INFO (block)->done);

  if (BLOCK_INFO (block)->done)
    return;

  BLOCK_INFO (block)->done = true;

  upper_128bits_set = false;

  /* Process all predecessor edges of this block.  */
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
	continue;
      move_or_delete_vzeroupper_1 (e->src);
      if (BLOCK_INFO (e->src)->upper_128bits_set)
	upper_128bits_set = true;
    }

  /* Process this block.  */
  move_or_delete_vzeroupper_2 (block, upper_128bits_set);
}

/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process successor blocks of all entry points.  */
  if (dump_file)
    fprintf (dump_file, "Process all entry points\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
				   cfun->machine->caller_pass_avx256_p);
      BLOCK_INFO (e->dest)->done = true;
    }

  /* Process predecessor blocks of all exit points.  */
  if (dump_file)
    fprintf (dump_file, "Process all exit points\n");

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    move_or_delete_vzeroupper_1 (e->src);

  free_aux_for_blocks ();
}

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
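
/* For example, the SImode column of a multiply cost table below is
   fetched as

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   i.e. index 2, with the final index covering everything wider than
   DImode.  */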

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
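
/* In the stringop_algs initializers below, the leading algorithm is
   used when the block size is unknown at compile time, and each
   {max, alg} pair handles known sizes up to MAX, with -1 as the final
   catch-all.  For example

     {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
      DUMMY_STRINGOP_ALGS}

   reads: unknown sizes go to the library, known sizes up to 256 bytes
   use rep movsl, anything larger goes to the library, and the 64bit
   slot is a dummy because the CPU never runs 64bit code.  */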

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  0,				/* "large" insn */
  2,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache  */
  0,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  1,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  1,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/* HI */
   COSTS_N_INSNS (6),		/* SI */
   COSTS_N_INSNS (6),		/* DI */
   COSTS_N_INSNS (6)},		/* other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/* HI */
   COSTS_N_INSNS (23),		/* SI */
   COSTS_N_INSNS (23),		/* DI */
   COSTS_N_INSNS (23)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache  */
  0,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/* HI */
   COSTS_N_INSNS (12),		/* SI */
   COSTS_N_INSNS (12),		/* DI */
   COSTS_N_INSNS (12)},		/* other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/* HI */
   COSTS_N_INSNS (40),		/* SI */
   COSTS_N_INSNS (40),		/* DI */
   COSTS_N_INSNS (40)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  4,				/* size of l1 cache.  486 has 8kB cache
				   shared for code and data, so 4kB is
				   not really precise.  */
  4,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/* HI */
   COSTS_N_INSNS (11),		/* SI */
   COSTS_N_INSNS (11),		/* DI */
   COSTS_N_INSNS (11)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/* HI */
   COSTS_N_INSNS (25),		/* SI */
   COSTS_N_INSNS (25),		/* DI */
   COSTS_N_INSNS (25)},		/* other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  8,				/* size of l2 cache  */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (4)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/* HI */
   COSTS_N_INSNS (17),		/* SI */
   COSTS_N_INSNS (17),		/* DI */
   COSTS_N_INSNS (17)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache  */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time on the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (2),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (7),		/* SI */
   COSTS_N_INSNS (7),		/* DI */
   COSTS_N_INSNS (7)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/* HI */
   COSTS_N_INSNS (39),		/* SI */
   COSTS_N_INSNS (39),		/* DI */
   COSTS_N_INSNS (39)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  1,				/* cost for loading QImode using movzbl */
  {1, 1, 1},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {1, 1, 1},			/* cost of storing integer registers */
  1,				/* cost of reg,reg fld/fst */
  {1, 1, 1},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 6, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */

  1,				/* cost of moving MMX register */
  {1, 1},			/* cost of loading MMX registers
				   in SImode and DImode */
  {1, 1},			/* cost of storing MMX registers
				   in SImode and DImode */
  1,				/* cost of moving SSE register */
  {1, 1, 1},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {1, 1, 1},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  1,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  128,				/* size of l2 cache.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (3),		/* DI */
   COSTS_N_INSNS (3)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/* HI */
   COSTS_N_INSNS (18),		/* SI */
   COSTS_N_INSNS (18),		/* DI */
   COSTS_N_INSNS (18)},		/* other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  32,				/* size of l2 cache.  Some models
				   have integrated l2 cache, but
				   optimizing for k6 is not important
				   enough to worry about that.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/* HI */
   COSTS_N_INSNS (5),		/* SI */
   COSTS_N_INSNS (5),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  5,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  5,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  3,				/* vec_unalign_load_cost.  */
  3,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  2,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (6),		/* DI */
   COSTS_N_INSNS (6)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {5, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {5, 5, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 4},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
  				/* On K8:
				    MOVD reg64, xmmreg Double FSTORE 4
				    MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),		/* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,				/* scalar_stmt_cost.  */
  4,				/* scalar load_cost.  */
  4,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  4,				/* vec_align_load_cost.  */
  4,				/* vec_unalign_load_cost.  */
  4,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/* HI */
   COSTS_N_INSNS (15),		/* SI */
   COSTS_N_INSNS (15),		/* DI */
   COSTS_N_INSNS (15)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/* HI */
   COSTS_N_INSNS (56),		/* SI */
   COSTS_N_INSNS (56),		/* DI */
   COSTS_N_INSNS (56)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/* HI */
   COSTS_N_INSNS (10),		/* SI */
   COSTS_N_INSNS (10),		/* DI */
   COSTS_N_INSNS (10)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/* HI */
   COSTS_N_INSNS (66),		/* SI */
   COSTS_N_INSNS (66),		/* DI */
   COSTS_N_INSNS (66)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  1024,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (3),		/* DI */
   COSTS_N_INSNS (3)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),		/* HI */
   COSTS_N_INSNS (22),		/* SI */
   COSTS_N_INSNS (22),		/* DI */
   COSTS_N_INSNS (22)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  16,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {6, 6, 6},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {6, 6},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {6, 6, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
1563 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1566 struct processor_costs generic32_cost = {
1567 COSTS_N_INSNS (1), /* cost of an add instruction */
1568 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1569 COSTS_N_INSNS (1), /* variable shift costs */
1570 COSTS_N_INSNS (1), /* constant shift costs */
1571 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1572 COSTS_N_INSNS (4), /* HI */
1573 COSTS_N_INSNS (3), /* SI */
1574 COSTS_N_INSNS (4), /* DI */
1575 COSTS_N_INSNS (2)}, /* other */
1576 0, /* cost of multiply per each bit set */
1577 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1578 COSTS_N_INSNS (26), /* HI */
1579 COSTS_N_INSNS (42), /* SI */
1580 COSTS_N_INSNS (74), /* DI */
1581 COSTS_N_INSNS (74)}, /* other */
1582 COSTS_N_INSNS (1), /* cost of movsx */
1583 COSTS_N_INSNS (1), /* cost of movzx */
1584 8, /* "large" insn */
1585 17, /* MOVE_RATIO */
1586 4, /* cost for loading QImode using movzbl */
1587 {4, 4, 4}, /* cost of loading integer registers
1588 in QImode, HImode and SImode.
1589 Relative to reg-reg move (2). */
1590 {4, 4, 4}, /* cost of storing integer registers */
1591 4, /* cost of reg,reg fld/fst */
1592 {12, 12, 12}, /* cost of loading fp registers
1593 in SFmode, DFmode and XFmode */
1594 {6, 6, 8}, /* cost of storing fp registers
1595 in SFmode, DFmode and XFmode */
1596 2, /* cost of moving MMX register */
1597 {8, 8}, /* cost of loading MMX registers
1598 in SImode and DImode */
1599 {8, 8}, /* cost of storing MMX registers
1600 in SImode and DImode */
1601 2, /* cost of moving SSE register */
1602 {8, 8, 8}, /* cost of loading SSE registers
1603 in SImode, DImode and TImode */
1604 {8, 8, 8}, /* cost of storing SSE registers
1605 in SImode, DImode and TImode */
1606 5, /* MMX or SSE register to integer */
1607 32, /* size of l1 cache. */
1608 256, /* size of l2 cache. */
1609 64, /* size of prefetch block */
1610 6, /* number of parallel prefetches */
1611 3, /* Branch cost */
1612 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1613 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1614 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1615 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1616 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1617 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1618 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1619 DUMMY_STRINGOP_ALGS},
1620 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1621 DUMMY_STRINGOP_ALGS},
1622 1, /* scalar_stmt_cost. */
1623 1, /* scalar_load_cost. */
1624 1, /* scalar_store_cost. */
1625 1, /* vec_stmt_cost. */
1626 1, /* vec_to_scalar_cost. */
1627 1, /* scalar_to_vec_cost. */
1628 1, /* vec_align_load_cost. */
1629 2, /* vec_unalign_load_cost. */
1630 1, /* vec_store_cost. */
1631 3, /* cond_taken_branch_cost. */
1632 1, /* cond_not_taken_branch_cost. */
1635 const struct processor_costs *ix86_cost = &pentium_cost;
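/* Illustrative note (not part of the original source): COSTS_N_INSNS (N)
   from rtl.h scales N instructions into GCC's rtx cost units (N * 4), so
   e.g. the DImode divide entry COSTS_N_INSNS (74) above is 296 cost units.
   A minimal sketch of how such a table might be consumed, assuming the
   divide[] field name from struct processor_costs; the helper itself is
   hypothetical:  */
#if 0
static int
example_di_divide_cost (const struct processor_costs *cost)
{
  /* The divide/mod arrays are indexed {QI, HI, SI, DI, other},
     so DImode is index 3.  */
  return cost->divide[3];
}
#endif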
1637 /* Processor feature/optimization bitmasks. */
1638 #define m_386 (1<<PROCESSOR_I386)
1639 #define m_486 (1<<PROCESSOR_I486)
1640 #define m_PENT (1<<PROCESSOR_PENTIUM)
1641 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1642 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1643 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1644 #define m_CORE2 (1<<PROCESSOR_CORE2)
1645 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1646 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1647 #define m_ATOM (1<<PROCESSOR_ATOM)
1649 #define m_GEODE (1<<PROCESSOR_GEODE)
1650 #define m_K6 (1<<PROCESSOR_K6)
1651 #define m_K6_GEODE (m_K6 | m_GEODE)
1652 #define m_K8 (1<<PROCESSOR_K8)
1653 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1654 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1655 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1656 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1657 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1659 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32)
1660 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64)
1662 /* Generic instruction choice should be the common subset of supported CPUs
1663 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1664 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
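/* Illustrative note (not part of the original source): each entry of the
   tune/arch feature tables below is a mask built from the m_* processor
   bits above, and a feature applies to the current tuning when the bit for
   ix86_tune is set.  A minimal sketch of that test, mirroring the loops
   run later in ix86_option_override_internal:  */
#if 0
static bool
example_tune_feature_p (unsigned int feature_mask)
{
  unsigned int tune_mask = 1u << ix86_tune;
  return (feature_mask & tune_mask) != 0;
}
#endif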
1666 /* Feature tests against the various tunings. */
1667 unsigned char ix86_tune_features[X86_TUNE_LAST];
1669 /* Feature tests against the various tunings used to create ix86_tune_features
1670 based on the processor mask. */
1671 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1672 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1673 negatively, so enabling it for Generic64 seems like a good code size
1674 tradeoff. We can't enable it for 32bit generic because it does not
1675 work well with PPro base chips. */
1676 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1678 /* X86_TUNE_PUSH_MEMORY */
1679 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1680 | m_NOCONA | m_CORE2 | m_GENERIC,
1682 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1685 /* X86_TUNE_UNROLL_STRLEN */
1686 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1687 | m_CORE2 | m_GENERIC,
1689 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1690 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1692 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1693 on simulation results. But after P4 was made, no performance benefit
1694 was observed with branch hints. They also increase the code size.
1695 As a result, icc never generates branch hints. */
1698 /* X86_TUNE_DOUBLE_WITH_ADD */
1701 /* X86_TUNE_USE_SAHF */
1702 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1703 | m_NOCONA | m_CORE2 | m_GENERIC,
1705 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1706 partial dependencies. */
1707 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1708 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1710 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1711 register stalls on the Generic32 compilation setting as well. However,
1712 in the current implementation the partial register stalls are not
1713 eliminated very well - they can be introduced via subregs synthesized
1714 by combine and can happen in caller/callee saving sequences. Because
1715 this option pays back little on PPro based chips and is in conflict
1716 with partial reg dependencies used by Athlon/P4 based chips, it is
1717 better to leave it off for generic32 for now. */
1720 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1721 m_CORE2 | m_GENERIC,
1723 /* X86_TUNE_USE_HIMODE_FIOP */
1724 m_386 | m_486 | m_K6_GEODE,
1726 /* X86_TUNE_USE_SIMODE_FIOP */
1727 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1729 /* X86_TUNE_USE_MOV0 */
1732 /* X86_TUNE_USE_CLTD */
1733 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1735 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1738 /* X86_TUNE_SPLIT_LONG_MOVES */
1741 /* X86_TUNE_READ_MODIFY_WRITE */
1744 /* X86_TUNE_READ_MODIFY */
1747 /* X86_TUNE_PROMOTE_QIMODE */
1748 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1749 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1751 /* X86_TUNE_FAST_PREFIX */
1752 ~(m_PENT | m_486 | m_386),
1754 /* X86_TUNE_SINGLE_STRINGOP */
1755 m_386 | m_PENT4 | m_NOCONA,
1757 /* X86_TUNE_QIMODE_MATH */
1760 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1761 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1762 might be considered for Generic32 if our scheme for avoiding partial
1763 stalls were more effective. */
1766 /* X86_TUNE_PROMOTE_QI_REGS */
1769 /* X86_TUNE_PROMOTE_HI_REGS */
1772 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1773 over esp addition. */
1774 m_386 | m_486 | m_PENT | m_PPRO,
1776 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1777 over esp addition. */
1780 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1781 over esp subtraction. */
1782 m_386 | m_486 | m_PENT | m_K6_GEODE,
1784 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1785 over esp subtraction. */
1786 m_PENT | m_K6_GEODE,
1788 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1789 for DFmode copies */
1790 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1791 | m_GENERIC | m_GEODE),
1793 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1794 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1796 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1797 conflict here between PPro/Pentium4 based chips that treat 128bit
1798 SSE registers as single units and K8 based chips that divide SSE
1799 registers into two 64bit halves. This knob promotes all store destinations
1800 to be 128bit so as to allow register renaming on 128bit SSE units, but usually
1801 results in one extra microop on 64bit SSE units. Experimental results
1802 show that disabling this option on P4 brings over 20% SPECfp regression,
1803 while enabling it on K8 brings roughly 2.4% regression that can be partly
1804 masked by careful scheduling of moves. */
1805 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1806 | m_AMDFAM10 | m_BDVER1,
1808 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1809 m_AMDFAM10 | m_BDVER1,
1811 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1814 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1817 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1818 are resolved on SSE register parts instead of whole registers, so we may
1819 maintain just the lower part of scalar values in the proper format, leaving
1820 the upper part undefined. */
1823 /* X86_TUNE_SSE_TYPELESS_STORES */
1826 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1827 m_PPRO | m_PENT4 | m_NOCONA,
1829 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1830 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1832 /* X86_TUNE_PROLOGUE_USING_MOVE */
1833 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1835 /* X86_TUNE_EPILOGUE_USING_MOVE */
1836 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1838 /* X86_TUNE_SHIFT1 */
1841 /* X86_TUNE_USE_FFREEP */
1844 /* X86_TUNE_INTER_UNIT_MOVES */
1845 ~(m_AMD_MULTIPLE | m_GENERIC),
1847 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1848 ~(m_AMDFAM10 | m_BDVER1),
1850 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1851 than 4 branch instructions in the 16 byte window. */
1852 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1855 /* X86_TUNE_SCHEDULE */
1856 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1859 /* X86_TUNE_USE_BT */
1860 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1862 /* X86_TUNE_USE_INCDEC */
1863 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1865 /* X86_TUNE_PAD_RETURNS */
1866 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1868 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1871 /* X86_TUNE_EXT_80387_CONSTANTS */
1872 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1873 | m_CORE2 | m_GENERIC,
1875 /* X86_TUNE_SHORTEN_X87_SSE */
1878 /* X86_TUNE_AVOID_VECTOR_DECODE */
1881 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1882 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1885 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory takes
1886 the vector path on AMD machines. */
1887 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1889 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector path on AMD machines. */
1891 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1893 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than via a mov. */
1897 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1898 but is one byte longer. */
1901 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1902 operand that cannot be represented using a modRM byte. The XOR
1903 replacement is long decoded, so this split helps here as well. */
1906 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
1908 m_AMDFAM10 | m_GENERIC,
1910 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1911 from integer to FP. */
1914 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1915 with a subsequent conditional jump instruction into a single
1916 compare-and-branch uop. */
1919 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1920 will impact LEA instruction selection. */
1923 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector instructions. */
1928 /* Feature tests against the various architecture variations. */
1929 unsigned char ix86_arch_features[X86_ARCH_LAST];
1931 /* Feature tests against the various architecture variations, used to create
1932 ix86_arch_features based on the processor mask. */
1933 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1934 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1935 ~(m_386 | m_486 | m_PENT | m_K6),
1937 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1940 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1943 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1946 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1950 static const unsigned int x86_accumulate_outgoing_args
1951 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1954 static const unsigned int x86_arch_always_fancy_math_387
1955 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1956 | m_NOCONA | m_CORE2 | m_GENERIC;
1958 static enum stringop_alg stringop_alg = no_stringop;
1960 /* In case the average insn count for a single function invocation is
1961 lower than this constant, emit fast (but longer) prologue and
1962 epilogue code. */
1963 #define FAST_PROLOGUE_INSN_COUNT 20
1965 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1966 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1967 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1968 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1970 /* Array of the smallest class containing reg number REGNO, indexed by
1971 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1973 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1975 /* ax, dx, cx, bx */
1976 AREG, DREG, CREG, BREG,
1977 /* si, di, bp, sp */
1978 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1980 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1981 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1984 /* flags, fpsr, fpcr, frame */
1985 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1987 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1990 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1993 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1994 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1995 /* SSE REX registers */
1996 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2000 /* The "default" register map used in 32bit mode. */
2002 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2004 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2005 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2006 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2007 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2008 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2009 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2010 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2013 /* The "default" register map used in 64bit mode. */
2015 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2017 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2018 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2019 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2020 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2021 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2022 8,9,10,11,12,13,14,15, /* extended integer registers */
2023 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2026 /* Define the register numbers to be used in Dwarf debugging information.
2027 The SVR4 reference port C compiler uses the following register numbers
2028 in its Dwarf output code:
2029 0 for %eax (gcc regno = 0)
2030 1 for %ecx (gcc regno = 2)
2031 2 for %edx (gcc regno = 1)
2032 3 for %ebx (gcc regno = 3)
2033 4 for %esp (gcc regno = 7)
2034 5 for %ebp (gcc regno = 6)
2035 6 for %esi (gcc regno = 4)
2036 7 for %edi (gcc regno = 5)
2037 The following three DWARF register numbers are never generated by
2038 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2039 believes these numbers have these meanings.
2040 8 for %eip (no gcc equivalent)
2041 9 for %eflags (gcc regno = 17)
2042 10 for %trapno (no gcc equivalent)
2043 It is not at all clear how we should number the FP stack registers
2044 for the x86 architecture. If the version of SDB on x86/svr4 were
2045 a bit less brain dead with respect to floating-point then we would
2046 have a precedent to follow with respect to DWARF register numbers
2047 for x86 FP registers, but the SDB on x86/svr4 is so completely
2048 broken with respect to FP registers that it is hardly worth thinking
2049 of it as something to strive for compatibility with.
2050 The version of x86/svr4 SDB I have at the moment does (partially)
2051 seem to believe that DWARF register number 11 is associated with
2052 the x86 register %st(0), but that's about all. Higher DWARF
2053 register numbers don't seem to be associated with anything in
2054 particular, and even for DWARF regno 11, SDB only seems to
2055 understand that it should say that a variable lives in %st(0) (when
2056 asked via an `=' command) if we said it was in DWARF regno 11,
2057 but SDB still prints garbage when asked for the value of the
2058 variable in question (via a `/' command).
2059 (Also note that the labels SDB prints for various FP stack regs
2060 when doing an `x' command are all wrong.)
2061 Note that these problems generally don't affect the native SVR4
2062 C compiler because it doesn't allow the use of -O with -g and
2063 because when it is *not* optimizing, it allocates a memory
2064 location for each floating-point variable, and the memory
2065 location is what gets described in the DWARF AT_location
2066 attribute for the variable in question.
2067 Regardless of the severe mental illness of the x86/svr4 SDB, we
2068 do something sensible here and we use the following DWARF
2069 register numbers. Note that these are all stack-top-relative numbers:
2071 11 for %st(0) (gcc regno = 8)
2072 12 for %st(1) (gcc regno = 9)
2073 13 for %st(2) (gcc regno = 10)
2074 14 for %st(3) (gcc regno = 11)
2075 15 for %st(4) (gcc regno = 12)
2076 16 for %st(5) (gcc regno = 13)
2077 17 for %st(6) (gcc regno = 14)
2078 18 for %st(7) (gcc regno = 15)
2080 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2082 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2083 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2084 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2085 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2086 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2087 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2088 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
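/* Illustrative note (not part of the original source): a debug-info
   producer maps a gcc hard register number to its SVR4 DWARF number by
   indexing the table above; e.g. gcc regno 1 (%edx) yields DWARF regno 2,
   and the FP stack regs (gcc regnos 8..15) yield 11..18.  A hypothetical
   helper:  */
#if 0
static int
example_svr4_dwarf_regno (unsigned int regno)
{
  return regno < FIRST_PSEUDO_REGISTER ? svr4_dbx_register_map[regno] : -1;
}
#endif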
2091 /* Define parameter passing and return registers. */
2093 static int const x86_64_int_parameter_registers[6] =
2095 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2098 static int const x86_64_ms_abi_int_parameter_registers[4] =
2100 CX_REG, DX_REG, R8_REG, R9_REG
2103 static int const x86_64_int_return_registers[4] =
2105 AX_REG, DX_REG, DI_REG, SI_REG
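/* Illustrative note (not part of the original source): for a call such as
   f (a, b, c), the tables above give %rdi, %rsi, %rdx for the first three
   integer arguments under the SysV ABI but %rcx, %rdx, %r8 under the MS
   ABI; the first integer return value lands in %rax either way.  A
   hypothetical helper selecting the right table:  */
#if 0
static const int *
example_int_parameter_regs (enum calling_abi abi, int *n_regs)
{
  if (abi == MS_ABI)
    {
      *n_regs = 4;
      return x86_64_ms_abi_int_parameter_registers;
    }
  *n_regs = 6;
  return x86_64_int_parameter_registers;
}
#endif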
2108 /* Define the structure for the machine field in struct function. */
2110 struct GTY(()) stack_local_entry {
2111 unsigned short mode;
2114 struct stack_local_entry *next;
2117 /* Structure describing stack frame layout.
2118 Stack grows downward:
2124 saved static chain if ix86_static_chain_on_stack
2126 saved frame pointer if frame_pointer_needed
2127 <- HARD_FRAME_POINTER
2133 <- sse_regs_save_offset
2136 [va_arg registers] |
2140 [padding2] | = to_allocate
2149 int outgoing_arguments_size;
2150 HOST_WIDE_INT frame;
2152 /* The offsets relative to ARG_POINTER. */
2153 HOST_WIDE_INT frame_pointer_offset;
2154 HOST_WIDE_INT hard_frame_pointer_offset;
2155 HOST_WIDE_INT stack_pointer_offset;
2156 HOST_WIDE_INT hfp_save_offset;
2157 HOST_WIDE_INT reg_save_offset;
2158 HOST_WIDE_INT sse_reg_save_offset;
2160 /* When save_regs_using_mov is set, emit prologue using
2161 move instead of push instructions. */
2162 bool save_regs_using_mov;
2165 /* Code model option. */
2166 enum cmodel ix86_cmodel;
2168 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2170 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2172 /* Which unit we are generating floating point math for. */
2173 enum fpmath_unit ix86_fpmath;
2175 /* Which cpu we are scheduling for. */
2176 enum attr_cpu ix86_schedule;
2178 /* Which cpu we are optimizing for. */
2179 enum processor_type ix86_tune;
2181 /* Which instruction set architecture to use. */
2182 enum processor_type ix86_arch;
2184 /* True if the sse prefetch instruction is not a NOOP. */
2185 int x86_prefetch_sse;
2187 /* ix86_regparm_string as a number */
2188 static int ix86_regparm;
2190 /* -mstackrealign option */
2191 static const char ix86_force_align_arg_pointer_string[]
2192 = "force_align_arg_pointer";
2194 static rtx (*ix86_gen_leave) (void);
2195 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2196 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2197 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2198 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2199 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2200 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2201 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2202 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2203 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2205 /* Preferred alignment for stack boundary in bits. */
2206 unsigned int ix86_preferred_stack_boundary;
2208 /* Alignment for incoming stack boundary in bits specified at
2210 static unsigned int ix86_user_incoming_stack_boundary;
2212 /* Default alignment for incoming stack boundary in bits. */
2213 static unsigned int ix86_default_incoming_stack_boundary;
2215 /* Alignment for incoming stack boundary in bits. */
2216 unsigned int ix86_incoming_stack_boundary;
2218 /* The abi used by target. */
2219 enum calling_abi ix86_abi;
2221 /* Values 1-5: see jump.c */
2222 int ix86_branch_cost;
2224 /* Calling abi specific va_list type nodes. */
2225 static GTY(()) tree sysv_va_list_type_node;
2226 static GTY(()) tree ms_va_list_type_node;
2228 /* Variables which are this size or smaller are put in the data/bss
2229 or ldata/lbss sections. */
2231 int ix86_section_threshold = 65536;
2233 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2234 char internal_label_prefix[16];
2235 int internal_label_prefix_len;
2237 /* Fence to use after loop using movnt. */
2240 /* Register class used for passing a given 64bit part of the argument.
2241 These represent classes as documented by the PS ABI, with the exception
2242 of the SSESF and SSEDF classes, which are basically the SSE class: gcc
2243 just uses an SFmode or DFmode move instead of a DImode one to avoid
2245 reformatting penalties. Similarly we play games with INTEGERSI_CLASS to
2246 use cheaper SImode moves whenever possible (the upper half does contain padding). */
2247 enum x86_64_reg_class
2250 X86_64_INTEGER_CLASS,
2251 X86_64_INTEGERSI_CLASS,
2258 X86_64_COMPLEX_X87_CLASS,
2262 #define MAX_CLASSES 4
2264 /* Table of constants used by fldpi, fldln2, etc.... */
2265 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2266 static bool ext_80387_constants_init = 0;
2269 static struct machine_function * ix86_init_machine_status (void);
2270 static rtx ix86_function_value (const_tree, const_tree, bool);
2271 static bool ix86_function_value_regno_p (const unsigned int);
2272 static rtx ix86_static_chain (const_tree, bool);
2273 static int ix86_function_regparm (const_tree, const_tree);
2274 static void ix86_compute_frame_layout (struct ix86_frame *);
2275 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2277 static void ix86_add_new_builtins (int);
2278 static rtx ix86_expand_vec_perm_builtin (tree);
2279 static tree ix86_canonical_va_list_type (tree);
2280 static void predict_jump (int);
2281 static unsigned int split_stack_prologue_scratch_regno (void);
2282 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2284 enum ix86_function_specific_strings
2286 IX86_FUNCTION_SPECIFIC_ARCH,
2287 IX86_FUNCTION_SPECIFIC_TUNE,
2288 IX86_FUNCTION_SPECIFIC_FPMATH,
2289 IX86_FUNCTION_SPECIFIC_MAX
2292 static char *ix86_target_string (int, int, const char *, const char *,
2293 const char *, bool);
2294 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2295 static void ix86_function_specific_save (struct cl_target_option *);
2296 static void ix86_function_specific_restore (struct cl_target_option *);
2297 static void ix86_function_specific_print (FILE *, int,
2298 struct cl_target_option *);
2299 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2300 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2301 static bool ix86_can_inline_p (tree, tree);
2302 static void ix86_set_current_function (tree);
2303 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2305 static enum calling_abi ix86_function_abi (const_tree);
2308 #ifndef SUBTARGET32_DEFAULT_CPU
2309 #define SUBTARGET32_DEFAULT_CPU "i386"
2312 /* The svr4 ABI for the i386 says that records and unions are returned
2313 in memory. */
2314 #ifndef DEFAULT_PCC_STRUCT_RETURN
2315 #define DEFAULT_PCC_STRUCT_RETURN 1
2318 /* Whether -mtune= or -march= were specified */
2319 static int ix86_tune_defaulted;
2320 static int ix86_arch_specified;
2322 /* A mask of ix86_isa_flags that includes bit X if X
2323 was set or cleared on the command line. */
2324 static int ix86_isa_flags_explicit;
2326 /* Define a set of ISAs which are available when a given ISA is
2327 enabled. MMX and SSE ISAs are handled separately. */
2329 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2330 #define OPTION_MASK_ISA_3DNOW_SET \
2331 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2333 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2334 #define OPTION_MASK_ISA_SSE2_SET \
2335 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2336 #define OPTION_MASK_ISA_SSE3_SET \
2337 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2338 #define OPTION_MASK_ISA_SSSE3_SET \
2339 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2340 #define OPTION_MASK_ISA_SSE4_1_SET \
2341 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2342 #define OPTION_MASK_ISA_SSE4_2_SET \
2343 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2344 #define OPTION_MASK_ISA_AVX_SET \
2345 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2346 #define OPTION_MASK_ISA_FMA_SET \
2347 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2349 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */
2351 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2353 #define OPTION_MASK_ISA_SSE4A_SET \
2354 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2355 #define OPTION_MASK_ISA_FMA4_SET \
2356 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2357 | OPTION_MASK_ISA_AVX_SET)
2358 #define OPTION_MASK_ISA_XOP_SET \
2359 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2360 #define OPTION_MASK_ISA_LWP_SET \
2363 /* AES and PCLMUL need SSE2 because they use xmm registers */
2364 #define OPTION_MASK_ISA_AES_SET \
2365 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2366 #define OPTION_MASK_ISA_PCLMUL_SET \
2367 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2369 #define OPTION_MASK_ISA_ABM_SET \
2370 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2372 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2373 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2374 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2375 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2376 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2378 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2379 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2380 #define OPTION_MASK_ISA_F16C_SET \
2381 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2383 /* Define a set of ISAs which aren't available when a given ISA is
2384 disabled. MMX and SSE ISAs are handled separately. */
2386 #define OPTION_MASK_ISA_MMX_UNSET \
2387 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2388 #define OPTION_MASK_ISA_3DNOW_UNSET \
2389 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2390 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2392 #define OPTION_MASK_ISA_SSE_UNSET \
2393 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2394 #define OPTION_MASK_ISA_SSE2_UNSET \
2395 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2396 #define OPTION_MASK_ISA_SSE3_UNSET \
2397 (OPTION_MASK_ISA_SSE3 \
2398 | OPTION_MASK_ISA_SSSE3_UNSET \
2399 | OPTION_MASK_ISA_SSE4A_UNSET )
2400 #define OPTION_MASK_ISA_SSSE3_UNSET \
2401 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2402 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2403 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2404 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2405 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2406 #define OPTION_MASK_ISA_AVX_UNSET \
2407 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2408 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2409 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2411 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same as -mno-sse4.1. */
2413 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2415 #define OPTION_MASK_ISA_SSE4A_UNSET \
2416 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2418 #define OPTION_MASK_ISA_FMA4_UNSET \
2419 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2420 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2421 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2423 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2424 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2425 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2426 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2427 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2428 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2429 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2430 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2432 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2433 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2434 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
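/* Illustrative note (not part of the original source): the _SET masks
   chain downwards, so enabling an ISA also enables its prerequisites,
   while the _UNSET masks chain upwards, so disabling an ISA also disables
   everything built on top of it.  For example:  */
#if 0
/* Turns on SSE4.1 plus SSSE3, SSE3, SSE2 and SSE via the chained mask.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;

/* Turns off SSE2 plus SSE3, SSSE3, SSE4.x, SSE4A, AVX etc.  */
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
#endif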
2436 /* Vectorization library interface and handlers. */
2437 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2439 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2440 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2442 /* Processor target table, indexed by processor number */
2445 const struct processor_costs *cost; /* Processor costs */
2446 const int align_loop; /* Default alignments. */
2447 const int align_loop_max_skip;
2448 const int align_jump;
2449 const int align_jump_max_skip;
2450 const int align_func;
2453 static const struct ptt processor_target_table[PROCESSOR_max] =
2455 {&i386_cost, 4, 3, 4, 3, 4},
2456 {&i486_cost, 16, 15, 16, 15, 16},
2457 {&pentium_cost, 16, 7, 16, 7, 16},
2458 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2459 {&geode_cost, 0, 0, 0, 0, 0},
2460 {&k6_cost, 32, 7, 32, 7, 32},
2461 {&athlon_cost, 16, 7, 16, 7, 16},
2462 {&pentium4_cost, 0, 0, 0, 0, 0},
2463 {&k8_cost, 16, 7, 16, 7, 16},
2464 {&nocona_cost, 0, 0, 0, 0, 0},
2465 {&core2_cost, 16, 10, 16, 10, 16},
2466 /* Core i7 32-bit. */
2467 {&generic32_cost, 16, 10, 16, 10, 16},
2468 /* Core i7 64-bit. */
2469 {&generic64_cost, 16, 10, 16, 10, 16},
2470 {&generic32_cost, 16, 7, 16, 7, 16},
2471 {&generic64_cost, 16, 10, 16, 10, 16},
2472 {&amdfam10_cost, 32, 24, 32, 7, 32},
2473 {&bdver1_cost, 32, 24, 32, 7, 32},
2474 {&atom_cost, 16, 7, 16, 7, 16}
2477 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2505 /* Return true if a red-zone is in use. */
2508 ix86_using_red_zone (void)
2510 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2513 /* Implement TARGET_HANDLE_OPTION. */
2516 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2523 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2524 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2528 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2529 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2536 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2537 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2541 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2542 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2552 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2553 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2557 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2558 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2565 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2566 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2570 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2571 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2578 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2579 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2583 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2584 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2591 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2592 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2596 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2597 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2604 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2605 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2609 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2610 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2617 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2618 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2622 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2623 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2630 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2631 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2635 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2643 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2644 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2648 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2654 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2655 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2659 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2660 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2666 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2667 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2671 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2672 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2679 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2680 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2684 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2685 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2692 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2693 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2697 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2698 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2705 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2706 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2710 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2711 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2718 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2719 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2723 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2724 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2731 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2732 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2736 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2737 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2744 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2745 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2749 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2750 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2757 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2758 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2762 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2763 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2770 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2771 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2775 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2776 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2783 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2784 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2788 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2789 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2796 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2797 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2801 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2802 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2809 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2810 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2814 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2815 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2822 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2823 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2827 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2828 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2835 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2836 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2840 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2841 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2848 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2853 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
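/* Illustrative note (not part of the original source): every ISA case in
   the switch above follows the same pattern; besides updating
   ix86_isa_flags it records the touched bits in ix86_isa_flags_explicit,
   so that a later -march= default cannot override an explicit user choice.
   A sketch with a hypothetical option OPT_mfoo and masks
   OPTION_MASK_ISA_FOO_SET/_UNSET:  */
#if 0
case OPT_mfoo:
  if (value)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
    }
  else
    {
      ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
    }
  return true;
#endif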
2863 /* Return a string that documents the current -m options. The caller is
2864 responsible for freeing the string. */
2867 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2868 const char *fpmath, bool add_nl_p)
2870 struct ix86_target_opts
2872 const char *option; /* option string */
2873 int mask; /* isa mask options */
2876 /* This table is ordered so that options like -msse4.2, which imply
2877 preceding options, are matched first. */
2878 static struct ix86_target_opts isa_opts[] =
2880 { "-m64", OPTION_MASK_ISA_64BIT },
2881 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2882 { "-mfma", OPTION_MASK_ISA_FMA },
2883 { "-mxop", OPTION_MASK_ISA_XOP },
2884 { "-mlwp", OPTION_MASK_ISA_LWP },
2885 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2886 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2887 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2888 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2889 { "-msse3", OPTION_MASK_ISA_SSE3 },
2890 { "-msse2", OPTION_MASK_ISA_SSE2 },
2891 { "-msse", OPTION_MASK_ISA_SSE },
2892 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2893 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2894 { "-mmmx", OPTION_MASK_ISA_MMX },
2895 { "-mabm", OPTION_MASK_ISA_ABM },
2896 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2897 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2898 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2899 { "-maes", OPTION_MASK_ISA_AES },
2900 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2901 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2902 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2903 { "-mf16c", OPTION_MASK_ISA_F16C },
2907 static struct ix86_target_opts flag_opts[] =
2909 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2910 { "-m80387", MASK_80387 },
2911 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2912 { "-malign-double", MASK_ALIGN_DOUBLE },
2913 { "-mcld", MASK_CLD },
2914 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2915 { "-mieee-fp", MASK_IEEE_FP },
2916 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2917 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2918 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2919 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2920 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2921 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2922 { "-mno-red-zone", MASK_NO_RED_ZONE },
2923 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2924 { "-mrecip", MASK_RECIP },
2925 { "-mrtd", MASK_RTD },
2926 { "-msseregparm", MASK_SSEREGPARM },
2927 { "-mstack-arg-probe", MASK_STACK_PROBE },
2928 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2929 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2930 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2931 { "-mvzeroupper", MASK_VZEROUPPER },
2934 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2937 char target_other[40];
2946 memset (opts, '\0', sizeof (opts));
2948 /* Add -march= option. */
2951 opts[num][0] = "-march=";
2952 opts[num++][1] = arch;
2955 /* Add -mtune= option. */
2958 opts[num][0] = "-mtune=";
2959 opts[num++][1] = tune;
2962 /* Pick out the options in isa options. */
2963 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2965 if ((isa & isa_opts[i].mask) != 0)
2967 opts[num++][0] = isa_opts[i].option;
2968 isa &= ~ isa_opts[i].mask;
2972 if (isa && add_nl_p)
2974 opts[num++][0] = isa_other;
2975 sprintf (isa_other, "(other isa: %#x)", isa);
2978 /* Add flag options. */
2979 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2981 if ((flags & flag_opts[i].mask) != 0)
2983 opts[num++][0] = flag_opts[i].option;
2984 flags &= ~ flag_opts[i].mask;
2988 if (flags && add_nl_p)
2990 opts[num++][0] = target_other;
2991 sprintf (target_other, "(other flags: %#x)", flags);
2994 /* Add -fpmath= option. */
2997 opts[num][0] = "-mfpmath=";
2998 opts[num++][1] = fpmath;
3005 gcc_assert (num < ARRAY_SIZE (opts));
3007 /* Size the string. */
3009 sep_len = (add_nl_p) ? 3 : 1;	/* Separator width: 3 chars with line wrapping, else 1. */
3010 for (i = 0; i < num; i++)
3013 for (j = 0; j < 2; j++)
3015 len += strlen (opts[i][j]);
3018 /* Build the string. */
3019 ret = ptr = (char *) xmalloc (len);
3022 for (i = 0; i < num; i++)
3026 for (j = 0; j < 2; j++)
3027 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3034 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3042 for (j = 0; j < 2; j++)
3045 memcpy (ptr, opts[i][j], len2[j]);
3047 line_len += len2[j];
3052 gcc_assert (ret + len >= ptr);
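/* Illustrative note (not part of the original source): for, say,
   -m64 -msse4.2, the returned string would resemble
   "-m64 -msse4.2 -msse4.1 -mssse3 -msse3 -msse2 -msse", since every ISA
   bit left set by the option closure is printed, with implying options
   first per the table ordering above.  */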
3057 /* Return TRUE if software prefetching is beneficial for the
3058 target CPU. */
3061 software_prefetching_beneficial_p (void)
3065 case PROCESSOR_GEODE:
3067 case PROCESSOR_ATHLON:
3069 case PROCESSOR_AMDFAM10:
3077 /* Return true if profiling code should be emitted before the
3078 prologue; otherwise return false.
3079 Note: for x86 with "hotfix" this is rejected with sorry (). */
3081 ix86_profile_before_prologue (void)
3083 return flag_fentry != 0;
3086 /* Function that is callable from the debugger to print the current
3089 ix86_debug_options (void)
3091 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3092 ix86_arch_string, ix86_tune_string,
3093 ix86_fpmath_string, true);
3097 fprintf (stderr, "%s\n\n", opts);
3101 fputs ("<no options>\n\n", stderr);
3106 /* Override various settings based on options. If MAIN_ARGS_P, the
3107 options are from the command line, otherwise they are from
3108 attribute(target). */
3111 ix86_option_override_internal (bool main_args_p)
3114 unsigned int ix86_arch_mask, ix86_tune_mask;
3115 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3120 /* Comes from final.c -- no real reason to change it. */
3121 #define MAX_CODE_ALIGN 16
3129 PTA_PREFETCH_SSE = 1 << 4,
3131 PTA_3DNOW_A = 1 << 6,
3135 PTA_POPCNT = 1 << 10,
3137 PTA_SSE4A = 1 << 12,
3138 PTA_NO_SAHF = 1 << 13,
3139 PTA_SSE4_1 = 1 << 14,
3140 PTA_SSE4_2 = 1 << 15,
3142 PTA_PCLMUL = 1 << 17,
3145 PTA_MOVBE = 1 << 20,
3149 PTA_FSGSBASE = 1 << 24,
3150 PTA_RDRND = 1 << 25,
3156 const char *const name; /* processor name or nickname. */
3157 const enum processor_type processor;
3158 const enum attr_cpu schedule;
3159 const unsigned /*enum pta_flags*/ flags;
3161 const processor_alias_table[] =
3163 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3164 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3165 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3166 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3168 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3169 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3170 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3171 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3172 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3173 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3174 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3175 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3177 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3179 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_SSE2},
3181 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3182 PTA_MMX | PTA_SSE | PTA_SSE2},
3183 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3184 PTA_MMX | PTA_SSE | PTA_SSE2},
3185 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3187 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3189 | PTA_CX16 | PTA_NO_SAHF},
3190 {"core2", PROCESSOR_CORE2, CPU_CORE2,
3191 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3192 | PTA_SSSE3 | PTA_CX16},
3193 {"corei7", PROCESSOR_COREI7_64, CPU_GENERIC64,
3194 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3195 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3196 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3197 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3198 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3199 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3200 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3201 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3202 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3203 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3204 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3205 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3206 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3207 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3208 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3209 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3210 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3211 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3212 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3214 {"x86-64", PROCESSOR_K8, CPU_K8,
3215 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3216 {"k8", PROCESSOR_K8, CPU_K8,
3217 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3218 | PTA_SSE2 | PTA_NO_SAHF},
3219 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3220 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3221 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3222 {"opteron", PROCESSOR_K8, CPU_K8,
3223 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3224 | PTA_SSE2 | PTA_NO_SAHF},
3225 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3226 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3227 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3228 {"athlon64", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3230 | PTA_SSE2 | PTA_NO_SAHF},
3231 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3232 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3233 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3234 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3235 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3236 | PTA_SSE2 | PTA_NO_SAHF},
3237 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3238 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3239 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3240 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3241 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3242 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3243 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3244 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3245 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
3246 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
3247 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
3248 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3249 0 /* flags are only used for -march switch. */ },
3250 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3251 PTA_64BIT /* flags are only used for -march switch. */ },
3254 int const pta_size = ARRAY_SIZE (processor_alias_table);
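/* Illustrative note (not part of the original source): both -march= and
   -mtune= are resolved by a linear scan over processor_alias_table, as the
   loops further below do; in isolation the lookup is just:  */
#if 0
  int found = -1;
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	found = i;
	break;
      }
#endif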
3256 /* Set up prefix/suffix so the error messages refer to either the command
3257 line argument, or the attribute(target). */
3266 prefix = "option(\"";
3271 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3272 SUBTARGET_OVERRIDE_OPTIONS;
3275 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3276 SUBSUBTARGET_OVERRIDE_OPTIONS;
3279 /* -fPIC is the default for x86_64. */
3280 if (TARGET_MACHO && TARGET_64BIT)
3283 /* Need to check -mtune=generic first. */
3284 if (ix86_tune_string)
3286 if (!strcmp (ix86_tune_string, "generic")
3287 || !strcmp (ix86_tune_string, "i686")
3288 /* As special support for cross compilers we read -mtune=native
3289 as -mtune=generic. With native compilers we won't see
3290 -mtune=native, as it will have been changed by the driver. */
3291 || !strcmp (ix86_tune_string, "native"))
3294 ix86_tune_string = "generic64";
3296 ix86_tune_string = "generic32";
3298 /* If this call is for setting the option attribute, allow the
3299 generic32/generic64 that was previously set. */
3300 else if (!main_args_p
3301 && (!strcmp (ix86_tune_string, "generic32")
3302 || !strcmp (ix86_tune_string, "generic64")))
3304 else if (!strncmp (ix86_tune_string, "generic", 7))
3305 error ("bad value (%s) for %stune=%s %s",
3306 ix86_tune_string, prefix, suffix, sw);
3307 else if (!strcmp (ix86_tune_string, "x86-64"))
3308 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3309 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3310 prefix, suffix, prefix, suffix, prefix, suffix);
3314 if (ix86_arch_string)
3315 ix86_tune_string = ix86_arch_string;
3316 if (!ix86_tune_string)
3318 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3319 ix86_tune_defaulted = 1;
3322 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3323 need to use a sensible tune option. */
3324 if (!strcmp (ix86_tune_string, "generic")
3325 || !strcmp (ix86_tune_string, "x86-64")
3326 || !strcmp (ix86_tune_string, "i686"))
3329 ix86_tune_string = "generic64";
3331 ix86_tune_string = "generic32";
3335 if (ix86_stringop_string)
3337 if (!strcmp (ix86_stringop_string, "rep_byte"))
3338 stringop_alg = rep_prefix_1_byte;
3339 else if (!strcmp (ix86_stringop_string, "libcall"))
3340 stringop_alg = libcall;
3341 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3342 stringop_alg = rep_prefix_4_byte;
3343 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3345 /* rep; movq isn't available in 32-bit code. */
3346 stringop_alg = rep_prefix_8_byte;
3347 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3348 stringop_alg = loop_1_byte;
3349 else if (!strcmp (ix86_stringop_string, "loop"))
3350 stringop_alg = loop;
3351 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3352 stringop_alg = unrolled_loop;
3354 error ("bad value (%s) for %sstringop-strategy=%s %s",
3355 ix86_stringop_string, prefix, suffix, sw);
3358 if (!ix86_arch_string)
3359 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3361 ix86_arch_specified = 1;
3363 /* Validate -mabi= value. */
3364 if (ix86_abi_string)
3366 if (strcmp (ix86_abi_string, "sysv") == 0)
3367 ix86_abi = SYSV_ABI;
3368 else if (strcmp (ix86_abi_string, "ms") == 0)
3371 error ("unknown ABI (%s) for %sabi=%s %s",
3372 ix86_abi_string, prefix, suffix, sw);
3375 ix86_abi = DEFAULT_ABI;
3377 if (ix86_cmodel_string != 0)
3379 if (!strcmp (ix86_cmodel_string, "small"))
3380 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3381 else if (!strcmp (ix86_cmodel_string, "medium"))
3382 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3383 else if (!strcmp (ix86_cmodel_string, "large"))
3384 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3386 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3387 else if (!strcmp (ix86_cmodel_string, "32"))
3388 ix86_cmodel = CM_32;
3389 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3390 ix86_cmodel = CM_KERNEL;
3392 error ("bad value (%s) for %scmodel=%s %s",
3393 ix86_cmodel_string, prefix, suffix, sw);
3397 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3398 use of rip-relative addressing. This eliminates fixups that
3399 would otherwise be needed if this object is to be placed in a
3400 DLL, and is essentially just as efficient as direct addressing. */
3401 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3402 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3403 else if (TARGET_64BIT)
3404 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3406 ix86_cmodel = CM_32;
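/* Illustrative note (not part of the original source): when no -mcmodel=
   was given, the defaulting above reduces to:
     64-bit MS ABI       -> CM_SMALL_PIC (flag_pic forced on)
     64-bit with -fPIC   -> CM_SMALL_PIC
     64-bit without PIC  -> CM_SMALL
     32-bit              -> CM_32  */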
3408 if (ix86_asm_string != 0)
3411 && !strcmp (ix86_asm_string, "intel"))
3412 ix86_asm_dialect = ASM_INTEL;
3413 else if (!strcmp (ix86_asm_string, "att"))
3414 ix86_asm_dialect = ASM_ATT;
3416 error ("bad value (%s) for %sasm=%s %s",
3417 ix86_asm_string, prefix, suffix, sw);
3419 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3420 error ("code model %qs not supported in the %s bit mode",
3421 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3422 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3423 sorry ("%i-bit mode not compiled in",
3424 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3426 for (i = 0; i < pta_size; i++)
3427 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3429 ix86_schedule = processor_alias_table[i].schedule;
3430 ix86_arch = processor_alias_table[i].processor;
3431 /* Default cpu tuning to the architecture. */
3432 ix86_tune = ix86_arch;
3434 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3435 error ("CPU you selected does not support x86-64 "
3438 if (processor_alias_table[i].flags & PTA_MMX
3439 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3440 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3441 if (processor_alias_table[i].flags & PTA_3DNOW
3442 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3443 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3444 if (processor_alias_table[i].flags & PTA_3DNOW_A
3445 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3446 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3447 if (processor_alias_table[i].flags & PTA_SSE
3448 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3449 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3450 if (processor_alias_table[i].flags & PTA_SSE2
3451 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3452 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3453 if (processor_alias_table[i].flags & PTA_SSE3
3454 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3455 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3456 if (processor_alias_table[i].flags & PTA_SSSE3
3457 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3458 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3459 if (processor_alias_table[i].flags & PTA_SSE4_1
3460 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3461 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3462 if (processor_alias_table[i].flags & PTA_SSE4_2
3463 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3464 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3465 if (processor_alias_table[i].flags & PTA_AVX
3466 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3467 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3468 if (processor_alias_table[i].flags & PTA_FMA
3469 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3470 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3471 if (processor_alias_table[i].flags & PTA_SSE4A
3472 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3473 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3474 if (processor_alias_table[i].flags & PTA_FMA4
3475 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3476 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3477 if (processor_alias_table[i].flags & PTA_XOP
3478 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3479 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3480 if (processor_alias_table[i].flags & PTA_LWP
3481 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3482 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3483 if (processor_alias_table[i].flags & PTA_ABM
3484 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3485 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3486 if (processor_alias_table[i].flags & PTA_CX16
3487 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3488 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3489 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3490 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3491 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3492 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3493 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3494 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3495 if (processor_alias_table[i].flags & PTA_MOVBE
3496 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3497 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3498 if (processor_alias_table[i].flags & PTA_AES
3499 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3500 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3501 if (processor_alias_table[i].flags & PTA_PCLMUL
3502 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3503 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3504 if (processor_alias_table[i].flags & PTA_FSGSBASE
3505 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3506 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3507 if (processor_alias_table[i].flags & PTA_RDRND
3508 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3509 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3510 if (processor_alias_table[i].flags & PTA_F16C
3511 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3512 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3513 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3514 x86_prefetch_sse = true;
3519 if (!strcmp (ix86_arch_string, "generic"))
3520 error ("generic CPU can be used only for %stune=%s %s",
3521 prefix, suffix, sw);
3522 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3523 error ("bad value (%s) for %sarch=%s %s",
3524 ix86_arch_string, prefix, suffix, sw);
3526 ix86_arch_mask = 1u << ix86_arch;
3527 for (i = 0; i < X86_ARCH_LAST; ++i)
3528 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
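/* A hedged sketch (not GCC code; names are hypothetical) of the mask
   scheme used above: each architecture owns one bit of an unsigned int,
   and a feature-table entry is the bitmask of the architectures that
   have that feature, so the per-feature test is a single AND.  */
static inline int
example_arch_has_feature (unsigned int feature_archs_mask,
                          unsigned int arch_index)
{
  /* Mirrors !!(initial_ix86_arch_features[i] & ix86_arch_mask).  */
  return (feature_archs_mask & (1u << arch_index)) != 0;
}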
3530 for (i = 0; i < pta_size; i++)
3531 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3533 ix86_schedule = processor_alias_table[i].schedule;
3534 ix86_tune = processor_alias_table[i].processor;
3537 if (!(processor_alias_table[i].flags & PTA_64BIT))
3539 if (ix86_tune_defaulted)
3541 ix86_tune_string = "x86-64";
3542 for (i = 0; i < pta_size; i++)
3543 if (! strcmp (ix86_tune_string,
3544 processor_alias_table[i].name))
3546 ix86_schedule = processor_alias_table[i].schedule;
3547 ix86_tune = processor_alias_table[i].processor;
3550 error ("CPU you selected does not support x86-64 instruction set");
3556 /* Adjust tuning when compiling for 32-bit ABI. */
3559 case PROCESSOR_GENERIC64:
3560 ix86_tune = PROCESSOR_GENERIC32;
3561 ix86_schedule = CPU_PENTIUMPRO;
3564 case PROCESSOR_COREI7_64:
3565 ix86_tune = PROCESSOR_COREI7_32;
3566 ix86_schedule = CPU_PENTIUMPRO;
3573 /* Intel CPUs have always interpreted SSE prefetch instructions as
3574 NOPs; so, we can enable SSE prefetch instructions even when
3575 -mtune (rather than -march) points us to a processor that has them.
3576 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3577 higher processors. */
3579 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3580 x86_prefetch_sse = true;
3584 if (ix86_tune_specified && i == pta_size)
3585 error ("bad value (%s) for %stune=%s %s",
3586 ix86_tune_string, prefix, suffix, sw);
3588 ix86_tune_mask = 1u << ix86_tune;
3589 for (i = 0; i < X86_TUNE_LAST; ++i)
3590 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3592 #ifndef USE_IX86_FRAME_POINTER
3593 #define USE_IX86_FRAME_POINTER 0
3596 #ifndef USE_X86_64_FRAME_POINTER
3597 #define USE_X86_64_FRAME_POINTER 0
3600 /* Set the default values for switches whose default depends on TARGET_64BIT
3601 in case they weren't overwritten by command line options. */
3604 if (optimize > 1 && !global_options_set.x_flag_zee)
3606 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3607 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3608 if (flag_asynchronous_unwind_tables == 2)
3609 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3610 if (flag_pcc_struct_return == 2)
3611 flag_pcc_struct_return = 0;
3615 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3616 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3617 if (flag_asynchronous_unwind_tables == 2)
3618 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3619 if (flag_pcc_struct_return == 2)
3620 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3624 ix86_cost = &ix86_size_cost;
3626 ix86_cost = processor_target_table[ix86_tune].cost;
3628 /* Arrange to set up i386_stack_locals for all functions. */
3629 init_machine_status = ix86_init_machine_status;
3631 /* Validate -mregparm= value. */
3632 if (ix86_regparm_string)
3635 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3636 i = atoi (ix86_regparm_string);
3637 if (i < 0 || i > REGPARM_MAX)
3638 error ("%sregparm=%d%s is not between 0 and %d",
3639 prefix, i, suffix, REGPARM_MAX);
3644 ix86_regparm = REGPARM_MAX;
3646 /* If the user has provided any of the -malign-* options,
3647 warn and use that value only if -falign-* is not set.
3648 Remove this code in GCC 3.2 or later. */
3649 if (ix86_align_loops_string)
3651 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3652 prefix, suffix, suffix);
3653 if (align_loops == 0)
3655 i = atoi (ix86_align_loops_string);
3656 if (i < 0 || i > MAX_CODE_ALIGN)
3657 error ("%salign-loops=%d%s is not between 0 and %d",
3658 prefix, i, suffix, MAX_CODE_ALIGN);
3660 align_loops = 1 << i;
3664 if (ix86_align_jumps_string)
3666 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3667 prefix, suffix, suffix);
3668 if (align_jumps == 0)
3670 i = atoi (ix86_align_jumps_string);
3671 if (i < 0 || i > MAX_CODE_ALIGN)
3672 error ("%salign-jumps=%d%s is not between 0 and %d",
3673 prefix, i, suffix, MAX_CODE_ALIGN);
3675 align_jumps = 1 << i;
3679 if (ix86_align_funcs_string)
3681 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3682 prefix, suffix, suffix);
3683 if (align_functions == 0)
3685 i = atoi (ix86_align_funcs_string);
3686 if (i < 0 || i > MAX_CODE_ALIGN)
3687 error ("%salign-functions=%d%s is not between 0 and %d",
3688 prefix, i, suffix, MAX_CODE_ALIGN);
3690 align_functions = 1 << i;
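/* Worked example for the conversion above (a sketch, not part of the
   logic): -malign-loops=4 parses as i = 4, passes the
   0 <= i <= MAX_CODE_ALIGN range check, and stores
   align_loops = 1 << 4 = 16, i.e. a 16-byte loop alignment.  The
   -malign-jumps= and -malign-functions= paths work the same way.  */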
3694 /* Default align_* from the processor table. */
3695 if (align_loops == 0)
3697 align_loops = processor_target_table[ix86_tune].align_loop;
3698 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3700 if (align_jumps == 0)
3702 align_jumps = processor_target_table[ix86_tune].align_jump;
3703 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3705 if (align_functions == 0)
3707 align_functions = processor_target_table[ix86_tune].align_func;
3710 /* Validate -mbranch-cost= value, or provide default. */
3711 ix86_branch_cost = ix86_cost->branch_cost;
3712 if (ix86_branch_cost_string)
3714 i = atoi (ix86_branch_cost_string);
3716 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3718 ix86_branch_cost = i;
3720 if (ix86_section_threshold_string)
3722 i = atoi (ix86_section_threshold_string);
3724 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3726 ix86_section_threshold = i;
3729 if (ix86_tls_dialect_string)
3731 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3732 ix86_tls_dialect = TLS_DIALECT_GNU;
3733 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3734 ix86_tls_dialect = TLS_DIALECT_GNU2;
3736 error ("bad value (%s) for %stls-dialect=%s %s",
3737 ix86_tls_dialect_string, prefix, suffix, sw);
3740 if (ix87_precision_string)
3742 i = atoi (ix87_precision_string);
3743 if (i != 32 && i != 64 && i != 80)
3744 error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3749 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3751 /* Enable by default the SSE and MMX builtins. Do allow the user to
3752 explicitly disable any of these. In particular, disabling SSE and
3753 MMX for kernel code is extremely useful. */
3754 if (!ix86_arch_specified)
3756 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3757 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3760 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3764 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3766 if (!ix86_arch_specified)
3768 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3770 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
3771 when the programmer takes care to keep the stack from being destroyed. */
3772 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3773 target_flags |= MASK_NO_RED_ZONE;
3776 /* Keep nonleaf frame pointers. */
3777 if (flag_omit_frame_pointer)
3778 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3779 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3780 flag_omit_frame_pointer = 1;
3782 /* If we're doing fast math, we don't care about comparison order
3783 wrt NaNs. This lets us use a shorter comparison sequence. */
3784 if (flag_finite_math_only)
3785 target_flags &= ~MASK_IEEE_FP;
3787 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3788 since the insns won't need emulation. */
3789 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3790 target_flags &= ~MASK_NO_FANCY_MATH_387;
3792 /* Likewise, if the target doesn't have a 387, or we've specified
3793 software floating point, don't use 387 inline intrinsics. */
3795 target_flags |= MASK_NO_FANCY_MATH_387;
3797 /* Turn on MMX builtins for -msse. */
3800 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3801 x86_prefetch_sse = true;
3804 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3805 if (TARGET_SSE4_2 || TARGET_ABM)
3806 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3808 /* Validate -mpreferred-stack-boundary= value or default it to
3809 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3810 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3811 if (ix86_preferred_stack_boundary_string)
3813 int min = (TARGET_64BIT ? 4 : 2);
3814 int max = (TARGET_SEH ? 4 : 12);
3816 i = atoi (ix86_preferred_stack_boundary_string);
3817 if (i < min || i > max)
3820 error ("%spreferred-stack-boundary%s is not supported "
3821 "for this target", prefix, suffix);
3823 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
3824 prefix, i, suffix, min, max);
3827 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
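/* A minimal standalone illustration of the conversion above, assuming
   BITS_PER_UNIT is 8 as on ia32: the option value is an exponent, so
   -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128 bits, i.e. a
   16-byte-aligned stack.  The helper below is a sketch, not GCC code.  */
static inline unsigned int
example_boundary_in_bits (unsigned int exponent)
{
  return (1u << exponent) * 8;  /* 8 stands in for BITS_PER_UNIT.  */
}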
3830 /* Set the default value for -mstackrealign. */
3831 if (ix86_force_align_arg_pointer == -1)
3832 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3834 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3836 /* Validate -mincoming-stack-boundary= value or default it to
3837 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3838 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3839 if (ix86_incoming_stack_boundary_string)
3841 i = atoi (ix86_incoming_stack_boundary_string);
3842 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3843 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3844 i, TARGET_64BIT ? 4 : 2);
3847 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3848 ix86_incoming_stack_boundary
3849 = ix86_user_incoming_stack_boundary;
3853 /* Accept -msseregparm only if at least SSE support is enabled. */
3854 if (TARGET_SSEREGPARM
3856 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3858 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3859 if (ix86_fpmath_string != 0)
3861 if (! strcmp (ix86_fpmath_string, "387"))
3862 ix86_fpmath = FPMATH_387;
3863 else if (! strcmp (ix86_fpmath_string, "sse"))
3867 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3868 ix86_fpmath = FPMATH_387;
3871 ix86_fpmath = FPMATH_SSE;
3873 else if (! strcmp (ix86_fpmath_string, "387,sse")
3874 || ! strcmp (ix86_fpmath_string, "387+sse")
3875 || ! strcmp (ix86_fpmath_string, "sse,387")
3876 || ! strcmp (ix86_fpmath_string, "sse+387")
3877 || ! strcmp (ix86_fpmath_string, "both"))
3881 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3882 ix86_fpmath = FPMATH_387;
3884 else if (!TARGET_80387)
3886 warning (0, "387 instruction set disabled, using SSE arithmetic");
3887 ix86_fpmath = FPMATH_SSE;
3890 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3893 error ("bad value (%s) for %sfpmath=%s %s",
3894 ix86_fpmath_string, prefix, suffix, sw);
3897 /* If the i387 is disabled, then do not return values in it. */
3899 target_flags &= ~MASK_FLOAT_RETURNS;
3901 /* Use an external vectorized library when vectorizing intrinsics. */
3902 if (ix86_veclibabi_string)
3904 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3905 ix86_veclib_handler = ix86_veclibabi_svml;
3906 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3907 ix86_veclib_handler = ix86_veclibabi_acml;
3909 error ("unknown vectorization library ABI type (%s) for "
3910 "%sveclibabi=%s %s", ix86_veclibabi_string,
3911 prefix, suffix, sw);
3914 if ((!USE_IX86_FRAME_POINTER
3915 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3916 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3918 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3920 /* ??? Unwind info is not correct around the CFG unless either a frame
3921 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3922 unwind info generation to be aware of the CFG and propagating states
3923 around edges. */
3924 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3925 || flag_exceptions || flag_non_call_exceptions)
3926 && flag_omit_frame_pointer
3927 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3929 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3930 warning (0, "unwind tables currently require either a frame pointer "
3931 "or %saccumulate-outgoing-args%s for correctness",
3933 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3936 /* If stack probes are required, the space used for large function
3937 arguments on the stack must also be probed, so enable
3938 -maccumulate-outgoing-args so this happens in the prologue. */
3939 if (TARGET_STACK_PROBE
3940 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3942 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3943 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3944 "for correctness", prefix, suffix);
3945 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3948 /* For sane SSE instruction set generation we need the fcomi instruction.
3949 It is safe to enable all CMOV instructions. */
3953 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3956 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3957 p = strchr (internal_label_prefix, 'X');
3958 internal_label_prefix_len = p - internal_label_prefix;
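/* A hedged sketch of the computation above: the target macro is asked
   to build a label from the sentinel string "LX", and everything in
   front of the 'X' is the target's internal label prefix.  If the
   generated string were ".LX0", for instance, the prefix would be ".L"
   with length 2.  example_internal_prefix_len is hypothetical; strchr
   is already visible via this file's headers.  */
static inline long
example_internal_prefix_len (const char *generated_label)
{
  const char *x = strchr (generated_label, 'X');
  return x ? (long) (x - generated_label) : 0;
}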
3962 /* When the scheduling description is not available, disable the scheduler
3963 pass so it won't slow down compilation and make x87 code slower. */
3964 if (!TARGET_SCHEDULE)
3965 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3967 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3968 ix86_cost->simultaneous_prefetches,
3969 global_options.x_param_values,
3970 global_options_set.x_param_values);
3971 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3972 global_options.x_param_values,
3973 global_options_set.x_param_values);
3974 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3975 global_options.x_param_values,
3976 global_options_set.x_param_values);
3977 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3978 global_options.x_param_values,
3979 global_options_set.x_param_values);
3981 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3982 if (flag_prefetch_loop_arrays < 0
3985 && software_prefetching_beneficial_p ())
3986 flag_prefetch_loop_arrays = 1;
3988 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3989 can be optimized to ap = __builtin_next_arg (0). */
3990 if (!TARGET_64BIT && !flag_split_stack)
3991 targetm.expand_builtin_va_start = NULL;
3995 ix86_gen_leave = gen_leave_rex64;
3996 ix86_gen_add3 = gen_adddi3;
3997 ix86_gen_sub3 = gen_subdi3;
3998 ix86_gen_sub3_carry = gen_subdi3_carry;
3999 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4000 ix86_gen_monitor = gen_sse3_monitor64;
4001 ix86_gen_andsp = gen_anddi3;
4002 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4003 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4004 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4008 ix86_gen_leave = gen_leave;
4009 ix86_gen_add3 = gen_addsi3;
4010 ix86_gen_sub3 = gen_subsi3;
4011 ix86_gen_sub3_carry = gen_subsi3_carry;
4012 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4013 ix86_gen_monitor = gen_sse3_monitor;
4014 ix86_gen_andsp = gen_andsi3;
4015 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4016 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4017 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4021 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4023 target_flags |= MASK_CLD & ~target_flags_explicit;
4026 if (!TARGET_64BIT && flag_pic)
4028 if (flag_fentry > 0)
4029 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
4032 else if (TARGET_SEH)
4034 if (flag_fentry == 0)
4035 sorry ("-mno-fentry isn't compatible with SEH");
4038 else if (flag_fentry < 0)
4040 #if defined(PROFILE_BEFORE_PROLOGUE)
4047 /* Save the initial options in case the user does function-specific options. */
4049 target_option_default_node = target_option_current_node
4050 = build_target_option_node ();
4054 /* Enable vzeroupper pass by default for TARGET_AVX. */
4055 if (!(target_flags_explicit & MASK_VZEROUPPER))
4056 target_flags |= MASK_VZEROUPPER;
4060 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4061 target_flags &= ~MASK_VZEROUPPER;
4065 /* Return TRUE if type TYPE and mode MODE use 256bit AVX modes. */
4068 use_avx256_p (enum machine_mode mode, const_tree type)
4070 return (VALID_AVX256_REG_MODE (mode)
4072 && TREE_CODE (type) == VECTOR_TYPE
4073 && int_size_in_bytes (type) == 32));
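/* An example of a type the predicate above accepts (a sketch using the
   GCC vector extension): eight floats occupy 32 bytes, i.e. one 256-bit
   AVX register, so both the mode check and the 32-byte vector-type
   check are satisfied.  */
typedef float example_v8sf __attribute__ ((vector_size (32)));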
4076 /* Return TRUE if VAL is passed in a register with a 256bit AVX mode. */
4079 function_pass_avx256_p (const_rtx val)
4084 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4087 if (GET_CODE (val) == PARALLEL)
4092 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4094 r = XVECEXP (val, 0, i);
4095 if (GET_CODE (r) == EXPR_LIST
4097 && REG_P (XEXP (r, 0))
4098 && (GET_MODE (XEXP (r, 0)) == OImode
4099 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
4107 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4110 ix86_option_override (void)
4112 ix86_option_override_internal (true);
4115 /* Update register usage after having seen the compiler flags. */
4118 ix86_conditional_register_usage (void)
4123 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4125 if (fixed_regs[i] > 1)
4126 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4127 if (call_used_regs[i] > 1)
4128 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4131 /* The PIC register, if it exists, is fixed. */
4132 j = PIC_OFFSET_TABLE_REGNUM;
4133 if (j != INVALID_REGNUM)
4134 fixed_regs[j] = call_used_regs[j] = 1;
4136 /* The MS_ABI changes the set of call-used registers. */
4137 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4139 call_used_regs[SI_REG] = 0;
4140 call_used_regs[DI_REG] = 0;
4141 call_used_regs[XMM6_REG] = 0;
4142 call_used_regs[XMM7_REG] = 0;
4143 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4144 call_used_regs[i] = 0;
4147 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4148 other call-clobbered regs for 64-bit. */
4151 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4153 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4154 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4155 && call_used_regs[i])
4156 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4159 /* If MMX is disabled, squash the registers. */
4161 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4162 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4163 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4165 /* If SSE is disabled, squash the registers. */
4167 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4168 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4169 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4171 /* If the FPU is disabled, squash the registers. */
4172 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4173 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4174 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4175 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4177 /* If 32-bit, squash the 64-bit registers. */
4180 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4182 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4188 /* Save the current options */
4191 ix86_function_specific_save (struct cl_target_option *ptr)
4193 ptr->arch = ix86_arch;
4194 ptr->schedule = ix86_schedule;
4195 ptr->tune = ix86_tune;
4196 ptr->fpmath = ix86_fpmath;
4197 ptr->branch_cost = ix86_branch_cost;
4198 ptr->tune_defaulted = ix86_tune_defaulted;
4199 ptr->arch_specified = ix86_arch_specified;
4200 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4201 ptr->ix86_target_flags_explicit = target_flags_explicit;
4203 /* The fields are char but the variables are not; make sure the
4204 values fit in the fields. */
4205 gcc_assert (ptr->arch == ix86_arch);
4206 gcc_assert (ptr->schedule == ix86_schedule);
4207 gcc_assert (ptr->tune == ix86_tune);
4208 gcc_assert (ptr->fpmath == ix86_fpmath);
4209 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4212 /* Restore the current options */
4215 ix86_function_specific_restore (struct cl_target_option *ptr)
4217 enum processor_type old_tune = ix86_tune;
4218 enum processor_type old_arch = ix86_arch;
4219 unsigned int ix86_arch_mask, ix86_tune_mask;
4222 ix86_arch = (enum processor_type) ptr->arch;
4223 ix86_schedule = (enum attr_cpu) ptr->schedule;
4224 ix86_tune = (enum processor_type) ptr->tune;
4225 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4226 ix86_branch_cost = ptr->branch_cost;
4227 ix86_tune_defaulted = ptr->tune_defaulted;
4228 ix86_arch_specified = ptr->arch_specified;
4229 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4230 target_flags_explicit = ptr->ix86_target_flags_explicit;
4232 /* Recreate the arch feature tests if the arch changed */
4233 if (old_arch != ix86_arch)
4235 ix86_arch_mask = 1u << ix86_arch;
4236 for (i = 0; i < X86_ARCH_LAST; ++i)
4237 ix86_arch_features[i]
4238 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4241 /* Recreate the tune optimization tests */
4242 if (old_tune != ix86_tune)
4244 ix86_tune_mask = 1u << ix86_tune;
4245 for (i = 0; i < X86_TUNE_LAST; ++i)
4246 ix86_tune_features[i]
4247 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4251 /* Print the current options */
4254 ix86_function_specific_print (FILE *file, int indent,
4255 struct cl_target_option *ptr)
4258 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4259 NULL, NULL, NULL, false);
4261 fprintf (file, "%*sarch = %d (%s)\n",
4264 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4265 ? cpu_names[ptr->arch]
4268 fprintf (file, "%*stune = %d (%s)\n",
4271 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4272 ? cpu_names[ptr->tune]
4275 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4276 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4277 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4278 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4282 fprintf (file, "%*s%s\n", indent, "", target_string);
4283 free (target_string);
4288 /* Inner function to process the attribute ((target (...))): take an
4289 argument and set the current options from it.  If we have a list,
4290 recursively go over the list. */
4293 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4298 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4299 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4300 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4301 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
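/* Sketch of how the table macros above expand: IX86_ATTR_ISA ("avx",
   OPT_mavx), for example, becomes the initializer
     { "avx", sizeof ("avx") - 1, ix86_opt_isa, OPT_mavx, 0 }
   so every entry carries its option string, precomputed length,
   handler kind, option enumerator and mask.  */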
4316 enum ix86_opt_type type;
4321 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4322 IX86_ATTR_ISA ("abm", OPT_mabm),
4323 IX86_ATTR_ISA ("aes", OPT_maes),
4324 IX86_ATTR_ISA ("avx", OPT_mavx),
4325 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4326 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4327 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4328 IX86_ATTR_ISA ("sse", OPT_msse),
4329 IX86_ATTR_ISA ("sse2", OPT_msse2),
4330 IX86_ATTR_ISA ("sse3", OPT_msse3),
4331 IX86_ATTR_ISA ("sse4", OPT_msse4),
4332 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4333 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4334 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4335 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4336 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4337 IX86_ATTR_ISA ("xop", OPT_mxop),
4338 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4339 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4340 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4341 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4343 /* string options */
4344 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4345 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4346 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4349 IX86_ATTR_YES ("cld",
4353 IX86_ATTR_NO ("fancy-math-387",
4354 OPT_mfancy_math_387,
4355 MASK_NO_FANCY_MATH_387),
4357 IX86_ATTR_YES ("ieee-fp",
4361 IX86_ATTR_YES ("inline-all-stringops",
4362 OPT_minline_all_stringops,
4363 MASK_INLINE_ALL_STRINGOPS),
4365 IX86_ATTR_YES ("inline-stringops-dynamically",
4366 OPT_minline_stringops_dynamically,
4367 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4369 IX86_ATTR_NO ("align-stringops",
4370 OPT_mno_align_stringops,
4371 MASK_NO_ALIGN_STRINGOPS),
4373 IX86_ATTR_YES ("recip",
4379 /* If this is a list, recurse to get the options. */
4380 if (TREE_CODE (args) == TREE_LIST)
4384 for (; args; args = TREE_CHAIN (args))
4385 if (TREE_VALUE (args)
4386 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4392 else if (TREE_CODE (args) != STRING_CST)
4395 /* Handle multiple arguments separated by commas. */
4396 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4398 while (next_optstr && *next_optstr != '\0')
4400 char *p = next_optstr;
4402 char *comma = strchr (next_optstr, ',');
4403 const char *opt_string;
4404 size_t len, opt_len;
4409 enum ix86_opt_type type = ix86_opt_unknown;
4415 len = comma - next_optstr;
4416 next_optstr = comma + 1;
4424 /* Recognize no-xxx. */
4425 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4434 /* Find the option. */
4437 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4439 type = attrs[i].type;
4440 opt_len = attrs[i].len;
4441 if (ch == attrs[i].string[0]
4442 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4443 && memcmp (p, attrs[i].string, opt_len) == 0)
4446 mask = attrs[i].mask;
4447 opt_string = attrs[i].string;
4452 /* Process the option. */
4455 error ("attribute(target(\"%s\")) is unknown", orig_p);
4459 else if (type == ix86_opt_isa)
4460 ix86_handle_option (opt, p, opt_set_p);
4462 else if (type == ix86_opt_yes || type == ix86_opt_no)
4464 if (type == ix86_opt_no)
4465 opt_set_p = !opt_set_p;
4468 target_flags |= mask;
4470 target_flags &= ~mask;
4473 else if (type == ix86_opt_str)
4477 error ("option(\"%s\") was already specified", opt_string);
4481 p_strings[opt] = xstrdup (p + opt_len);
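/* A hedged sketch (hypothetical helper, not the loop above) of the
   "no-" handling in this parser: "no-sse4.1" is reduced to "sse4.1"
   with opt_set_p inverted before the table lookup.  */
static inline int
example_strip_no_prefix (const char **p, unsigned long *len)
{
  if (*len > 3 && (*p)[0] == 'n' && (*p)[1] == 'o' && (*p)[2] == '-')
    {
      *p += 3;
      *len -= 3;
      return 1;   /* Option is being negated.  */
    }
  return 0;       /* Option is being enabled.  */
}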
4491 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4494 ix86_valid_target_attribute_tree (tree args)
4496 const char *orig_arch_string = ix86_arch_string;
4497 const char *orig_tune_string = ix86_tune_string;
4498 const char *orig_fpmath_string = ix86_fpmath_string;
4499 int orig_tune_defaulted = ix86_tune_defaulted;
4500 int orig_arch_specified = ix86_arch_specified;
4501 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4504 struct cl_target_option *def
4505 = TREE_TARGET_OPTION (target_option_default_node);
4507 /* Process each of the options on the chain. */
4508 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4511 /* If the changed options are different from the default, rerun
4512 ix86_option_override_internal, and then save the options away.
4513 The string options are attribute options, and will be undone
4514 when we copy the save structure. */
4515 if (ix86_isa_flags != def->x_ix86_isa_flags
4516 || target_flags != def->x_target_flags
4517 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4518 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4519 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4521 /* If we are using the default tune= or arch=, undo the string assigned,
4522 and use the default. */
4523 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4524 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4525 else if (!orig_arch_specified)
4526 ix86_arch_string = NULL;
4528 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4529 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4530 else if (orig_tune_defaulted)
4531 ix86_tune_string = NULL;
4533 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4534 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4535 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4536 else if (!TARGET_64BIT && TARGET_SSE)
4537 ix86_fpmath_string = "sse,387";
4539 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4540 ix86_option_override_internal (false);
4542 /* Add any builtin functions with the new isa if any. */
4543 ix86_add_new_builtins (ix86_isa_flags);
4545 /* Save the current options unless we are validating options for #pragma. */
4547 t = build_target_option_node ();
4549 ix86_arch_string = orig_arch_string;
4550 ix86_tune_string = orig_tune_string;
4551 ix86_fpmath_string = orig_fpmath_string;
4553 /* Free up memory allocated to hold the strings */
4554 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4555 if (option_strings[i])
4556 free (option_strings[i]);
4562 /* Hook to validate attribute((target("string"))). */
4565 ix86_valid_target_attribute_p (tree fndecl,
4566 tree ARG_UNUSED (name),
4568 int ARG_UNUSED (flags))
4570 struct cl_target_option cur_target;
4572 tree old_optimize = build_optimization_node ();
4573 tree new_target, new_optimize;
4574 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4576 /* If the function changed the optimization levels as well as setting target
4577 options, start with the optimizations specified. */
4578 if (func_optimize && func_optimize != old_optimize)
4579 cl_optimization_restore (&global_options,
4580 TREE_OPTIMIZATION (func_optimize));
4582 /* The target attributes may also change some optimization flags, so update
4583 the optimization options if necessary. */
4584 cl_target_option_save (&cur_target, &global_options);
4585 new_target = ix86_valid_target_attribute_tree (args);
4586 new_optimize = build_optimization_node ();
4593 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4595 if (old_optimize != new_optimize)
4596 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4599 cl_target_option_restore (&global_options, &cur_target);
4601 if (old_optimize != new_optimize)
4602 cl_optimization_restore (&global_options,
4603 TREE_OPTIMIZATION (old_optimize));
4609 /* Hook to determine if one function can safely inline another. */
4612 ix86_can_inline_p (tree caller, tree callee)
4615 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4616 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4618 /* If callee has no option attributes, then it is ok to inline. */
4622 /* If caller has no option attributes, but callee does, then it is not ok to inline. */
4624 else if (!caller_tree)
4629 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4630 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4632 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4633 function can inline an SSE2 function, but an SSE2 function can't
4634 inline an SSE4 function. */
4635 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4636 != callee_opts->x_ix86_isa_flags)
4639 /* See if we have the same non-isa options. */
4640 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4643 /* See if arch, tune, etc. are the same. */
4644 else if (caller_opts->arch != callee_opts->arch)
4647 else if (caller_opts->tune != callee_opts->tune)
4650 else if (caller_opts->fpmath != callee_opts->fpmath)
4653 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4664 /* Remember the last target of ix86_set_current_function. */
4665 static GTY(()) tree ix86_previous_fndecl;
4667 /* Establish appropriate back-end context for processing the function
4668 FNDECL. The argument might be NULL to indicate processing at top
4669 level, outside of any function scope. */
4671 ix86_set_current_function (tree fndecl)
4673 /* Only change the context if the function changes. This hook is called
4674 several times in the course of compiling a function, and we don't want to
4675 slow things down too much or call target_reinit when it isn't safe. */
4676 if (fndecl && fndecl != ix86_previous_fndecl)
4678 tree old_tree = (ix86_previous_fndecl
4679 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4682 tree new_tree = (fndecl
4683 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4686 ix86_previous_fndecl = fndecl;
4687 if (old_tree == new_tree)
4692 cl_target_option_restore (&global_options,
4693 TREE_TARGET_OPTION (new_tree));
4699 struct cl_target_option *def
4700 = TREE_TARGET_OPTION (target_option_current_node);
4702 cl_target_option_restore (&global_options, def);
4709 /* Return true if this goes in large data/bss. */
4712 ix86_in_large_data_p (tree exp)
4714 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4717 /* Functions are never large data. */
4718 if (TREE_CODE (exp) == FUNCTION_DECL)
4721 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4723 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4724 if (strcmp (section, ".ldata") == 0
4725 || strcmp (section, ".lbss") == 0)
4731 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4733 /* If this is an incomplete type with size 0, then we can't put it
4734 in data because it might be too big when completed. */
4735 if (!size || size > ix86_section_threshold)
4742 /* Switch to the appropriate section for output of DECL.
4743 DECL is either a `VAR_DECL' node or a constant of some sort.
4744 RELOC indicates whether forming the initial value of DECL requires
4745 link-time relocations. */
4747 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4751 x86_64_elf_select_section (tree decl, int reloc,
4752 unsigned HOST_WIDE_INT align)
4754 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4755 && ix86_in_large_data_p (decl))
4757 const char *sname = NULL;
4758 unsigned int flags = SECTION_WRITE;
4759 switch (categorize_decl_for_section (decl, reloc))
4764 case SECCAT_DATA_REL:
4765 sname = ".ldata.rel";
4767 case SECCAT_DATA_REL_LOCAL:
4768 sname = ".ldata.rel.local";
4770 case SECCAT_DATA_REL_RO:
4771 sname = ".ldata.rel.ro";
4773 case SECCAT_DATA_REL_RO_LOCAL:
4774 sname = ".ldata.rel.ro.local";
4778 flags |= SECTION_BSS;
4781 case SECCAT_RODATA_MERGE_STR:
4782 case SECCAT_RODATA_MERGE_STR_INIT:
4783 case SECCAT_RODATA_MERGE_CONST:
4787 case SECCAT_SRODATA:
4794 /* We don't split these for the medium model.  Place them into
4795 default sections and hope for the best. */
4800 /* We might get called with string constants, but get_named_section
4801 doesn't like them as they are not DECLs. Also, we need to set
4802 flags in that case. */
4804 return get_section (sname, flags, NULL);
4805 return get_named_section (decl, sname, reloc);
4808 return default_elf_select_section (decl, reloc, align);
4811 /* Build up a unique section name, expressed as a
4812 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4813 RELOC indicates whether the initial value of EXP requires
4814 link-time relocations. */
4816 static void ATTRIBUTE_UNUSED
4817 x86_64_elf_unique_section (tree decl, int reloc)
4819 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4820 && ix86_in_large_data_p (decl))
4822 const char *prefix = NULL;
4823 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4824 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4826 switch (categorize_decl_for_section (decl, reloc))
4829 case SECCAT_DATA_REL:
4830 case SECCAT_DATA_REL_LOCAL:
4831 case SECCAT_DATA_REL_RO:
4832 case SECCAT_DATA_REL_RO_LOCAL:
4833 prefix = one_only ? ".ld" : ".ldata";
4836 prefix = one_only ? ".lb" : ".lbss";
4839 case SECCAT_RODATA_MERGE_STR:
4840 case SECCAT_RODATA_MERGE_STR_INIT:
4841 case SECCAT_RODATA_MERGE_CONST:
4842 prefix = one_only ? ".lr" : ".lrodata";
4844 case SECCAT_SRODATA:
4851 /* We don't split these for the medium model.  Place them into
4852 default sections and hope for the best. */
4857 const char *name, *linkonce;
4860 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4861 name = targetm.strip_name_encoding (name);
4863 /* If we're using one_only, then there needs to be a .gnu.linkonce
4864 prefix to the section name. */
4865 linkonce = one_only ? ".gnu.linkonce" : "";
4867 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4869 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4873 default_unique_section (decl, reloc);
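/* Worked example for the name built above (a sketch): a medium-model
   data variable "counter" is placed in ".ldata.counter"; if it is
   one_only and COMDAT groups are unavailable, it is placed in
   ".gnu.linkonce.ld.counter" instead.  */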
4876 #ifdef COMMON_ASM_OP
4877 /* This says how to output assembler code to declare an
4878 uninitialized external linkage data object.
4880 For medium model x86-64 we need to use the .largecomm directive for
4881 large objects. */
4883 x86_elf_aligned_common (FILE *file,
4884 const char *name, unsigned HOST_WIDE_INT size,
4887 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4888 && size > (unsigned int)ix86_section_threshold)
4889 fputs (".largecomm\t", file);
4891 fputs (COMMON_ASM_OP, file);
4892 assemble_name (file, name);
4893 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4894 size, align / BITS_PER_UNIT);
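/* Sketch of the directive emitted above for a medium-model object
   "big" of 70000 bytes (over the default 64K section threshold) with
   256-bit alignment:
       .largecomm	big,70000,32
   Smaller objects get the ordinary COMMON_ASM_OP form instead.  */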
4898 /* Utility function for targets to use in implementing
4899 ASM_OUTPUT_ALIGNED_BSS. */
4902 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4903 const char *name, unsigned HOST_WIDE_INT size,
4906 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4907 && size > (unsigned int)ix86_section_threshold)
4908 switch_to_section (get_named_section (decl, ".lbss", 0));
4910 switch_to_section (bss_section);
4911 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4912 #ifdef ASM_DECLARE_OBJECT_NAME
4913 last_assemble_variable_decl = decl;
4914 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4916 /* The standard thing is just to output a label for the object. */
4917 ASM_OUTPUT_LABEL (file, name);
4918 #endif /* ASM_DECLARE_OBJECT_NAME */
4919 ASM_OUTPUT_SKIP (file, size ? size : 1);
4922 static const struct default_options ix86_option_optimization_table[] =
4924 /* Turn off -fschedule-insns by default. It tends to make the
4925 problem with not enough registers even worse. */
4926 #ifdef INSN_SCHEDULING
4927 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
4930 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4931 SUBTARGET_OPTIMIZATION_OPTIONS,
4933 { OPT_LEVELS_NONE, 0, NULL, 0 }
4936 /* Implement TARGET_OPTION_INIT_STRUCT. */
4939 ix86_option_init_struct (struct gcc_options *opts)
4942 /* The Darwin libraries never set errno, so we might as well
4943 avoid calling them when that's the only reason we would. */
4944 opts->x_flag_errno_math = 0;
4946 opts->x_flag_pcc_struct_return = 2;
4947 opts->x_flag_asynchronous_unwind_tables = 2;
4948 opts->x_flag_vect_cost_model = 1;
4951 /* Decide whether we must probe the stack before any space allocation
4952 on this target. It's essentially TARGET_STACK_PROBE except when
4953 -fstack-check causes the stack to be already probed differently. */
4956 ix86_target_stack_probe (void)
4958 /* Do not probe the stack twice if static stack checking is enabled. */
4959 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4962 return TARGET_STACK_PROBE;
4965 /* Decide whether we can make a sibling call to a function. DECL is the
4966 declaration of the function being targeted by the call and EXP is the
4967 CALL_EXPR representing the call. */
4970 ix86_function_ok_for_sibcall (tree decl, tree exp)
4972 tree type, decl_or_type;
4975 /* If we are generating position-independent code, we cannot sibcall
4976 optimize any indirect call, or a direct call to a global function,
4977 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4981 && (!decl || !targetm.binds_local_p (decl)))
4984 /* If we need to align the outgoing stack, then sibcalling would
4985 unalign the stack, which may break the called function. */
4986 if (ix86_minimum_incoming_stack_boundary (true)
4987 < PREFERRED_STACK_BOUNDARY)
4992 decl_or_type = decl;
4993 type = TREE_TYPE (decl);
4997 /* We're looking at the CALL_EXPR, we need the type of the function. */
4998 type = CALL_EXPR_FN (exp); /* pointer expression */
4999 type = TREE_TYPE (type); /* pointer type */
5000 type = TREE_TYPE (type); /* function type */
5001 decl_or_type = type;
5004 /* Check that the return value locations are the same. Like
5005 if we are returning floats on the 80387 register stack, we cannot
5006 make a sibcall from a function that doesn't return a float to a
5007 function that does or, conversely, from a function that does return
5008 a float to a function that doesn't; the necessary stack adjustment
5009 would not be executed. This is also the place we notice
5010 differences in the return value ABI. Note that it is ok for one
5011 of the functions to have void return type as long as the return
5012 value of the other is passed in a register. */
5013 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5014 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5016 if (STACK_REG_P (a) || STACK_REG_P (b))
5018 if (!rtx_equal_p (a, b))
5021 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5023 /* Disable sibcall if we need to generate vzeroupper after callee returns. */
5025 if (TARGET_VZEROUPPER
5026 && cfun->machine->callee_return_avx256_p
5027 && !cfun->machine->caller_return_avx256_p)
5030 else if (!rtx_equal_p (a, b))
5035 /* The SYSV ABI has more call-clobbered registers;
5036 disallow sibcalls from MS to SYSV. */
5037 if (cfun->machine->call_abi == MS_ABI
5038 && ix86_function_type_abi (type) == SYSV_ABI)
5043 /* If this call is indirect, we'll need to be able to use a
5044 call-clobbered register for the address of the target function.
5045 Make sure that all such registers are not used for passing
5046 parameters. Note that DLLIMPORT functions are indirect. */
5048 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5050 if (ix86_function_regparm (type, NULL) >= 3)
5052 /* ??? Need to count the actual number of registers to be used,
5053 not the possible number of registers. Fix later. */
5059 /* Otherwise okay. That also includes certain types of indirect calls. */
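/* An illustration of the x87 return-stack rule checked above (hedged,
   not code from this file): with values returned in st(0), a sibcall
   from a non-float-returning function to a float-returning one would
   skip the required fstp, so e.g.
     float g (int);
     int f (int x) { return (int) g (x); }
   cannot be compiled as a sibling call when g returns on the x87
   stack.  */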
5063 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5064 and "sseregparm" calling convention attributes;
5065 arguments as in struct attribute_spec.handler. */
5068 ix86_handle_cconv_attribute (tree *node, tree name,
5070 int flags ATTRIBUTE_UNUSED,
5073 if (TREE_CODE (*node) != FUNCTION_TYPE
5074 && TREE_CODE (*node) != METHOD_TYPE
5075 && TREE_CODE (*node) != FIELD_DECL
5076 && TREE_CODE (*node) != TYPE_DECL)
5078 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5080 *no_add_attrs = true;
5084 /* Can combine regparm with all attributes but fastcall. */
5085 if (is_attribute_p ("regparm", name))
5089 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5091 error ("fastcall and regparm attributes are not compatible");
5094 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5096 error ("regparam and thiscall attributes are not compatible");
5099 cst = TREE_VALUE (args);
5100 if (TREE_CODE (cst) != INTEGER_CST)
5102 warning (OPT_Wattributes,
5103 "%qE attribute requires an integer constant argument",
5105 *no_add_attrs = true;
5107 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5109 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5111 *no_add_attrs = true;
5119 /* Do not warn when emulating the MS ABI. */
5120 if ((TREE_CODE (*node) != FUNCTION_TYPE
5121 && TREE_CODE (*node) != METHOD_TYPE)
5122 || ix86_function_type_abi (*node) != MS_ABI)
5123 warning (OPT_Wattributes, "%qE attribute ignored",
5125 *no_add_attrs = true;
5129 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5130 if (is_attribute_p ("fastcall", name))
5132 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5134 error ("fastcall and cdecl attributes are not compatible");
5136 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5138 error ("fastcall and stdcall attributes are not compatible");
5140 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5142 error ("fastcall and regparm attributes are not compatible");
5144 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5146 error ("fastcall and thiscall attributes are not compatible");
5150 /* Can combine stdcall with fastcall (redundant), regparm and
5151 sseregparm. */
5152 else if (is_attribute_p ("stdcall", name))
5154 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5156 error ("stdcall and cdecl attributes are not compatible");
5158 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5160 error ("stdcall and fastcall attributes are not compatible");
5162 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5164 error ("stdcall and thiscall attributes are not compatible");
5168 /* Can combine cdecl with regparm and sseregparm. */
5169 else if (is_attribute_p ("cdecl", name))
5171 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5173 error ("stdcall and cdecl attributes are not compatible");
5175 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5177 error ("fastcall and cdecl attributes are not compatible");
5179 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5181 error ("cdecl and thiscall attributes are not compatible");
5184 else if (is_attribute_p ("thiscall", name))
5186 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5187 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5189 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5191 error ("stdcall and thiscall attributes are not compatible");
5193 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5195 error ("fastcall and thiscall attributes are not compatible");
5197 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5199 error ("cdecl and thiscall attributes are not compatible");
5203 /* Can combine sseregparm with all attributes. */
5208 /* Return 0 if the attributes for two types are incompatible, 1 if they
5209 are compatible, and 2 if they are nearly compatible (which causes a
5210 warning to be generated). */
5213 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5215 /* Check for mismatch of non-default calling convention. */
5216 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5218 if (TREE_CODE (type1) != FUNCTION_TYPE
5219 && TREE_CODE (type1) != METHOD_TYPE)
5222 /* Check for mismatched fastcall/regparm types. */
5223 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5224 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5225 || (ix86_function_regparm (type1, NULL)
5226 != ix86_function_regparm (type2, NULL)))
5229 /* Check for mismatched sseregparm types. */
5230 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5231 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5234 /* Check for mismatched thiscall types. */
5235 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5236 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5239 /* Check for mismatched return types (cdecl vs stdcall). */
5240 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5241 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
5247 /* Return the regparm value for a function with the indicated TYPE and DECL.
5248 DECL may be NULL when calling function indirectly
5249 or considering a libcall. */
5252 ix86_function_regparm (const_tree type, const_tree decl)
5258 return (ix86_function_type_abi (type) == SYSV_ABI
5259 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5261 regparm = ix86_regparm;
5262 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5265 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5269 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5272 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5275 /* Use register calling convention for local functions when possible. */
5277 && TREE_CODE (decl) == FUNCTION_DECL
5279 && !(profile_flag && !flag_fentry))
5281 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5282 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5285 int local_regparm, globals = 0, regno;
5287 /* Make sure no regparm register is taken by a
5288 fixed register variable. */
5289 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5290 if (fixed_regs[local_regparm])
5293 /* We don't want to use regparm(3) for nested functions as
5294 these use a static chain pointer in the third argument. */
5295 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5298 /* In 32-bit mode save a register for the split stack. */
5299 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5302 /* Each fixed register usage increases register pressure,
5303 so fewer registers should be used for argument passing.
5304 This functionality can be overridden by an explicit
5305 regparm value. */
5306 for (regno = 0; regno <= DI_REG; regno++)
5307 if (fixed_regs[regno])
5311 = globals < local_regparm ? local_regparm - globals : 0;
5313 if (local_regparm > regparm)
5314 regparm = local_regparm;
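/* Worked example of the heuristic above (a sketch): compiling with
   -ffixed-ebx fixes one register in the ax..di range, so globals = 1
   and a local function otherwise eligible for regparm(3) is limited to
   local_regparm - globals = 3 - 1 = 2 argument registers.  */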
5321 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5322 DFmode (2) arguments in SSE registers for a function with the
5323 indicated TYPE and DECL. DECL may be NULL when calling function
5324 indirectly or considering a libcall. Otherwise return 0. */
5327 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5329 gcc_assert (!TARGET_64BIT);
5331 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5332 by the sseregparm attribute. */
5333 if (TARGET_SSEREGPARM
5334 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5341 error ("Calling %qD with attribute sseregparm without "
5342 "SSE/SSE2 enabled", decl);
5344 error ("Calling %qT with attribute sseregparm without "
5345 "SSE/SSE2 enabled", type);
5353 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5354 (and DFmode for SSE2) arguments in SSE registers. */
5355 if (decl && TARGET_SSE_MATH && optimize
5356 && !(profile_flag && !flag_fentry))
5358 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5359 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5361 return TARGET_SSE2 ? 2 : 1;
5367 /* Return true if EAX is live at the start of the function. Used by
5368 ix86_expand_prologue to determine if we need special help before
5369 calling allocate_stack_worker. */
5372 ix86_eax_live_at_start_p (void)
5374 /* Cheat. Don't bother working forward from ix86_function_regparm
5375 to the function type to whether an actual argument is located in
5376 eax. Instead just look at cfg info, which is still close enough
5377 to correct at this point. This gives false positives for broken
5378 functions that might use uninitialized data that happens to be
5379 allocated in eax, but who cares? */
5380 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5383 /* Value is the number of bytes of arguments automatically
5384 popped when returning from a subroutine call.
5385 FUNDECL is the declaration node of the function (as a tree),
5386 FUNTYPE is the data type of the function (as a tree),
5387 or for a library call it is an identifier node for the subroutine name.
5388 SIZE is the number of bytes of arguments passed on the stack.
5390 On the 80386, the RTD insn may be used to pop them if the number
5391 of args is fixed, but if the number is variable then the caller
5392 must pop them all. RTD can't be used for library calls now
5393 because the library is compiled with the Unix compiler.
5394 Use of RTD is a selectable option, since it is incompatible with
5395 standard Unix calling sequences. If the option is not selected,
5396 the caller must always pop the args.
5398 The attribute stdcall is equivalent to RTD on a per module basis. */
5401 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5405 /* None of the 64-bit ABIs pop arguments. */
5409 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5411 /* Cdecl functions override -mrtd, and never pop the stack. */
5412 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5414 /* Stdcall and fastcall functions will pop the stack if they do not
5415 take variable arguments. */
5416 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5417 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5418 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5421 if (rtd && ! stdarg_p (funtype))
5425 /* Lose any fake structure return argument if it is passed on the stack. */
5426 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5427 && !KEEP_AGGREGATE_RETURN_POINTER)
5429 int nregs = ix86_function_regparm (funtype, fundecl);
5431 return GET_MODE_SIZE (Pmode);
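/* Worked example for the logic above (a sketch): given
     void __attribute__ ((stdcall)) f (int a, int b);
   SIZE is 8 and the callee pops its own arguments, so 8 is returned;
   a plain cdecl function yields 0 and leaves the popping to the
   caller.  */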
5437 /* Argument support functions. */
5439 /* Return true when a register may be used to pass function parameters. */
5441 ix86_function_arg_regno_p (int regno)
5444 const int *parm_regs;
5449 return (regno < REGPARM_MAX
5450 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5452 return (regno < REGPARM_MAX
5453 || (TARGET_MMX && MMX_REGNO_P (regno)
5454 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5455 || (TARGET_SSE && SSE_REGNO_P (regno)
5456 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5461 if (SSE_REGNO_P (regno) && TARGET_SSE)
5466 if (TARGET_SSE && SSE_REGNO_P (regno)
5467 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5471 /* TODO: The function should depend on the current function's ABI, but
5472 builtins.c would need updating then.  Therefore we use the default ABI. */
5475 /* RAX is used as hidden argument to va_arg functions. */
5476 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5479 if (ix86_abi == MS_ABI)
5480 parm_regs = x86_64_ms_abi_int_parameter_registers;
5482 parm_regs = x86_64_int_parameter_registers;
5483 for (i = 0; i < (ix86_abi == MS_ABI
5484 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5485 if (regno == parm_regs[i])
5490 /* Return true if we do not know how to pass TYPE solely in registers. */
5493 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5495 if (must_pass_in_stack_var_size_or_pad (mode, type))
5498 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5499 The layout_type routine is crafty and tries to trick us into passing
5500 currently unsupported vector types on the stack by using TImode. */
5501 return (!TARGET_64BIT && mode == TImode
5502 && type && TREE_CODE (type) != VECTOR_TYPE);
5505 /* Return the size, in bytes, of the area reserved for arguments passed
5506 in registers for the function represented by FNDECL, depending on the
5507 ABI used. */
5509 ix86_reg_parm_stack_space (const_tree fndecl)
5511 enum calling_abi call_abi = SYSV_ABI;
5512 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5513 call_abi = ix86_function_abi (fndecl);
5515 call_abi = ix86_function_type_abi (fndecl);
5516 if (call_abi == MS_ABI)
5521 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5522 calling ABI used. */
5524 ix86_function_type_abi (const_tree fntype)
5526 if (TARGET_64BIT && fntype != NULL)
5528 enum calling_abi abi = ix86_abi;
5529 if (abi == SYSV_ABI)
5531 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5534 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5542 ix86_function_ms_hook_prologue (const_tree fn)
5544 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5546 if (decl_function_context (fn) != NULL_TREE)
5547 error_at (DECL_SOURCE_LOCATION (fn),
5548 "ms_hook_prologue is not compatible with nested function");
5555 static enum calling_abi
5556 ix86_function_abi (const_tree fndecl)
5560 return ix86_function_type_abi (TREE_TYPE (fndecl));
5563 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5564 calling ABI used. */
5566 ix86_cfun_abi (void)
5568 if (! cfun || ! TARGET_64BIT)
5570 return cfun->machine->call_abi;
5573 /* Write the extra assembler code needed to declare a function properly. */
5576 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5579 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5583 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5584 unsigned int filler_cc = 0xcccccccc;
5586 for (i = 0; i < filler_count; i += 4)
5587 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5590 #ifdef SUBTARGET_ASM_UNWIND_INIT
5591 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5594 ASM_OUTPUT_LABEL (asm_out_file, fname);
5596 /* Output magic byte marker, if hot-patch attribute is set. */
5601 /* leaq [%rsp + 0], %rsp */
5602 asm_fprintf (asm_out_file, ASM_BYTE
5603 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5607 /* movl.s %edi, %edi
5608 push   %ebp
5609 movl.s %esp, %ebp */
5610 asm_fprintf (asm_out_file, ASM_BYTE
5611 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5617 extern void init_regs (void);
5619 /* Implementation of the call ABI switching target hook.  The call-used
5620 register sets specific to FNDECL are put into effect.  See also
5621 CONDITIONAL_REGISTER_USAGE for more details.  */
5623 ix86_call_abi_override (const_tree fndecl)
5625 if (fndecl == NULL_TREE)
5626 cfun->machine->call_abi = ix86_abi;
5628 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5631 /* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
5632 expensive re-initialization through init_regs each time we switch function
5633 context, since this is needed only during RTL expansion.  */
5635 ix86_maybe_switch_abi (void)
5638 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5642 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5643 for a call to a function whose data type is FNTYPE.
5644 For a library call, FNTYPE is 0. */
5647 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5648 tree fntype, /* tree ptr for function decl */
5649 rtx libname, /* SYMBOL_REF of library name or 0 */
5653 struct cgraph_local_info *i;
5656 memset (cum, 0, sizeof (*cum));
5658 /* Initialize for the current callee. */
5661 cfun->machine->callee_pass_avx256_p = false;
5662 cfun->machine->callee_return_avx256_p = false;
5667 i = cgraph_local_info (fndecl);
5668 cum->call_abi = ix86_function_abi (fndecl);
5669 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5674 cum->call_abi = ix86_function_type_abi (fntype);
5676 fnret_type = TREE_TYPE (fntype);
5681 if (TARGET_VZEROUPPER && fnret_type)
5683 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5685 if (function_pass_avx256_p (fnret_value))
5687 /* The return value of this function uses 256bit AVX modes. */
5688 cfun->machine->use_avx256_p = true;
5690 cfun->machine->callee_return_avx256_p = true;
5692 cfun->machine->caller_return_avx256_p = true;
5696 cum->caller = caller;
5698 /* Set up the number of registers to use for passing arguments. */
5700 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5701 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5702 "or subtarget optimization implying it");
5703 cum->nregs = ix86_regparm;
5706 cum->nregs = (cum->call_abi == SYSV_ABI
5707 ? X86_64_REGPARM_MAX
5708 : X86_64_MS_REGPARM_MAX);
5712 cum->sse_nregs = SSE_REGPARM_MAX;
5715 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5716 ? X86_64_SSE_REGPARM_MAX
5717 : X86_64_MS_SSE_REGPARM_MAX);
5721 cum->mmx_nregs = MMX_REGPARM_MAX;
5722 cum->warn_avx = true;
5723 cum->warn_sse = true;
5724 cum->warn_mmx = true;
5726 /* Because types might mismatch between caller and callee, we need to
5727 use the actual type of the function for local calls.
5728 FIXME: cgraph_analyze can be told to actually record if function uses
5729 va_start so for local functions maybe_vaarg can be made aggressive
5731 FIXME: once the type system is fixed, we won't need this code anymore.  */
5733 fntype = TREE_TYPE (fndecl);
5734 cum->maybe_vaarg = (fntype
5735 ? (!prototype_p (fntype) || stdarg_p (fntype))
5740 /* If there are variable arguments, then we won't pass anything
5741 in registers in 32-bit mode. */
5742 if (stdarg_p (fntype))
5753 /* Use ecx and edx registers if function has fastcall attribute,
5754 else look for regparm information. */
5757 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5760 cum->fastcall = 1; /* Same first register as in fastcall. */
5762 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5768 cum->nregs = ix86_function_regparm (fntype, fndecl);
5771 /* Set up the number of SSE registers used for passing SFmode
5772 and DFmode arguments. Warn for mismatching ABI. */
5773 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
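/* Illustrative example (an editor's sketch, not from the original
   sources): for a 32-bit function declared as

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above sets cum->fastcall and leaves cum->nregs = 2, so
   function_arg_32 assigns A to ECX and B to EDX while C is pushed on
   the stack.  */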
5777 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5778 But in the case of vector types, it is some vector mode.
5780 When we have only some of our vector isa extensions enabled, then there
5781 are some modes for which vector_mode_supported_p is false. For these
5782 modes, the generic vector support in gcc will choose some non-vector mode
5783 in order to implement the type. By computing the natural mode, we'll
5784 select the proper ABI location for the operand and not depend on whatever
5785 the middle-end decides to do with these vector types.
5787 The middle-end can't deal with vector types > 16 bytes.  In this
5788 case, we return the original mode and warn about the ABI change if CUM isn't NULL.  */
5791 static enum machine_mode
5792 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5794 enum machine_mode mode = TYPE_MODE (type);
5796 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5798 HOST_WIDE_INT size = int_size_in_bytes (type);
5799 if ((size == 8 || size == 16 || size == 32)
5800 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5801 && TYPE_VECTOR_SUBPARTS (type) > 1)
5803 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5805 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5806 mode = MIN_MODE_VECTOR_FLOAT;
5808 mode = MIN_MODE_VECTOR_INT;
5810 /* Get the mode which has this inner mode and number of units. */
5811 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5812 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5813 && GET_MODE_INNER (mode) == innermode)
5815 if (size == 32 && !TARGET_AVX)
5817 static bool warnedavx;
5824 warning (0, "AVX vector argument without AVX "
5825 "enabled changes the ABI");
5827 return TYPE_MODE (type);
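/* Illustrative example (an editor's sketch, not from the original
   sources): given

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE need not be a vector mode when SSE is disabled (the
   middle-end falls back to a non-vector mode), but the loop above
   still finds V4SImode from the SImode inner mode and the 4 subparts,
   so the argument keeps the ABI location of a 16-byte vector.  */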
5840 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5841 this may not agree with the mode that the type system has chosen for the
5842 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5843 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5846 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5851 if (orig_mode != BLKmode)
5852 tmp = gen_rtx_REG (orig_mode, regno);
5855 tmp = gen_rtx_REG (mode, regno);
5856 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5857 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5863 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5864 of this code is to classify each 8bytes of incoming argument by the register
5865 class and assign registers accordingly. */
5867 /* Return the union class of CLASS1 and CLASS2.
5868 See the x86-64 PS ABI for details. */
5870 static enum x86_64_reg_class
5871 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5873 /* Rule #1: If both classes are equal, this is the resulting class. */
5874 if (class1 == class2)
5877 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class.  */
5879 if (class1 == X86_64_NO_CLASS)
5881 if (class2 == X86_64_NO_CLASS)
5884 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5885 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5886 return X86_64_MEMORY_CLASS;
5888 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5889 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5890 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5891 return X86_64_INTEGERSI_CLASS;
5892 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5893 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5894 return X86_64_INTEGER_CLASS;
5896 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used.  */
5898 if (class1 == X86_64_X87_CLASS
5899 || class1 == X86_64_X87UP_CLASS
5900 || class1 == X86_64_COMPLEX_X87_CLASS
5901 || class2 == X86_64_X87_CLASS
5902 || class2 == X86_64_X87UP_CLASS
5903 || class2 == X86_64_COMPLEX_X87_CLASS)
5904 return X86_64_MEMORY_CLASS;
5906 /* Rule #6: Otherwise class SSE is used. */
5907 return X86_64_SSE_CLASS;
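/* Illustrative example (an editor's sketch, not from the original
   sources): for

     union u { int i; float f; };

   the int member classifies as X86_64_INTEGERSI_CLASS and the float
   member as X86_64_SSESF_CLASS; rule #4 merges them into
   X86_64_INTEGERSI_CLASS, so the union travels in an integer
   register.  */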
5910 /* Classify the argument of type TYPE and mode MODE.
5911 CLASSES will be filled by the register class used to pass each word
5912 of the operand. The number of words is returned. In case the parameter
5913 should be passed in memory, 0 is returned. As a special case for zero
5914 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5916 BIT_OFFSET is used internally for handling records and specifies the
5917 offset in bits modulo 256 to avoid overflow cases.
5919 See the x86-64 PS ABI for details.
5923 classify_argument (enum machine_mode mode, const_tree type,
5924 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5926 HOST_WIDE_INT bytes =
5927 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5928 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5930 /* Variable sized entities are always passed/returned in memory. */
5934 if (mode != VOIDmode
5935 && targetm.calls.must_pass_in_stack (mode, type))
5938 if (type && AGGREGATE_TYPE_P (type))
5942 enum x86_64_reg_class subclasses[MAX_CLASSES];
5944 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5948 for (i = 0; i < words; i++)
5949 classes[i] = X86_64_NO_CLASS;
5951 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
5952 signal the memory class, so handle it as a special case.  */
5955 classes[0] = X86_64_NO_CLASS;
5959 /* Classify each field of record and merge classes. */
5960 switch (TREE_CODE (type))
5963 /* And now merge the fields of the structure.  */
5964 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5966 if (TREE_CODE (field) == FIELD_DECL)
5970 if (TREE_TYPE (field) == error_mark_node)
5973 /* Bitfields are always classified as integer. Handle them
5974 early, since later code would consider them to be
5975 misaligned integers. */
5976 if (DECL_BIT_FIELD (field))
5978 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5979 i < ((int_bit_position (field) + (bit_offset % 64))
5980 + tree_low_cst (DECL_SIZE (field), 0)
5983 merge_classes (X86_64_INTEGER_CLASS,
5990 type = TREE_TYPE (field);
5992 /* Flexible array member is ignored. */
5993 if (TYPE_MODE (type) == BLKmode
5994 && TREE_CODE (type) == ARRAY_TYPE
5995 && TYPE_SIZE (type) == NULL_TREE
5996 && TYPE_DOMAIN (type) != NULL_TREE
5997 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6002 if (!warned && warn_psabi)
6005 inform (input_location,
6006 "The ABI of passing struct with"
6007 " a flexible array member has"
6008 " changed in GCC 4.4");
6012 num = classify_argument (TYPE_MODE (type), type,
6014 (int_bit_position (field)
6015 + bit_offset) % 256);
6018 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
6019 for (i = 0; i < num && (i + pos) < words; i++)
6021 merge_classes (subclasses[i], classes[i + pos]);
6028 /* Arrays are handled as small records. */
6031 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6032 TREE_TYPE (type), subclasses, bit_offset);
6036 /* The partial classes are now full classes. */
6037 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6038 subclasses[0] = X86_64_SSE_CLASS;
6039 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6040 && !((bit_offset % 64) == 0 && bytes == 4))
6041 subclasses[0] = X86_64_INTEGER_CLASS;
6043 for (i = 0; i < words; i++)
6044 classes[i] = subclasses[i % num];
6049 case QUAL_UNION_TYPE:
6050 /* Unions are similar to RECORD_TYPE but offset is always 0.  */
6052 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6054 if (TREE_CODE (field) == FIELD_DECL)
6058 if (TREE_TYPE (field) == error_mark_node)
6061 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6062 TREE_TYPE (field), subclasses,
6066 for (i = 0; i < num; i++)
6067 classes[i] = merge_classes (subclasses[i], classes[i]);
6078 /* When size > 16 bytes, if the first one isn't
6079 X86_64_SSE_CLASS or any other ones aren't
6080 X86_64_SSEUP_CLASS, everything should be passed in memory.  */
6082 if (classes[0] != X86_64_SSE_CLASS)
6085 for (i = 1; i < words; i++)
6086 if (classes[i] != X86_64_SSEUP_CLASS)
6090 /* Final merger cleanup. */
6091 for (i = 0; i < words; i++)
6093 /* If one class is MEMORY, everything should be passed in memory.  */
6095 if (classes[i] == X86_64_MEMORY_CLASS)
6098 /* X86_64_SSEUP_CLASS should always be preceded by
6099 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
6100 if (classes[i] == X86_64_SSEUP_CLASS
6101 && classes[i - 1] != X86_64_SSE_CLASS
6102 && classes[i - 1] != X86_64_SSEUP_CLASS)
6104 /* The first one should never be X86_64_SSEUP_CLASS. */
6105 gcc_assert (i != 0);
6106 classes[i] = X86_64_SSE_CLASS;
6109 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6110 everything should be passed in memory. */
6111 if (classes[i] == X86_64_X87UP_CLASS
6112 && (classes[i - 1] != X86_64_X87_CLASS))
6116 /* The first one should never be X86_64_X87UP_CLASS. */
6117 gcc_assert (i != 0);
6118 if (!warned && warn_psabi)
6121 inform (input_location,
6122 "The ABI of passing union with long double"
6123 " has changed in GCC 4.4");
6131 /* Compute the alignment needed.  We align all types to natural boundaries
6132 with the exception of XFmode, which is aligned to 64 bits.  */
6133 if (mode != VOIDmode && mode != BLKmode)
6135 int mode_alignment = GET_MODE_BITSIZE (mode);
6138 mode_alignment = 128;
6139 else if (mode == XCmode)
6140 mode_alignment = 256;
6141 if (COMPLEX_MODE_P (mode))
6142 mode_alignment /= 2;
6143 /* Misaligned fields are always returned in memory. */
6144 if (bit_offset % mode_alignment)
6148 /* for V1xx modes, just use the base mode */
6149 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6150 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6151 mode = GET_MODE_INNER (mode);
6153 /* Classification of atomic types. */
6158 classes[0] = X86_64_SSE_CLASS;
6161 classes[0] = X86_64_SSE_CLASS;
6162 classes[1] = X86_64_SSEUP_CLASS;
6172 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6176 classes[0] = X86_64_INTEGERSI_CLASS;
6179 else if (size <= 64)
6181 classes[0] = X86_64_INTEGER_CLASS;
6184 else if (size <= 64+32)
6186 classes[0] = X86_64_INTEGER_CLASS;
6187 classes[1] = X86_64_INTEGERSI_CLASS;
6190 else if (size <= 64+64)
6192 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6200 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6204 /* OImode shouldn't be used directly. */
6209 if (!(bit_offset % 64))
6210 classes[0] = X86_64_SSESF_CLASS;
6212 classes[0] = X86_64_SSE_CLASS;
6215 classes[0] = X86_64_SSEDF_CLASS;
6218 classes[0] = X86_64_X87_CLASS;
6219 classes[1] = X86_64_X87UP_CLASS;
6222 classes[0] = X86_64_SSE_CLASS;
6223 classes[1] = X86_64_SSEUP_CLASS;
6226 classes[0] = X86_64_SSE_CLASS;
6227 if (!(bit_offset % 64))
6233 if (!warned && warn_psabi)
6236 inform (input_location,
6237 "The ABI of passing structure with complex float"
6238 " member has changed in GCC 4.4");
6240 classes[1] = X86_64_SSESF_CLASS;
6244 classes[0] = X86_64_SSEDF_CLASS;
6245 classes[1] = X86_64_SSEDF_CLASS;
6248 classes[0] = X86_64_COMPLEX_X87_CLASS;
6251 /* These modes are larger than 16 bytes.  */
6259 classes[0] = X86_64_SSE_CLASS;
6260 classes[1] = X86_64_SSEUP_CLASS;
6261 classes[2] = X86_64_SSEUP_CLASS;
6262 classes[3] = X86_64_SSEUP_CLASS;
6270 classes[0] = X86_64_SSE_CLASS;
6271 classes[1] = X86_64_SSEUP_CLASS;
6279 classes[0] = X86_64_SSE_CLASS;
6285 gcc_assert (VECTOR_MODE_P (mode));
6290 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6292 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6293 classes[0] = X86_64_INTEGERSI_CLASS;
6295 classes[0] = X86_64_INTEGER_CLASS;
6296 classes[1] = X86_64_INTEGER_CLASS;
6297 return 1 + (bytes > 8);
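/* Worked example (an editor's sketch of the psABI rules, not from the
   original sources): for

     struct s { double d; long l; };

   the first eightbyte classifies as X86_64_SSEDF_CLASS and the second
   as X86_64_INTEGER_CLASS, so 2 is returned and the struct is passed
   in one SSE register and one GPR (%xmm0 and %rdi when it is the
   first argument).  A 40-byte struct instead trips the 32-byte limit
   above and yields 0, i.e. memory.  */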
6301 /* Examine the argument and return the number of registers required in each
6302 class.  Return 0 iff the parameter should be passed in memory.  */
6304 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6305 int *int_nregs, int *sse_nregs)
6307 enum x86_64_reg_class regclass[MAX_CLASSES];
6308 int n = classify_argument (mode, type, regclass, 0);
6314 for (n--; n >= 0; n--)
6315 switch (regclass[n])
6317 case X86_64_INTEGER_CLASS:
6318 case X86_64_INTEGERSI_CLASS:
6321 case X86_64_SSE_CLASS:
6322 case X86_64_SSESF_CLASS:
6323 case X86_64_SSEDF_CLASS:
6326 case X86_64_NO_CLASS:
6327 case X86_64_SSEUP_CLASS:
6329 case X86_64_X87_CLASS:
6330 case X86_64_X87UP_CLASS:
6334 case X86_64_COMPLEX_X87_CLASS:
6335 return in_return ? 2 : 0;
6336 case X86_64_MEMORY_CLASS:
6342 /* Construct container for the argument used by GCC interface. See
6343 FUNCTION_ARG for the detailed description. */
6346 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6347 const_tree type, int in_return, int nintregs, int nsseregs,
6348 const int *intreg, int sse_regno)
6350 /* The following variables hold the static issued_error state. */
6351 static bool issued_sse_arg_error;
6352 static bool issued_sse_ret_error;
6353 static bool issued_x87_ret_error;
6355 enum machine_mode tmpmode;
6357 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6358 enum x86_64_reg_class regclass[MAX_CLASSES];
6362 int needed_sseregs, needed_intregs;
6363 rtx exp[MAX_CLASSES];
6366 n = classify_argument (mode, type, regclass, 0);
6369 if (!examine_argument (mode, type, in_return, &needed_intregs,
6372 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6375 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6376 some less clueful developer tries to use floating-point anyway. */
6377 if (needed_sseregs && !TARGET_SSE)
6381 if (!issued_sse_ret_error)
6383 error ("SSE register return with SSE disabled");
6384 issued_sse_ret_error = true;
6387 else if (!issued_sse_arg_error)
6389 error ("SSE register argument with SSE disabled");
6390 issued_sse_arg_error = true;
6395 /* Likewise, error if the ABI requires us to return values in the
6396 x87 registers and the user specified -mno-80387. */
6397 if (!TARGET_80387 && in_return)
6398 for (i = 0; i < n; i++)
6399 if (regclass[i] == X86_64_X87_CLASS
6400 || regclass[i] == X86_64_X87UP_CLASS
6401 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6403 if (!issued_x87_ret_error)
6405 error ("x87 register return with x87 disabled");
6406 issued_x87_ret_error = true;
6411 /* First construct the simple cases.  Avoid SCmode, since we want to use
6412 a single register to pass this type.  */
6413 if (n == 1 && mode != SCmode)
6414 switch (regclass[0])
6416 case X86_64_INTEGER_CLASS:
6417 case X86_64_INTEGERSI_CLASS:
6418 return gen_rtx_REG (mode, intreg[0]);
6419 case X86_64_SSE_CLASS:
6420 case X86_64_SSESF_CLASS:
6421 case X86_64_SSEDF_CLASS:
6422 if (mode != BLKmode)
6423 return gen_reg_or_parallel (mode, orig_mode,
6424 SSE_REGNO (sse_regno));
6426 case X86_64_X87_CLASS:
6427 case X86_64_COMPLEX_X87_CLASS:
6428 return gen_rtx_REG (mode, FIRST_STACK_REG);
6429 case X86_64_NO_CLASS:
6430 /* Zero sized array, struct or class. */
6435 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6436 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6437 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6439 && regclass[0] == X86_64_SSE_CLASS
6440 && regclass[1] == X86_64_SSEUP_CLASS
6441 && regclass[2] == X86_64_SSEUP_CLASS
6442 && regclass[3] == X86_64_SSEUP_CLASS
6444 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6447 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6448 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6449 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6450 && regclass[1] == X86_64_INTEGER_CLASS
6451 && (mode == CDImode || mode == TImode || mode == TFmode)
6452 && intreg[0] + 1 == intreg[1])
6453 return gen_rtx_REG (mode, intreg[0]);
6455 /* Otherwise figure out the entries of the PARALLEL. */
6456 for (i = 0; i < n; i++)
6460 switch (regclass[i])
6462 case X86_64_NO_CLASS:
6464 case X86_64_INTEGER_CLASS:
6465 case X86_64_INTEGERSI_CLASS:
6466 /* Merge TImodes on aligned occasions here too. */
6467 if (i * 8 + 8 > bytes)
6468 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6469 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6473 /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
6474 if (tmpmode == BLKmode)
6476 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6477 gen_rtx_REG (tmpmode, *intreg),
6481 case X86_64_SSESF_CLASS:
6482 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6483 gen_rtx_REG (SFmode,
6484 SSE_REGNO (sse_regno)),
6488 case X86_64_SSEDF_CLASS:
6489 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6490 gen_rtx_REG (DFmode,
6491 SSE_REGNO (sse_regno)),
6495 case X86_64_SSE_CLASS:
6503 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6513 && regclass[1] == X86_64_SSEUP_CLASS
6514 && regclass[2] == X86_64_SSEUP_CLASS
6515 && regclass[3] == X86_64_SSEUP_CLASS);
6522 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6523 gen_rtx_REG (tmpmode,
6524 SSE_REGNO (sse_regno)),
6533 /* Empty aligned struct, union or class. */
6537 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6538 for (i = 0; i < nexps; i++)
6539 XVECEXP (ret, 0, i) = exp [i];
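/* Illustrative sketch of the result (editor's note, not from the
   original sources): for the struct { double d; long l; } example,
   construct_container builds roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   one EXPR_LIST per eightbyte, pairing the register with the byte
   offset of that eightbyte within the argument.  */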
6543 /* Update the data in CUM to advance over an argument of mode MODE
6544 and data type TYPE. (TYPE is null for libcalls where that information
6545 may not be available.) */
6548 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6549 const_tree type, HOST_WIDE_INT bytes,
6550 HOST_WIDE_INT words)
6566 cum->words += words;
6567 cum->nregs -= words;
6568 cum->regno += words;
6570 if (cum->nregs <= 0)
6578 /* OImode shouldn't be used directly. */
6582 if (cum->float_in_sse < 2)
6585 if (cum->float_in_sse < 1)
6602 if (!type || !AGGREGATE_TYPE_P (type))
6604 cum->sse_words += words;
6605 cum->sse_nregs -= 1;
6606 cum->sse_regno += 1;
6607 if (cum->sse_nregs <= 0)
6621 if (!type || !AGGREGATE_TYPE_P (type))
6623 cum->mmx_words += words;
6624 cum->mmx_nregs -= 1;
6625 cum->mmx_regno += 1;
6626 if (cum->mmx_nregs <= 0)
6637 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6638 const_tree type, HOST_WIDE_INT words, bool named)
6640 int int_nregs, sse_nregs;
6642 /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6643 if (!named && VALID_AVX256_REG_MODE (mode))
6646 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6647 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6649 cum->nregs -= int_nregs;
6650 cum->sse_nregs -= sse_nregs;
6651 cum->regno += int_nregs;
6652 cum->sse_regno += sse_nregs;
6656 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6657 cum->words = (cum->words + align - 1) & ~(align - 1);
6658 cum->words += words;
6663 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6664 HOST_WIDE_INT words)
6666 /* Otherwise, this should be passed indirectly.  */
6667 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6669 cum->words += words;
6677 /* Update the data in CUM to advance over an argument of mode MODE and
6678 data type TYPE. (TYPE is null for libcalls where that information
6679 may not be available.) */
6682 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6683 const_tree type, bool named)
6685 HOST_WIDE_INT bytes, words;
6687 if (mode == BLKmode)
6688 bytes = int_size_in_bytes (type);
6690 bytes = GET_MODE_SIZE (mode);
6691 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6694 mode = type_natural_mode (type, NULL);
6696 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6697 function_arg_advance_ms_64 (cum, bytes, words);
6698 else if (TARGET_64BIT)
6699 function_arg_advance_64 (cum, mode, type, words, named);
6701 function_arg_advance_32 (cum, mode, type, bytes, words);
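/* Illustrative example (an editor's sketch, not from the original
   sources): after advancing over "void f (int, double)" on 64-bit
   SysV, cum->regno is 1 and cum->sse_regno is 1, so a following
   integer argument would land in %rsi and a following float in
   %xmm1.  */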
6704 /* Define where to put the arguments to a function.
6705 Value is zero to push the argument on the stack,
6706 or a hard register in which to store the argument.
6708 MODE is the argument's machine mode.
6709 TYPE is the data type of the argument (as a tree).
6710 This is null for libcalls where that information may
6712 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6713 the preceding args and about the function being called.
6714 NAMED is nonzero if this argument is a named parameter
6715 (otherwise it is an extra parameter matching an ellipsis). */
6718 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6719 enum machine_mode orig_mode, const_tree type,
6720 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6722 static bool warnedsse, warnedmmx;
6724 /* Avoid the AL settings for the Unix64 ABI. */
6725 if (mode == VOIDmode)
6741 if (words <= cum->nregs)
6743 int regno = cum->regno;
6745 /* Fastcall allocates the first two DWORD (SImode) or
6746 smaller arguments to ECX and EDX if it isn't an aggregate type.  */
6752 || (type && AGGREGATE_TYPE_P (type)))
6755 /* ECX not EAX is the first allocated register. */
6756 if (regno == AX_REG)
6759 return gen_rtx_REG (mode, regno);
6764 if (cum->float_in_sse < 2)
6767 if (cum->float_in_sse < 1)
6771 /* In 32bit, we pass TImode in xmm registers. */
6778 if (!type || !AGGREGATE_TYPE_P (type))
6780 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6783 warning (0, "SSE vector argument without SSE enabled "
6787 return gen_reg_or_parallel (mode, orig_mode,
6788 cum->sse_regno + FIRST_SSE_REG);
6793 /* OImode shouldn't be used directly. */
6802 if (!type || !AGGREGATE_TYPE_P (type))
6805 return gen_reg_or_parallel (mode, orig_mode,
6806 cum->sse_regno + FIRST_SSE_REG);
6816 if (!type || !AGGREGATE_TYPE_P (type))
6818 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6821 warning (0, "MMX vector argument without MMX enabled "
6825 return gen_reg_or_parallel (mode, orig_mode,
6826 cum->mmx_regno + FIRST_MMX_REG);
6835 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6836 enum machine_mode orig_mode, const_tree type, bool named)
6838 /* Handle a hidden AL argument containing the number of registers
6839 for varargs x86-64 functions.  */
6840 if (mode == VOIDmode)
6841 return GEN_INT (cum->maybe_vaarg
6842 ? (cum->sse_nregs < 0
6843 ? X86_64_SSE_REGPARM_MAX
6858 /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6864 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6866 &x86_64_int_parameter_registers [cum->regno],
6871 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6872 enum machine_mode orig_mode, bool named,
6873 HOST_WIDE_INT bytes)
6877 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6878 We use the value -2 to specify that the current function call is MS ABI.  */
6879 if (mode == VOIDmode)
6880 return GEN_INT (-2);
6882 /* If we've run out of registers, it goes on the stack. */
6883 if (cum->nregs == 0)
6886 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6888 /* Only floating point modes are passed in anything but integer regs. */
6889 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6892 regno = cum->regno + FIRST_SSE_REG;
6897 /* Unnamed floating parameters are passed in both the
6898 SSE and integer registers. */
6899 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6900 t2 = gen_rtx_REG (mode, regno);
6901 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6902 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6903 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6906 /* Handle aggregate types passed in registers.  */
6907 if (orig_mode == BLKmode)
6909 if (bytes > 0 && bytes <= 8)
6910 mode = (bytes > 4 ? DImode : SImode);
6911 if (mode == BLKmode)
6915 return gen_reg_or_parallel (mode, orig_mode, regno);
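/* Illustrative example (an editor's sketch, not from the original
   sources): under the MS x64 ABI register slots correspond
   positionally to the first four arguments, so for

     void f (int a, double b, int c);

   A goes in %rcx (slot 0), B in %xmm1 (slot 1, a float mode), and C
   in %r8 (slot 2); a BLKmode aggregate of 1, 2, 4 or 8 bytes is
   instead loaded into the slot's integer register.  */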
6918 /* Return where to put the arguments to a function.
6919 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6921 MODE is the argument's machine mode. TYPE is the data type of the
6922 argument. It is null for libcalls where that information may not be
6923 available. CUM gives information about the preceding args and about
6924 the function being called. NAMED is nonzero if this argument is a
6925 named parameter (otherwise it is an extra parameter matching an
6929 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6930 const_tree type, bool named)
6932 enum machine_mode mode = omode;
6933 HOST_WIDE_INT bytes, words;
6936 if (mode == BLKmode)
6937 bytes = int_size_in_bytes (type);
6939 bytes = GET_MODE_SIZE (mode);
6940 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6942 /* To simplify the code below, represent vector types with a vector mode
6943 even if MMX/SSE are not active. */
6944 if (type && TREE_CODE (type) == VECTOR_TYPE)
6945 mode = type_natural_mode (type, cum);
6947 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6948 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6949 else if (TARGET_64BIT)
6950 arg = function_arg_64 (cum, mode, omode, type, named);
6952 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6954 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6956 /* This argument uses 256bit AVX modes. */
6957 cfun->machine->use_avx256_p = true;
6959 cfun->machine->callee_pass_avx256_p = true;
6961 cfun->machine->caller_pass_avx256_p = true;
6967 /* A C expression that indicates when an argument must be passed by
6968 reference. If nonzero for an argument, a copy of that argument is
6969 made in memory and a pointer to the argument is passed instead of
6970 the argument itself. The pointer is passed in whatever way is
6971 appropriate for passing a pointer to that type. */
6974 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6975 enum machine_mode mode ATTRIBUTE_UNUSED,
6976 const_tree type, bool named ATTRIBUTE_UNUSED)
6978 /* See Windows x64 Software Convention. */
6979 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6981 int msize = (int) GET_MODE_SIZE (mode);
6984 /* Arrays are passed by reference. */
6985 if (TREE_CODE (type) == ARRAY_TYPE)
6988 if (AGGREGATE_TYPE_P (type))
6990 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6991 are passed by reference. */
6992 msize = int_size_in_bytes (type);
6996 /* __m128 is passed by reference. */
6998 case 1: case 2: case 4: case 8:
7004 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7010 /* Return true when TYPE should be 128bit aligned for 32bit argument
7011 passing ABI. XXX: This function is obsolete and is only used for
7012 checking psABI compatibility with previous versions of GCC. */
7015 ix86_compat_aligned_value_p (const_tree type)
7017 enum machine_mode mode = TYPE_MODE (type);
7018 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7022 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7024 if (TYPE_ALIGN (type) < 128)
7027 if (AGGREGATE_TYPE_P (type))
7029 /* Walk the aggregates recursively. */
7030 switch (TREE_CODE (type))
7034 case QUAL_UNION_TYPE:
7038 /* Walk all the structure fields. */
7039 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7041 if (TREE_CODE (field) == FIELD_DECL
7042 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7049 /* Just for use if some languages pass arrays by value.  */
7050 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7061 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7062 XXX: This function is obsolete and is only used for checking psABI
7063 compatibility with previous versions of GCC. */
7066 ix86_compat_function_arg_boundary (enum machine_mode mode,
7067 const_tree type, int align)
7069 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7070 natural boundaries. */
7071 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7073 /* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
7074 make an exception for SSE modes since these require 128bit alignment.
7077 The handling here differs from field_alignment. ICC aligns MMX
7078 arguments to 4 byte boundaries, while structure fields are aligned
7079 to 8 byte boundaries. */
7082 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7083 align = PARM_BOUNDARY;
7087 if (!ix86_compat_aligned_value_p (type))
7088 align = PARM_BOUNDARY;
7091 if (align > BIGGEST_ALIGNMENT)
7092 align = BIGGEST_ALIGNMENT;
7096 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI.  */
7100 ix86_contains_aligned_value_p (const_tree type)
7102 enum machine_mode mode = TYPE_MODE (type);
7104 if (mode == XFmode || mode == XCmode)
7107 if (TYPE_ALIGN (type) < 128)
7110 if (AGGREGATE_TYPE_P (type))
7112 /* Walk the aggregates recursively. */
7113 switch (TREE_CODE (type))
7117 case QUAL_UNION_TYPE:
7121 /* Walk all the structure fields. */
7122 for (field = TYPE_FIELDS (type);
7124 field = DECL_CHAIN (field))
7126 if (TREE_CODE (field) == FIELD_DECL
7127 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7134 /* Just for use if some languages pass arrays by value.  */
7135 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7144 return TYPE_ALIGN (type) >= 128;
7149 /* Gives the alignment boundary, in bits, of an argument with the
7150 specified mode and type. */
7153 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7158 /* Since the main variant type is used for the call, convert the
7159 type to its main variant.  */
7160 type = TYPE_MAIN_VARIANT (type);
7161 align = TYPE_ALIGN (type);
7164 align = GET_MODE_ALIGNMENT (mode);
7165 if (align < PARM_BOUNDARY)
7166 align = PARM_BOUNDARY;
7170 int saved_align = align;
7174 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7177 if (mode == XFmode || mode == XCmode)
7178 align = PARM_BOUNDARY;
7180 else if (!ix86_contains_aligned_value_p (type))
7181 align = PARM_BOUNDARY;
7184 align = PARM_BOUNDARY;
7189 && align != ix86_compat_function_arg_boundary (mode, type,
7193 inform (input_location,
7194 "The ABI of passing parameter with %dbyte"
7195 " alignment has changed in GCC 4.6",
7196 align / BITS_PER_UNIT);
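/* Illustrative example (an editor's sketch, not from the original
   sources): on 32-bit, an __m128 argument keeps the 128-bit boundary
   required by SSE modes, while a plain double is lowered to
   PARM_BOUNDARY (32 bits); the GCC 4.6 note above fires only when
   this result differs from the obsolete computation in
   ix86_compat_function_arg_boundary.  */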
7203 /* Return true if N is a possible register number of function value. */
7206 ix86_function_value_regno_p (const unsigned int regno)
7213 case FIRST_FLOAT_REG:
7214 /* TODO: The function should depend on current function ABI but
7215 builtins.c would need updating then.  Therefore we use the default ABI.  */
7217 if (TARGET_64BIT && ix86_abi == MS_ABI)
7219 return TARGET_FLOAT_RETURNS_IN_80387;
7225 if (TARGET_MACHO || TARGET_64BIT)
7233 /* Define how to find the value returned by a function.
7234 VALTYPE is the data type of the value (as a tree).
7235 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7236 otherwise, FUNC is 0. */
7239 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7240 const_tree fntype, const_tree fn)
7244 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7245 we normally prevent this case when mmx is not available. However
7246 some ABIs may require the result to be returned like DImode. */
7247 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7248 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7250 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7251 we prevent this case when sse is not available. However some ABIs
7252 may require the result to be returned like integer TImode. */
7253 else if (mode == TImode
7254 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7255 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7257 /* 32-byte vector modes in %ymm0. */
7258 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7259 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7261 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7262 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7263 regno = FIRST_FLOAT_REG;
7265 /* Most things go in %eax. */
7268 /* Override FP return register with %xmm0 for local functions when
7269 SSE math is enabled or for functions with sseregparm attribute. */
7270 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7272 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7273 if ((sse_level >= 1 && mode == SFmode)
7274 || (sse_level == 2 && mode == DFmode))
7275 regno = FIRST_SSE_REG;
7278 /* OImode shouldn't be used directly. */
7279 gcc_assert (mode != OImode);
7281 return gen_rtx_REG (orig_mode, regno);
7285 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7290 /* Handle libcalls, which don't provide a type node. */
7291 if (valtype == NULL)
7303 return gen_rtx_REG (mode, FIRST_SSE_REG);
7306 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7310 return gen_rtx_REG (mode, AX_REG);
7314 ret = construct_container (mode, orig_mode, valtype, 1,
7315 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7316 x86_64_int_return_registers, 0);
7318 /* For zero sized structures, construct_container returns NULL, but we
7319 need to keep the rest of the compiler happy by returning a meaningful value.  */
7321 ret = gen_rtx_REG (orig_mode, AX_REG);
7327 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7329 unsigned int regno = AX_REG;
7333 switch (GET_MODE_SIZE (mode))
7336 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7337 && !COMPLEX_MODE_P (mode))
7338 regno = FIRST_SSE_REG;
7342 if (mode == SFmode || mode == DFmode)
7343 regno = FIRST_SSE_REG;
7349 return gen_rtx_REG (orig_mode, regno);
7353 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7354 enum machine_mode orig_mode, enum machine_mode mode)
7356 const_tree fn, fntype;
7359 if (fntype_or_decl && DECL_P (fntype_or_decl))
7360 fn = fntype_or_decl;
7361 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7363 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7364 return function_value_ms_64 (orig_mode, mode);
7365 else if (TARGET_64BIT)
7366 return function_value_64 (orig_mode, mode, valtype);
7368 return function_value_32 (orig_mode, mode, fntype, fn);
7372 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7373 bool outgoing ATTRIBUTE_UNUSED)
7375 enum machine_mode mode, orig_mode;
7377 orig_mode = TYPE_MODE (valtype);
7378 mode = type_natural_mode (valtype, NULL);
7379 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7383 ix86_libcall_value (enum machine_mode mode)
7385 return ix86_function_value_1 (NULL, NULL, mode, mode);
7388 /* Return true iff type is returned in memory. */
7390 static bool ATTRIBUTE_UNUSED
7391 return_in_memory_32 (const_tree type, enum machine_mode mode)
7395 if (mode == BLKmode)
7398 size = int_size_in_bytes (type);
7400 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7403 if (VECTOR_MODE_P (mode) || mode == TImode)
7405 /* User-created vectors small enough to fit in EAX. */
7409 /* MMX/3dNow values are returned in MM0,
7410 except when it doesn't exist or the ABI prescribes otherwise.  */
7412 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7414 /* SSE values are returned in XMM0, except when it doesn't exist. */
7418 /* AVX values are returned in YMM0, except when it doesn't exist. */
7429 /* OImode shouldn't be used directly. */
7430 gcc_assert (mode != OImode);
7435 static bool ATTRIBUTE_UNUSED
7436 return_in_memory_64 (const_tree type, enum machine_mode mode)
7438 int needed_intregs, needed_sseregs;
7439 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7442 static bool ATTRIBUTE_UNUSED
7443 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7445 HOST_WIDE_INT size = int_size_in_bytes (type);
7447 /* __m128 is returned in xmm0. */
7448 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7449 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7452 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
7453 return size != 1 && size != 2 && size != 4 && size != 8;
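/* Illustrative example (an editor's sketch, not from the original
   sources): under the MS x64 ABI

     struct s3 { char c[3]; };

   has size 3, which is none of 1, 2, 4 or 8, so it is returned in
   memory through a hidden pointer, whereas a 16-byte vector such as
   __m128 comes back in %xmm0.  */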
7457 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7459 #ifdef SUBTARGET_RETURN_IN_MEMORY
7460 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7462 const enum machine_mode mode = type_natural_mode (type, NULL);
7466 if (ix86_function_type_abi (fntype) == MS_ABI)
7467 return return_in_memory_ms_64 (type, mode);
7469 return return_in_memory_64 (type, mode);
7472 return return_in_memory_32 (type, mode);
7476 /* When returning SSE vector types, we have a choice of either
7477 (1) being abi incompatible with a -march switch, or
7478 (2) generating an error.
7479 Given no good solution, I think the safest thing is one warning.
7480 The user won't be able to use -Werror, but....
7482 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7483 called in response to actually generating a caller or callee that
7484 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7485 via aggregate_value_p for general type probing from tree-ssa. */
7488 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7490 static bool warnedsse, warnedmmx;
7492 if (!TARGET_64BIT && type)
7494 /* Look at the return type of the function, not the function type. */
7495 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7497 if (!TARGET_SSE && !warnedsse)
7500 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7503 warning (0, "SSE vector return without SSE enabled "
7508 if (!TARGET_MMX && !warnedmmx)
7510 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7513 warning (0, "MMX vector return without MMX enabled "
7523 /* Create the va_list data type. */
7525 /* Returns the calling convention specific va_list data type.
7526 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7529 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7531 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7533 /* For i386 we use plain pointer to argument area. */
7534 if (!TARGET_64BIT || abi == MS_ABI)
7535 return build_pointer_type (char_type_node);
7537 record = lang_hooks.types.make_type (RECORD_TYPE);
7538 type_decl = build_decl (BUILTINS_LOCATION,
7539 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7541 f_gpr = build_decl (BUILTINS_LOCATION,
7542 FIELD_DECL, get_identifier ("gp_offset"),
7543 unsigned_type_node);
7544 f_fpr = build_decl (BUILTINS_LOCATION,
7545 FIELD_DECL, get_identifier ("fp_offset"),
7546 unsigned_type_node);
7547 f_ovf = build_decl (BUILTINS_LOCATION,
7548 FIELD_DECL, get_identifier ("overflow_arg_area"),
7550 f_sav = build_decl (BUILTINS_LOCATION,
7551 FIELD_DECL, get_identifier ("reg_save_area"),
7554 va_list_gpr_counter_field = f_gpr;
7555 va_list_fpr_counter_field = f_fpr;
7557 DECL_FIELD_CONTEXT (f_gpr) = record;
7558 DECL_FIELD_CONTEXT (f_fpr) = record;
7559 DECL_FIELD_CONTEXT (f_ovf) = record;
7560 DECL_FIELD_CONTEXT (f_sav) = record;
7562 TYPE_STUB_DECL (record) = type_decl;
7563 TYPE_NAME (record) = type_decl;
7564 TYPE_FIELDS (record) = f_gpr;
7565 DECL_CHAIN (f_gpr) = f_fpr;
7566 DECL_CHAIN (f_fpr) = f_ovf;
7567 DECL_CHAIN (f_ovf) = f_sav;
7569 layout_type (record);
7571 /* The correct type is an array type of one element. */
7572 return build_array_type (record, build_index_type (size_zero_node));
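/* The record built above corresponds roughly to this C declaration
   (an editor's illustrative equivalent, not from the original
   sources):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/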
7575 /* Set up the builtin va_list data type and, for 64-bit, the additional
7576 calling convention specific va_list data types.  */
7579 ix86_build_builtin_va_list (void)
7581 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7583 /* Initialize abi specific va_list builtin types. */
7587 if (ix86_abi == MS_ABI)
7589 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7590 if (TREE_CODE (t) != RECORD_TYPE)
7591 t = build_variant_type_copy (t);
7592 sysv_va_list_type_node = t;
7597 if (TREE_CODE (t) != RECORD_TYPE)
7598 t = build_variant_type_copy (t);
7599 sysv_va_list_type_node = t;
7601 if (ix86_abi != MS_ABI)
7603 t = ix86_build_builtin_va_list_abi (MS_ABI);
7604 if (TREE_CODE (t) != RECORD_TYPE)
7605 t = build_variant_type_copy (t);
7606 ms_va_list_type_node = t;
7611 if (TREE_CODE (t) != RECORD_TYPE)
7612 t = build_variant_type_copy (t);
7613 ms_va_list_type_node = t;
7620 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7623 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7629 /* GPR size of varargs save area. */
7630 if (cfun->va_list_gpr_size)
7631 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7633 ix86_varargs_gpr_size = 0;
7635 /* FPR size of varargs save area. We don't need it if we don't pass
7636 anything in SSE registers. */
7637 if (TARGET_SSE && cfun->va_list_fpr_size)
7638 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7640 ix86_varargs_fpr_size = 0;
7642 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7645 save_area = frame_pointer_rtx;
7646 set = get_varargs_alias_set ();
7648 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7649 if (max > X86_64_REGPARM_MAX)
7650 max = X86_64_REGPARM_MAX;
7652 for (i = cum->regno; i < max; i++)
7654 mem = gen_rtx_MEM (Pmode,
7655 plus_constant (save_area, i * UNITS_PER_WORD));
7656 MEM_NOTRAP_P (mem) = 1;
7657 set_mem_alias_set (mem, set);
7658 emit_move_insn (mem, gen_rtx_REG (Pmode,
7659 x86_64_int_parameter_registers[i]));
7662 if (ix86_varargs_fpr_size)
7664 enum machine_mode smode;
7667 /* Now emit code to save SSE registers.  The AX parameter contains the
7668 number of SSE parameter registers used to call this function, though all
7669 we actually check here is the zero/non-zero status.  */
7671 label = gen_label_rtx ();
7672 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7673 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7676 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7677 we used movdqa (i.e. TImode) instead? Perhaps even better would
7678 be if we could determine the real mode of the data, via a hook
7679 into pass_stdarg. Ignore all that for now. */
7681 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7682 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7684 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7685 if (max > X86_64_SSE_REGPARM_MAX)
7686 max = X86_64_SSE_REGPARM_MAX;
7688 for (i = cum->sse_regno; i < max; ++i)
7690 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7691 mem = gen_rtx_MEM (smode, mem);
7692 MEM_NOTRAP_P (mem) = 1;
7693 set_mem_alias_set (mem, set);
7694 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7696 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
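/* Layout of the register save area filled in above (an editor's
   sketch, not from the original sources):

     reg_save_area +   0 ... +  40: %rdi, %rsi, %rdx, %rcx, %r8, %r9
     reg_save_area +  48 ... + 160: %xmm0 ... %xmm7 (16 bytes each)

   gp_offset and fp_offset in the va_list index into this block.  */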
7704 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7706 alias_set_type set = get_varargs_alias_set ();
7709 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7713 mem = gen_rtx_MEM (Pmode,
7714 plus_constant (virtual_incoming_args_rtx,
7715 i * UNITS_PER_WORD));
7716 MEM_NOTRAP_P (mem) = 1;
7717 set_mem_alias_set (mem, set);
7719 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7720 emit_move_insn (mem, reg);
7725 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7726 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7729 CUMULATIVE_ARGS next_cum;
7732 /* This argument doesn't appear to be used anymore. Which is good,
7733 because the old code here didn't suppress rtl generation. */
7734 gcc_assert (!no_rtl);
7739 fntype = TREE_TYPE (current_function_decl);
7741 /* For varargs, we do not want to skip the dummy va_dcl argument.
7742 For stdargs, we do want to skip the last named argument. */
7744 if (stdarg_p (fntype))
7745 ix86_function_arg_advance (&next_cum, mode, type, true);
7747 if (cum->call_abi == MS_ABI)
7748 setup_incoming_varargs_ms_64 (&next_cum);
7750 setup_incoming_varargs_64 (&next_cum);
7753 /* Check whether TYPE is the char * kind of va_list.  */
7756 is_va_list_char_pointer (tree type)
7760 /* For 32-bit it is always true. */
7763 canonic = ix86_canonical_va_list_type (type);
7764 return (canonic == ms_va_list_type_node
7765 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7768 /* Implement va_start. */
7771 ix86_va_start (tree valist, rtx nextarg)
7773 HOST_WIDE_INT words, n_gpr, n_fpr;
7774 tree f_gpr, f_fpr, f_ovf, f_sav;
7775 tree gpr, fpr, ovf, sav, t;
7779 if (flag_split_stack
7780 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7782 unsigned int scratch_regno;
7784 /* When we are splitting the stack, we can't refer to the stack
7785 arguments using internal_arg_pointer, because they may be on
7786 the old stack. The split stack prologue will arrange to
7787 leave a pointer to the old stack arguments in a scratch
7788 register, which we here copy to a pseudo-register. The split
7789 stack prologue can't set the pseudo-register directly because
7790 it (the prologue) runs before any registers have been saved. */
7792 scratch_regno = split_stack_prologue_scratch_regno ();
7793 if (scratch_regno != INVALID_REGNUM)
7797 reg = gen_reg_rtx (Pmode);
7798 cfun->machine->split_stack_varargs_pointer = reg;
7801 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7805 push_topmost_sequence ();
7806 emit_insn_after (seq, entry_of_function ());
7807 pop_topmost_sequence ();
7811 /* Only 64bit target needs something special. */
7812 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7814 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7815 std_expand_builtin_va_start (valist, nextarg);
7820 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7821 next = expand_binop (ptr_mode, add_optab,
7822 cfun->machine->split_stack_varargs_pointer,
7823 crtl->args.arg_offset_rtx,
7824 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7825 convert_move (va_r, next, 0);
7830 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7831 f_fpr = DECL_CHAIN (f_gpr);
7832 f_ovf = DECL_CHAIN (f_fpr);
7833 f_sav = DECL_CHAIN (f_ovf);
7835 valist = build_simple_mem_ref (valist);
7836 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7837 /* The following should be folded into the MEM_REF offset. */
7838 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7840 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7842 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7844 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7847 /* Count number of gp and fp argument registers used. */
7848 words = crtl->args.info.words;
7849 n_gpr = crtl->args.info.regno;
7850 n_fpr = crtl->args.info.sse_regno;
7852 if (cfun->va_list_gpr_size)
7854 type = TREE_TYPE (gpr);
7855 t = build2 (MODIFY_EXPR, type,
7856 gpr, build_int_cst (type, n_gpr * 8));
7857 TREE_SIDE_EFFECTS (t) = 1;
7858 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7861 if (TARGET_SSE && cfun->va_list_fpr_size)
7863 type = TREE_TYPE (fpr);
7864 t = build2 (MODIFY_EXPR, type, fpr,
7865 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7866 TREE_SIDE_EFFECTS (t) = 1;
7867 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7870 /* Find the overflow area. */
7871 type = TREE_TYPE (ovf);
7872 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7873 ovf_rtx = crtl->args.internal_arg_pointer;
7875 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7876 t = make_tree (type, ovf_rtx);
7878 t = build2 (POINTER_PLUS_EXPR, type, t,
7879 size_int (words * UNITS_PER_WORD));
7880 t = build2 (MODIFY_EXPR, type, ovf, t);
7881 TREE_SIDE_EFFECTS (t) = 1;
7882 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7884 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7886 /* Find the register save area.
7887 The function prologue saves it right above the stack frame.  */
7888 type = TREE_TYPE (sav);
7889 t = make_tree (type, frame_pointer_rtx);
7890 if (!ix86_varargs_gpr_size)
7891 t = build2 (POINTER_PLUS_EXPR, type, t,
7892 size_int (-8 * X86_64_REGPARM_MAX));
7893 t = build2 (MODIFY_EXPR, type, sav, t);
7894 TREE_SIDE_EFFECTS (t) = 1;
7895 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
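/* Sketch of the net effect of the stores above (editor's note, not
   from the original sources): for "void f (int a, ...)" va_start
   leaves

     ap->gp_offset = 8;            (one GPR consumed by A)
     ap->fp_offset = 48;           (no SSE registers consumed)
     ap->overflow_arg_area = <first stack-passed vararg>;
     ap->reg_save_area = <area saved by the prologue>;
*/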
7899 /* Implement va_arg. */
7902 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7905 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7906 tree f_gpr, f_fpr, f_ovf, f_sav;
7907 tree gpr, fpr, ovf, sav, t;
7909 tree lab_false, lab_over = NULL_TREE;
7914 enum machine_mode nat_mode;
7915 unsigned int arg_boundary;
7917 /* Only 64bit target needs something special. */
7918 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7919 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7921 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7922 f_fpr = DECL_CHAIN (f_gpr);
7923 f_ovf = DECL_CHAIN (f_fpr);
7924 f_sav = DECL_CHAIN (f_ovf);
7926 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7927 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7928 valist = build_va_arg_indirect_ref (valist);
7929 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7930 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7931 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7933 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7935 type = build_pointer_type (type);
7936 size = int_size_in_bytes (type);
7937 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7939 nat_mode = type_natural_mode (type, NULL);
7948 /* Unnamed 256bit vector mode parameters are passed on the stack.  */
7949 if (ix86_cfun_abi () == SYSV_ABI)
7956 container = construct_container (nat_mode, TYPE_MODE (type),
7957 type, 0, X86_64_REGPARM_MAX,
7958 X86_64_SSE_REGPARM_MAX, intreg,
7963 /* Pull the value out of the saved registers. */
7965 addr = create_tmp_var (ptr_type_node, "addr");
7969 int needed_intregs, needed_sseregs;
7971 tree int_addr, sse_addr;
7973 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7974 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7976 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7978 need_temp = (!REG_P (container)
7979 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7980 || TYPE_ALIGN (type) > 128));
7982 /* If we are passing a structure, verify that it is a consecutive block
7983 in the register save area.  If not, we need to do moves.  */
7984 if (!need_temp && !REG_P (container))
7986 /* Verify that all registers are strictly consecutive */
7987 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7991 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7993 rtx slot = XVECEXP (container, 0, i);
7994 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7995 || INTVAL (XEXP (slot, 1)) != i * 16)
8003 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8005 rtx slot = XVECEXP (container, 0, i);
8006 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8007 || INTVAL (XEXP (slot, 1)) != i * 8)
8019 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8020 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8023 /* First ensure that we fit completely in registers. */
8026 t = build_int_cst (TREE_TYPE (gpr),
8027 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8028 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8029 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8030 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8031 gimplify_and_add (t, pre_p);
8035 t = build_int_cst (TREE_TYPE (fpr),
8036 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8037 + X86_64_REGPARM_MAX * 8);
8038 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8039 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8040 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8041 gimplify_and_add (t, pre_p);
8044 /* Compute index to start of area used for integer regs. */
8047 /* int_addr = gpr + sav; */
8048 t = fold_convert (sizetype, gpr);
8049 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8050 gimplify_assign (int_addr, t, pre_p);
8054 /* sse_addr = fpr + sav; */
8055 t = fold_convert (sizetype, fpr);
8056 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8057 gimplify_assign (sse_addr, t, pre_p);
8061 int i, prev_size = 0;
8062 tree temp = create_tmp_var (type, "va_arg_tmp");
8065 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8066 gimplify_assign (addr, t, pre_p);
8068 for (i = 0; i < XVECLEN (container, 0); i++)
8070 rtx slot = XVECEXP (container, 0, i);
8071 rtx reg = XEXP (slot, 0);
8072 enum machine_mode mode = GET_MODE (reg);
8078 tree dest_addr, dest;
8079 int cur_size = GET_MODE_SIZE (mode);
8081 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8082 prev_size = INTVAL (XEXP (slot, 1));
8083 if (prev_size + cur_size > size)
8085 cur_size = size - prev_size;
8086 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8087 if (mode == BLKmode)
8090 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8091 if (mode == GET_MODE (reg))
8092 addr_type = build_pointer_type (piece_type);
8094 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8096 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8099 if (SSE_REGNO_P (REGNO (reg)))
8101 src_addr = sse_addr;
8102 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8106 src_addr = int_addr;
8107 src_offset = REGNO (reg) * 8;
8109 src_addr = fold_convert (addr_type, src_addr);
8110 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8111 size_int (src_offset));
8113 dest_addr = fold_convert (daddr_type, addr);
8114 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8115 size_int (prev_size));
8116 if (cur_size == GET_MODE_SIZE (mode))
8118 src = build_va_arg_indirect_ref (src_addr);
8119 dest = build_va_arg_indirect_ref (dest_addr);
8121 gimplify_assign (dest, src, pre_p);
8126 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8127 3, dest_addr, src_addr,
8128 size_int (cur_size));
8129 gimplify_and_add (copy, pre_p);
8131 prev_size += cur_size;
8137 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8138 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8139 gimplify_assign (gpr, t, pre_p);
8144 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8145 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8146 gimplify_assign (fpr, t, pre_p);
8149 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8151 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8154 /* ... otherwise out of the overflow area. */
8156 /* When the caller aligns a parameter on the stack, an alignment
8157 beyond MAX_SUPPORTED_STACK_ALIGNMENT will be clamped to
8158 MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee with the
8159 caller here. */
8160 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
8161 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8162 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8164 /* Care for on-stack alignment if needed. */
8165 if (arg_boundary <= 64 || size == 0)
8169 HOST_WIDE_INT align = arg_boundary / 8;
8170 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8171 size_int (align - 1));
8172 t = fold_convert (sizetype, t);
8173 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8175 t = fold_convert (TREE_TYPE (ovf), t);
8178 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8179 gimplify_assign (addr, t, pre_p);
8181 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8182 size_int (rsize * UNITS_PER_WORD));
8183 gimplify_assign (unshare_expr (ovf), t, pre_p);
8186 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8188 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8189 addr = fold_convert (ptrtype, addr);
8192 addr = build_va_arg_indirect_ref (addr);
8193 return build_va_arg_indirect_ref (addr);
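#if 0
/* Illustrative, self-contained sketch (not part of GCC): the on-stack
   alignment above uses the classic round-up idiom (p + align-1) & -align
   for a power-of-two ALIGN.  The values below are hypothetical.  */
#include <assert.h>
#include <stdint.h>

static uintptr_t
round_up_to_align (uintptr_t p, uintptr_t align)
{
  return (p + align - 1) & -align;
}

int
main (void)
{
  assert (round_up_to_align (0x1001, 16) == 0x1010);
  assert (round_up_to_align (0x1010, 16) == 0x1010); /* already aligned */
  return 0;
}
#endif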
8196 /* Return true if OPNUM's MEM should be matched
8197 in movabs* patterns. */
8200 ix86_check_movabs (rtx insn, int opnum)
8204 set = PATTERN (insn);
8205 if (GET_CODE (set) == PARALLEL)
8206 set = XVECEXP (set, 0, 0);
8207 gcc_assert (GET_CODE (set) == SET);
8208 mem = XEXP (set, opnum);
8209 while (GET_CODE (mem) == SUBREG)
8210 mem = SUBREG_REG (mem);
8211 gcc_assert (MEM_P (mem));
8212 return volatile_ok || !MEM_VOLATILE_P (mem);
8215 /* Initialize the table of extra 80387 mathematical constants. */
8218 init_ext_80387_constants (void)
8220 static const char * cst[5] =
8222 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8223 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8224 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8225 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8226 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8230 for (i = 0; i < 5; i++)
8232 real_from_string (&ext_80387_constants_table[i], cst[i]);
8233 /* Ensure each constant is rounded to XFmode precision. */
8234 real_convert (&ext_80387_constants_table[i],
8235 XFmode, &ext_80387_constants_table[i]);
8238 ext_80387_constants_init = 1;
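#if 0
/* Illustrative sketch (not part of GCC): the five table slots above pair
   with these x87 load instructions, mirroring the index comments on the
   constant strings.  */
static const char *const ext_80387_load_insn[5] =
{
  "fldlg2",   /* 0: log10(2) */
  "fldln2",   /* 1: ln(2)    */
  "fldl2e",   /* 2: log2(e)  */
  "fldl2t",   /* 3: log2(10) */
  "fldpi"     /* 4: pi       */
};
#endif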
8241 /* Return non-zero if the constant is something that
8242 can be loaded with a special instruction. */
8245 standard_80387_constant_p (rtx x)
8247 enum machine_mode mode = GET_MODE (x);
8251 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8254 if (x == CONST0_RTX (mode))
8256 if (x == CONST1_RTX (mode))
8259 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8261 /* For XFmode constants, try to find a special 80387 instruction when
8262 optimizing for size or on those CPUs that benefit from them. */
8264 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8268 if (! ext_80387_constants_init)
8269 init_ext_80387_constants ();
8271 for (i = 0; i < 5; i++)
8272 if (real_identical (&r, &ext_80387_constants_table[i]))
8276 /* Load of the constant -0.0 or -1.0 will be split as
8277 fldz;fchs or fld1;fchs sequence. */
8278 if (real_isnegzero (&r))
8280 if (real_identical (&r, &dconstm1))
8286 /* Return the opcode of the special instruction to be used to load
8290 standard_80387_constant_opcode (rtx x)
8292 switch (standard_80387_constant_p (x))
8316 /* Return the CONST_DOUBLE representing the 80387 constant that is
8317 loaded by the specified special instruction. The argument IDX
8318 matches the return value from standard_80387_constant_p. */
8321 standard_80387_constant_rtx (int idx)
8325 if (! ext_80387_constants_init)
8326 init_ext_80387_constants ();
8342 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8346 /* Return 1 if X is all 0s and 2 if X is all 1s
8347 in a supported SSE vector mode. */
8350 standard_sse_constant_p (rtx x)
8352 enum machine_mode mode = GET_MODE (x);
8354 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8356 if (vector_all_ones_operand (x, mode))
8372 /* Return the opcode of the special instruction to be used to load
8376 standard_sse_constant_opcode (rtx insn, rtx x)
8378 switch (standard_sse_constant_p (x))
8381 switch (get_attr_mode (insn))
8384 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8386 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8387 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8389 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8391 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8392 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8394 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8396 return "vxorps\t%x0, %x0, %x0";
8398 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8399 return "vxorps\t%x0, %x0, %x0";
8401 return "vxorpd\t%x0, %x0, %x0";
8403 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8404 return "vxorps\t%x0, %x0, %x0";
8406 return "vpxor\t%x0, %x0, %x0";
8411 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
8418 /* Returns true if OP contains a symbol reference. */
8421 symbolic_reference_mentioned_p (rtx op)
8426 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8429 fmt = GET_RTX_FORMAT (GET_CODE (op));
8430 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8436 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8437 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8441 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
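#if 0
/* Illustrative, self-contained sketch (not part of GCC): the same
   format-string-driven walk over a toy expression type.  'e' marks a
   sub-expression slot, as GET_RTX_FORMAT does for rtl; other format
   codes are simply skipped and vectors ('E') are omitted here.  */
#include <stdbool.h>
#include <stddef.h>

struct toy_expr
{
  bool is_symbol;               /* stands in for SYMBOL_REF/LABEL_REF */
  const char *fmt;              /* e.g. "ee" for a binary operator */
  struct toy_expr *ops[2];
};

static bool
mentions_symbol (const struct toy_expr *e)
{
  if (e->is_symbol)
    return true;
  for (size_t i = 0; e->fmt[i] != '\0'; i++)
    if (e->fmt[i] == 'e' && mentions_symbol (e->ops[i]))
      return true;
  return false;
}
#endif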
8448 /* Return true if it is appropriate to emit `ret' instructions in the
8449 body of a function. Do this only if the epilogue is simple, needing a
8450 couple of insns. Prior to reloading, we can't tell how many registers
8451 must be saved, so return false then. Return false if there is no frame
8452 marker to de-allocate. */
8455 ix86_can_use_return_insn_p (void)
8457 struct ix86_frame frame;
8459 if (! reload_completed || frame_pointer_needed)
8462 /* Don't allow more than 32k pop, since that's all we can do
8463 with one instruction. */
8464 if (crtl->args.pops_args && crtl->args.size >= 32768)
8467 ix86_compute_frame_layout (&frame);
8468 return (frame.stack_pointer_offset == UNITS_PER_WORD
8469 && (frame.nregs + frame.nsseregs) == 0);
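#if 0
/* Illustrative sketch (not part of GCC): "ret imm16" can pop at most a
   16-bit byte count, and the test above conservatively refuses anything
   at or beyond 32768 bytes.  The parameter names are hypothetical.  */
#include <stdbool.h>

static bool
pop_fits_one_ret_insn (long pops_args, long args_size)
{
  return !(pops_args && args_size >= 32768);
}
#endif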
8472 /* Value should be nonzero if functions must have frame pointers.
8473 Zero means the frame pointer need not be set up (and parms may
8474 be accessed via the stack pointer) in functions that seem suitable. */
8477 ix86_frame_pointer_required (void)
8479 /* If we accessed previous frames, then the generated code expects
8480 to be able to access the saved ebp value in our frame. */
8481 if (cfun->machine->accesses_prev_frame)
8484 /* Several x86 OSes need a frame pointer for other reasons,
8485 usually pertaining to setjmp. */
8486 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8489 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8490 turns off the frame pointer by default. Turn it back on now if
8491 we've not got a leaf function. */
8492 if (TARGET_OMIT_LEAF_FRAME_POINTER
8493 && (!current_function_is_leaf
8494 || ix86_current_function_calls_tls_descriptor))
8497 if (crtl->profile && !flag_fentry)
8503 /* Record that the current function accesses previous call frames. */
8506 ix86_setup_frame_addresses (void)
8508 cfun->machine->accesses_prev_frame = 1;
8511 #ifndef USE_HIDDEN_LINKONCE
8512 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8513 # define USE_HIDDEN_LINKONCE 1
8515 # define USE_HIDDEN_LINKONCE 0
8519 static int pic_labels_used;
8521 /* Fills in the label name that should be used for a pc thunk for
8522 the given register. */
8525 get_pc_thunk_name (char name[32], unsigned int regno)
8527 gcc_assert (!TARGET_64BIT);
8529 if (USE_HIDDEN_LINKONCE)
8530 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8532 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
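#if 0
/* Illustrative, self-contained sketch (not part of GCC) of the
   hidden-linkonce naming scheme above; e.g. the thunk for %ebx comes out
   as "__i686.get_pc_thunk.bx", since reg_names[BX_REG] is "bx" there.  */
#include <stdio.h>

static void
toy_get_pc_thunk_name (char name[32], const char *regname)
{
  snprintf (name, 32, "__i686.get_pc_thunk.%s", regname);
}
#endif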
8536 /* This function generates the pc thunks used for -fpic; each loads its
8537 register with the return address of the caller and then returns. */
8540 ix86_code_end (void)
8545 for (regno = AX_REG; regno <= SP_REG; regno++)
8550 if (!(pic_labels_used & (1 << regno)))
8553 get_pc_thunk_name (name, regno);
8555 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8556 get_identifier (name),
8557 build_function_type (void_type_node, void_list_node));
8558 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8559 NULL_TREE, void_type_node);
8560 TREE_PUBLIC (decl) = 1;
8561 TREE_STATIC (decl) = 1;
8566 switch_to_section (darwin_sections[text_coal_section]);
8567 fputs ("\t.weak_definition\t", asm_out_file);
8568 assemble_name (asm_out_file, name);
8569 fputs ("\n\t.private_extern\t", asm_out_file);
8570 assemble_name (asm_out_file, name);
8571 putc ('\n', asm_out_file);
8572 ASM_OUTPUT_LABEL (asm_out_file, name);
8573 DECL_WEAK (decl) = 1;
8577 if (USE_HIDDEN_LINKONCE)
8579 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8581 targetm.asm_out.unique_section (decl, 0);
8582 switch_to_section (get_named_section (decl, NULL, 0));
8584 targetm.asm_out.globalize_label (asm_out_file, name);
8585 fputs ("\t.hidden\t", asm_out_file);
8586 assemble_name (asm_out_file, name);
8587 putc ('\n', asm_out_file);
8588 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8592 switch_to_section (text_section);
8593 ASM_OUTPUT_LABEL (asm_out_file, name);
8596 DECL_INITIAL (decl) = make_node (BLOCK);
8597 current_function_decl = decl;
8598 init_function_start (decl);
8599 first_function_block_is_cold = false;
8600 /* Make sure unwind info is emitted for the thunk if needed. */
8601 final_start_function (emit_barrier (), asm_out_file, 1);
8603 /* Pad stack IP move with 4 instructions (two NOPs count
8604 as one instruction). */
8605 if (TARGET_PAD_SHORT_FUNCTION)
8610 fputs ("\tnop\n", asm_out_file);
8613 xops[0] = gen_rtx_REG (Pmode, regno);
8614 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8615 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8616 fputs ("\tret\n", asm_out_file);
8617 final_end_function ();
8618 init_insn_lengths ();
8619 free_after_compilation (cfun);
8621 current_function_decl = NULL;
8624 if (flag_split_stack)
8625 file_end_indicate_split_stack ();
8628 /* Emit code for the SET_GOT patterns. */
8631 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8637 if (TARGET_VXWORKS_RTP && flag_pic)
8639 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8640 xops[2] = gen_rtx_MEM (Pmode,
8641 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8642 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8644 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8645 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8646 an unadorned address. */
8647 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8648 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8649 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8653 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8655 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8657 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8660 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8663 output_asm_insn ("call\t%a2", xops);
8664 #ifdef DWARF2_UNWIND_INFO
8665 /* The call to next label acts as a push. */
8666 if (dwarf2out_do_frame ())
8670 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8671 gen_rtx_PLUS (Pmode,
8674 RTX_FRAME_RELATED_P (insn) = 1;
8675 dwarf2out_frame_debug (insn, true);
8682 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8683 is what will be referenced by the Mach-O PIC subsystem. */
8685 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8688 targetm.asm_out.internal_label (asm_out_file, "L",
8689 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8693 output_asm_insn ("pop%z0\t%0", xops);
8694 #ifdef DWARF2_UNWIND_INFO
8695 /* The pop is a pop and clobbers dest, but doesn't restore it
8696 for unwind info purposes. */
8697 if (dwarf2out_do_frame ())
8701 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8702 dwarf2out_frame_debug (insn, true);
8703 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8704 gen_rtx_PLUS (Pmode,
8707 RTX_FRAME_RELATED_P (insn) = 1;
8708 dwarf2out_frame_debug (insn, true);
8717 get_pc_thunk_name (name, REGNO (dest));
8718 pic_labels_used |= 1 << REGNO (dest);
8720 #ifdef DWARF2_UNWIND_INFO
8721 /* Ensure all queued register saves are flushed before the
8722 call. */
8723 if (dwarf2out_do_frame ())
8724 dwarf2out_flush_queued_reg_saves ();
8726 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8727 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8728 output_asm_insn ("call\t%X2", xops);
8729 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8730 is what will be referenced by the Mach-O PIC subsystem. */
8733 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8735 targetm.asm_out.internal_label (asm_out_file, "L",
8736 CODE_LABEL_NUMBER (label));
8743 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8744 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8746 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8751 /* Generate a "push" pattern for input ARG. */
8756 struct machine_function *m = cfun->machine;
8758 if (m->fs.cfa_reg == stack_pointer_rtx)
8759 m->fs.cfa_offset += UNITS_PER_WORD;
8760 m->fs.sp_offset += UNITS_PER_WORD;
8762 return gen_rtx_SET (VOIDmode,
8764 gen_rtx_PRE_DEC (Pmode,
8765 stack_pointer_rtx)),
8769 /* Generate a "pop" pattern for input ARG. */
8774 return gen_rtx_SET (VOIDmode,
8777 gen_rtx_POST_INC (Pmode,
8778 stack_pointer_rtx)));
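#if 0
/* Illustrative sketch (not part of GCC).  In rtl terms the two patterns are
     push:  (set (mem (pre_dec sp)) arg)
     pop:   (set arg (mem (post_inc sp)))
   and gen_push additionally keeps the frame-state offsets in sync.  A
   standalone model of that bookkeeping, with hypothetical names: */
#include <stdbool.h>

struct toy_frame_state
{
  long cfa_offset;   /* CFA distance tracked against the CFA register */
  long sp_offset;    /* CFA distance to the current stack pointer */
};

static void
toy_note_push (struct toy_frame_state *fs, long units_per_word,
               bool cfa_reg_is_sp)
{
  if (cfa_reg_is_sp)
    fs->cfa_offset += units_per_word;  /* CFA is still SP-relative */
  fs->sp_offset += units_per_word;     /* SP always moves down one word */
}
#endif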
8781 /* Return >= 0 if there is an unused call-clobbered register available
8782 for the entire function. */
8785 ix86_select_alt_pic_regnum (void)
8787 if (current_function_is_leaf
8789 && !ix86_current_function_calls_tls_descriptor)
8792 /* Can't use the same register for both PIC and DRAP. */
8794 drap = REGNO (crtl->drap_reg);
8797 for (i = 2; i >= 0; --i)
8798 if (i != drap && !df_regs_ever_live_p (i))
8802 return INVALID_REGNUM;
8805 /* Return 1 if we need to save REGNO. */
8807 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8809 if (pic_offset_table_rtx
8810 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8811 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8813 || crtl->calls_eh_return
8814 || crtl->uses_const_pool))
8816 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8821 if (crtl->calls_eh_return && maybe_eh_return)
8826 unsigned test = EH_RETURN_DATA_REGNO (i);
8827 if (test == INVALID_REGNUM)
8834 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8837 return (df_regs_ever_live_p (regno)
8838 && !call_used_regs[regno]
8839 && !fixed_regs[regno]
8840 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8843 /* Return number of saved general purpose registers. */
8846 ix86_nsaved_regs (void)
8851 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8852 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8857 /* Return number of saved SSE registers. */
8860 ix86_nsaved_sseregs (void)
8865 if (ix86_cfun_abi () != MS_ABI)
8867 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8868 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8873 /* Given FROM and TO register numbers, say whether this elimination is
8874 allowed. If stack alignment is needed, we can only replace argument
8875 pointer with hard frame pointer, or replace frame pointer with stack
8876 pointer. Otherwise, frame pointer elimination is automatically
8877 handled and all other eliminations are valid. */
8880 ix86_can_eliminate (const int from, const int to)
8882 if (stack_realign_fp)
8883 return ((from == ARG_POINTER_REGNUM
8884 && to == HARD_FRAME_POINTER_REGNUM)
8885 || (from == FRAME_POINTER_REGNUM
8886 && to == STACK_POINTER_REGNUM));
8888 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8891 /* Return the offset between two registers, one to be eliminated, and the other
8892 its replacement, at the start of a routine. */
8895 ix86_initial_elimination_offset (int from, int to)
8897 struct ix86_frame frame;
8898 ix86_compute_frame_layout (&frame);
8900 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8901 return frame.hard_frame_pointer_offset;
8902 else if (from == FRAME_POINTER_REGNUM
8903 && to == HARD_FRAME_POINTER_REGNUM)
8904 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8907 gcc_assert (to == STACK_POINTER_REGNUM);
8909 if (from == ARG_POINTER_REGNUM)
8910 return frame.stack_pointer_offset;
8912 gcc_assert (from == FRAME_POINTER_REGNUM);
8913 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8917 /* In a dynamically-aligned function, we can't know the offset from
8918 stack pointer to frame pointer, so we must ensure that setjmp
8919 eliminates fp against the hard fp (%ebp) rather than trying to
8920 index from %esp up to the top of the frame across a gap that is
8921 of unknown (at compile-time) size. */
8923 ix86_builtin_setjmp_frame_value (void)
8925 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8928 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8929 field in the TCB, so they can not be used together. */
8932 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8936 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8938 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8941 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8944 error ("%<-fsplit-stack%> requires "
8945 "assembler support for CFI directives");
8953 /* When using -fsplit-stack, the allocation routines set a field in
8954 the TCB to the bottom of the stack plus this much space, measured
8957 #define SPLIT_STACK_AVAILABLE 256
8959 /* Fill the structure ix86_frame describing the frame of the currently computed function. */
8962 ix86_compute_frame_layout (struct ix86_frame *frame)
8964 unsigned int stack_alignment_needed;
8965 HOST_WIDE_INT offset;
8966 unsigned int preferred_alignment;
8967 HOST_WIDE_INT size = get_frame_size ();
8968 HOST_WIDE_INT to_allocate;
8970 frame->nregs = ix86_nsaved_regs ();
8971 frame->nsseregs = ix86_nsaved_sseregs ();
8973 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8974 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8976 /* The MS ABI seems to require stack alignment of 16 always, except for
8977 function prologues and leaf functions. */
8978 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8979 && (!current_function_is_leaf || cfun->calls_alloca != 0
8980 || ix86_current_function_calls_tls_descriptor))
8982 preferred_alignment = 16;
8983 stack_alignment_needed = 16;
8984 crtl->preferred_stack_boundary = 128;
8985 crtl->stack_alignment_needed = 128;
8988 gcc_assert (!size || stack_alignment_needed);
8989 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8990 gcc_assert (preferred_alignment <= stack_alignment_needed);
8992 /* For SEH we have to limit the amount of code movement into the prologue.
8993 At present we do this via a BLOCKAGE, at which point there's very little
8994 scheduling that can be done, which means that there's very little point
8995 in doing anything except PUSHs. */
8997 cfun->machine->use_fast_prologue_epilogue = false;
8999 /* During reload iteration the number of registers saved can change.
9000 Recompute the value as needed. Do not recompute when the number of
9001 registers didn't change, as reload does multiple calls to the function
9002 and does not expect the decision to change within a single iteration. */
9003 else if (!optimize_function_for_size_p (cfun)
9004 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9006 int count = frame->nregs;
9007 struct cgraph_node *node = cgraph_node (current_function_decl);
9009 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9011 /* The fast prologue uses move instead of push to save registers. This
9012 is significantly longer, but also executes faster as modern hardware
9013 can execute the moves in parallel, but can't do that for push/pop.
9015 Be careful about choosing what prologue to emit: when the function
9016 takes many instructions to execute we may use the slow version, as well
9017 as when the function is known to be outside a hot spot (this is known
9018 with feedback only). Weight the size of the function by the number of
9019 registers to save, as it is cheap to use one or two push instructions
9020 but very slow to use many of them. */
9022 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9023 if (node->frequency < NODE_FREQUENCY_NORMAL
9024 || (flag_branch_probabilities
9025 && node->frequency < NODE_FREQUENCY_HOT))
9026 cfun->machine->use_fast_prologue_epilogue = false;
9028 cfun->machine->use_fast_prologue_epilogue
9029 = !expensive_function_p (count);
9031 if (TARGET_PROLOGUE_USING_MOVE
9032 && cfun->machine->use_fast_prologue_epilogue)
9033 frame->save_regs_using_mov = true;
9035 frame->save_regs_using_mov = false;
9037 /* If static stack checking is enabled and done with probes, the registers
9038 need to be saved before allocating the frame. */
9039 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9040 frame->save_regs_using_mov = false;
9042 /* Skip return address. */
9043 offset = UNITS_PER_WORD;
9045 /* Skip pushed static chain. */
9046 if (ix86_static_chain_on_stack)
9047 offset += UNITS_PER_WORD;
9049 /* Skip saved base pointer. */
9050 if (frame_pointer_needed)
9051 offset += UNITS_PER_WORD;
9052 frame->hfp_save_offset = offset;
9054 /* The traditional frame pointer location is at the top of the frame. */
9055 frame->hard_frame_pointer_offset = offset;
9057 /* Register save area */
9058 offset += frame->nregs * UNITS_PER_WORD;
9059 frame->reg_save_offset = offset;
9061 /* Align and set SSE register save area. */
9062 if (frame->nsseregs)
9064 /* The only ABI that has saved SSE registers (Win64) also has a
9065 16-byte aligned default stack, and thus we don't need to be
9066 within the re-aligned local stack frame to save them. */
9067 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9068 offset = (offset + 16 - 1) & -16;
9069 offset += frame->nsseregs * 16;
9071 frame->sse_reg_save_offset = offset;
9073 /* The re-aligned stack starts here. Values before this point are not
9074 directly comparable with values below this point. In order to make
9075 sure that no value happens to be the same before and after, force
9076 the alignment computation below to add a non-zero value. */
9077 if (stack_realign_fp)
9078 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9081 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9082 offset += frame->va_arg_size;
9084 /* Align start of frame for local function. */
9085 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9087 /* Frame pointer points here. */
9088 frame->frame_pointer_offset = offset;
9092 /* Add the outgoing arguments area. This can be skipped if we
9093 eliminated all the function calls as dead code.
9094 Skipping is however impossible when the function calls alloca, as the
9095 alloca expander assumes that the last crtl->outgoing_args_size bytes
9096 of the stack frame are unused. */
9097 if (ACCUMULATE_OUTGOING_ARGS
9098 && (!current_function_is_leaf || cfun->calls_alloca
9099 || ix86_current_function_calls_tls_descriptor))
9101 offset += crtl->outgoing_args_size;
9102 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9105 frame->outgoing_arguments_size = 0;
9107 /* Align stack boundary. Only needed if we're calling another function
9108 or using alloca. */
9109 if (!current_function_is_leaf || cfun->calls_alloca
9110 || ix86_current_function_calls_tls_descriptor)
9111 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9113 /* We've reached end of stack frame. */
9114 frame->stack_pointer_offset = offset;
9116 /* Size prologue needs to allocate. */
9117 to_allocate = offset - frame->sse_reg_save_offset;
9119 if ((!to_allocate && frame->nregs <= 1)
9120 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9121 frame->save_regs_using_mov = false;
9123 if (ix86_using_red_zone ()
9124 && current_function_sp_is_unchanging
9125 && current_function_is_leaf
9126 && !ix86_current_function_calls_tls_descriptor)
9128 frame->red_zone_size = to_allocate;
9129 if (frame->save_regs_using_mov)
9130 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9131 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9132 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9135 frame->red_zone_size = 0;
9136 frame->stack_pointer_offset -= frame->red_zone_size;
9138 /* The SEH frame pointer location is near the bottom of the frame.
9139 This is enforced by the fact that the difference between the
9140 stack pointer and the frame pointer is limited to 240 bytes in
9141 the unwind data structure. */
9146 /* If we can leave the frame pointer where it is, do so. */
9147 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9148 if (diff > 240 || (diff & 15) != 0)
9150 /* Ideally we'd determine what portion of the local stack frame
9151 (within the constraint of the lowest 240) is most heavily used.
9152 But without that complication, simply bias the frame pointer
9153 by 128 bytes so as to maximize the amount of the local stack
9154 frame that is addressable with 8-bit offsets. */
9155 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
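#if 0
/* Illustrative, self-contained sketch (not part of GCC): signed disp8
   covers [-128, 127], so placing the frame pointer 128 bytes into the
   local frame makes 256 bytes of it reachable with one-byte offsets.  */
#include <assert.h>

int
main (void)
{
  const int bias = 128;
  for (int b = 0; b < 256; b++)   /* byte offsets above the biased FP base */
    {
      int disp = b - bias;        /* the same byte, FP-relative */
      assert (disp >= -128 && disp <= 127);
    }
  return 0;
}
#endif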
9160 /* This is semi-inlined memory_address_length, but simplified
9161 since we know that we're always dealing with reg+offset, and
9162 to avoid having to create and discard all that rtl. */
9165 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9171 /* EBP and R13 cannot be encoded without an offset. */
9172 len = (regno == BP_REG || regno == R13_REG);
9174 else if (IN_RANGE (offset, -128, 127))
9177 /* ESP and R12 must be encoded with a SIB byte. */
9178 if (regno == SP_REG || regno == R12_REG)
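#if 0
/* Illustrative, self-contained sketch (not part of GCC) of the reg+disp
   encoding-size rule used above: EBP/R13 need a displacement byte even
   for offset 0, displacements outside [-128, 127] take four bytes, and
   ESP/R12 require an extra SIB byte.  The flags are hypothetical
   stand-ins for the regno checks.  */
#include <stdbool.h>

static unsigned int
toy_baseaddr_len (bool is_bp_or_r13, bool is_sp_or_r12, long offset)
{
  unsigned int len = is_bp_or_r13;              /* disp8 of 0 still needed */
  if (offset != 0)
    len = (offset >= -128 && offset <= 127) ? 1 : 4;
  if (is_sp_or_r12)
    len++;                                      /* SIB byte */
  return len;
}
#endif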
9184 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9185 The valid base registers are taken from CFUN->MACHINE->FS. */
9188 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9190 const struct machine_function *m = cfun->machine;
9191 rtx base_reg = NULL;
9192 HOST_WIDE_INT base_offset = 0;
9194 if (m->use_fast_prologue_epilogue)
9196 /* Choose the base register most likely to allow the most scheduling
9197 opportunities. Generally FP is valid throughout the function,
9198 while DRAP must be reloaded within the epilogue. But choose either
9199 over the SP due to increased encoding size. */
9203 base_reg = hard_frame_pointer_rtx;
9204 base_offset = m->fs.fp_offset - cfa_offset;
9206 else if (m->fs.drap_valid)
9208 base_reg = crtl->drap_reg;
9209 base_offset = 0 - cfa_offset;
9211 else if (m->fs.sp_valid)
9213 base_reg = stack_pointer_rtx;
9214 base_offset = m->fs.sp_offset - cfa_offset;
9219 HOST_WIDE_INT toffset;
9222 /* Choose the base register with the smallest address encoding.
9223 With a tie, choose FP > DRAP > SP. */
9226 base_reg = stack_pointer_rtx;
9227 base_offset = m->fs.sp_offset - cfa_offset;
9228 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9230 if (m->fs.drap_valid)
9232 toffset = 0 - cfa_offset;
9233 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9236 base_reg = crtl->drap_reg;
9237 base_offset = toffset;
9243 toffset = m->fs.fp_offset - cfa_offset;
9244 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9247 base_reg = hard_frame_pointer_rtx;
9248 base_offset = toffset;
9253 gcc_assert (base_reg != NULL);
9255 return plus_constant (base_reg, base_offset);
9258 /* Emit code to save registers in the prologue. */
9261 ix86_emit_save_regs (void)
9266 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9267 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9269 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9270 RTX_FRAME_RELATED_P (insn) = 1;
9274 /* Emit a single register save at CFA - CFA_OFFSET. */
9277 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9278 HOST_WIDE_INT cfa_offset)
9280 struct machine_function *m = cfun->machine;
9281 rtx reg = gen_rtx_REG (mode, regno);
9282 rtx mem, addr, base, insn;
9284 addr = choose_baseaddr (cfa_offset);
9285 mem = gen_frame_mem (mode, addr);
9287 /* For SSE saves, we need to indicate the 128-bit alignment. */
9288 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9290 insn = emit_move_insn (mem, reg);
9291 RTX_FRAME_RELATED_P (insn) = 1;
9294 if (GET_CODE (base) == PLUS)
9295 base = XEXP (base, 0);
9296 gcc_checking_assert (REG_P (base));
9298 /* When saving registers into a re-aligned local stack frame, avoid
9299 any tricky guessing by dwarf2out. */
9300 if (m->fs.realigned)
9302 gcc_checking_assert (stack_realign_drap);
9304 if (regno == REGNO (crtl->drap_reg))
9306 /* A bit of a hack. We force the DRAP register to be saved in
9307 the re-aligned stack frame, which provides us with a copy
9308 of the CFA that will last past the prologue. Install it. */
9309 gcc_checking_assert (cfun->machine->fs.fp_valid);
9310 addr = plus_constant (hard_frame_pointer_rtx,
9311 cfun->machine->fs.fp_offset - cfa_offset);
9312 mem = gen_rtx_MEM (mode, addr);
9313 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9317 /* The frame pointer is a stable reference within the
9318 aligned frame. Use it. */
9319 gcc_checking_assert (cfun->machine->fs.fp_valid);
9320 addr = plus_constant (hard_frame_pointer_rtx,
9321 cfun->machine->fs.fp_offset - cfa_offset);
9322 mem = gen_rtx_MEM (mode, addr);
9323 add_reg_note (insn, REG_CFA_EXPRESSION,
9324 gen_rtx_SET (VOIDmode, mem, reg));
9328 /* The memory may not be relative to the current CFA register,
9329 which means that we may need to generate a new pattern for
9330 use by the unwind info. */
9331 else if (base != m->fs.cfa_reg)
9333 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9334 mem = gen_rtx_MEM (mode, addr);
9335 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9339 /* Emit code to save registers using MOV insns.
9340 First register is stored at CFA - CFA_OFFSET. */
9342 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9346 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9347 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9349 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9350 cfa_offset -= UNITS_PER_WORD;
9354 /* Emit code to save SSE registers using MOV insns.
9355 First register is stored at CFA - CFA_OFFSET. */
9357 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9361 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9362 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9364 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9369 static GTY(()) rtx queued_cfa_restores;
9371 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9372 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9373 Don't add the note if the previously saved value will be left untouched
9374 within the stack red zone until return, as unwinders can find the same
9375 value in the register and on the stack. */
9378 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9380 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9385 add_reg_note (insn, REG_CFA_RESTORE, reg);
9386 RTX_FRAME_RELATED_P (insn) = 1;
9390 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9393 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9396 ix86_add_queued_cfa_restore_notes (rtx insn)
9399 if (!queued_cfa_restores)
9401 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9403 XEXP (last, 1) = REG_NOTES (insn);
9404 REG_NOTES (insn) = queued_cfa_restores;
9405 queued_cfa_restores = NULL_RTX;
9406 RTX_FRAME_RELATED_P (insn) = 1;
9409 /* Expand prologue or epilogue stack adjustment.
9410 The pattern exists to put a dependency on all ebp-based memory accesses.
9411 STYLE should be negative if instructions should be marked as frame related,
9412 zero if the %r11 register is live and cannot be freely used, and positive
9413 otherwise. */
9416 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9417 int style, bool set_cfa)
9419 struct machine_function *m = cfun->machine;
9423 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9424 else if (x86_64_immediate_operand (offset, DImode))
9425 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9429 /* r11 is used by indirect sibcall return as well, set before the
9430 epilogue and used after the epilogue. */
9432 tmp = gen_rtx_REG (DImode, R11_REG);
9435 gcc_assert (src != hard_frame_pointer_rtx
9436 && dest != hard_frame_pointer_rtx);
9437 tmp = hard_frame_pointer_rtx;
9439 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9441 RTX_FRAME_RELATED_P (insn) = 1;
9443 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9446 insn = emit_insn (insn);
9448 ix86_add_queued_cfa_restore_notes (insn);
9454 gcc_assert (m->fs.cfa_reg == src);
9455 m->fs.cfa_offset += INTVAL (offset);
9456 m->fs.cfa_reg = dest;
9458 r = gen_rtx_PLUS (Pmode, src, offset);
9459 r = gen_rtx_SET (VOIDmode, dest, r);
9460 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9461 RTX_FRAME_RELATED_P (insn) = 1;
9464 RTX_FRAME_RELATED_P (insn) = 1;
9466 if (dest == stack_pointer_rtx)
9468 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9469 bool valid = m->fs.sp_valid;
9471 if (src == hard_frame_pointer_rtx)
9473 valid = m->fs.fp_valid;
9474 ooffset = m->fs.fp_offset;
9476 else if (src == crtl->drap_reg)
9478 valid = m->fs.drap_valid;
9483 /* Else there are two possibilities: SP itself, which we set
9484 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9485 taken care of by hand along the eh_return path. */
9486 gcc_checking_assert (src == stack_pointer_rtx
9487 || offset == const0_rtx);
9490 m->fs.sp_offset = ooffset - INTVAL (offset);
9491 m->fs.sp_valid = valid;
9495 /* Find an available register to be used as a dynamic realign argument
9496 pointer register. Such a register will be written in the prologue and
9497 used at the beginning of the body, so it must not be
9498 1. parameter passing register.
9500 We reuse static-chain register if it is available. Otherwise, we
9501 use DI for i386 and R13 for x86-64. We chose R13 since it has
9504 Return: the regno of chosen register. */
9507 find_drap_reg (void)
9509 tree decl = cfun->decl;
9513 /* Use R13 for a nested function or a function that needs a static
9514 chain. Since a function with a tail call may use any caller-saved
9515 register in the epilogue, DRAP must not use a caller-saved
9516 register in such a case. */
9517 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9524 /* Use DI for a nested function or a function that needs a static
9525 chain. Since a function with a tail call may use any caller-saved
9526 register in the epilogue, DRAP must not use a caller-saved
9527 register in such a case. */
9528 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9531 /* Reuse the static chain register if it isn't used for parameter
9532 passing. */
9533 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9534 && !lookup_attribute ("fastcall",
9535 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9536 && !lookup_attribute ("thiscall",
9537 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9544 /* Return minimum incoming stack alignment. */
9547 ix86_minimum_incoming_stack_boundary (bool sibcall)
9549 unsigned int incoming_stack_boundary;
9551 /* Prefer the one specified at command line. */
9552 if (ix86_user_incoming_stack_boundary)
9553 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9554 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9555 if -mstackrealign is used, this isn't used for the sibcall check, and the
9556 estimated stack alignment is 128 bits. */
9559 && ix86_force_align_arg_pointer
9560 && crtl->stack_alignment_estimated == 128)
9561 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9563 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9565 /* Incoming stack alignment can be changed on individual functions
9566 via force_align_arg_pointer attribute. We use the smallest
9567 incoming stack boundary. */
9568 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9569 && lookup_attribute (ix86_force_align_arg_pointer_string,
9570 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9571 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9573 /* The incoming stack frame has to be aligned at least at
9574 parm_stack_boundary. */
9575 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9576 incoming_stack_boundary = crtl->parm_stack_boundary;
9578 /* The stack at the entrance of main is aligned by the runtime. We use
9579 the smallest incoming stack boundary. */
9580 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9581 && DECL_NAME (current_function_decl)
9582 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9583 && DECL_FILE_SCOPE_P (current_function_decl))
9584 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9586 return incoming_stack_boundary;
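#if 0
/* Illustrative, self-contained sketch (not part of GCC) of the clamping
   above: start from the preferred boundary, lower it for the
   force-align-arg-pointer attribute, raise it to the parameter floor,
   then cap it for main().  MIN_B and MAIN_B are hypothetical stand-ins
   for MIN_STACK_BOUNDARY and MAIN_STACK_BOUNDARY.  */
#include <stdbool.h>

static unsigned int
toy_incoming_boundary (unsigned int preferred, bool force_align_attr,
                       unsigned int parm_boundary, bool is_main)
{
  const unsigned int MIN_B = 32, MAIN_B = 128;
  unsigned int b = preferred;
  if (b > MIN_B && force_align_attr)
    b = MIN_B;
  if (b < parm_boundary)
    b = parm_boundary;
  if (b > MAIN_B && is_main)
    b = MAIN_B;
  return b;
}
#endif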
9589 /* Update incoming stack boundary and estimated stack alignment. */
9592 ix86_update_stack_boundary (void)
9594 ix86_incoming_stack_boundary
9595 = ix86_minimum_incoming_stack_boundary (false);
9597 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
9601 && crtl->stack_alignment_estimated < 128)
9602 crtl->stack_alignment_estimated = 128;
9605 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9606 needed or an rtx for DRAP otherwise. */
9609 ix86_get_drap_rtx (void)
9611 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9612 crtl->need_drap = true;
9614 if (stack_realign_drap)
9616 /* Assign DRAP to vDRAP and return vDRAP. */
9617 unsigned int regno = find_drap_reg ();
9622 arg_ptr = gen_rtx_REG (Pmode, regno);
9623 crtl->drap_reg = arg_ptr;
9626 drap_vreg = copy_to_reg (arg_ptr);
9630 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9633 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9634 RTX_FRAME_RELATED_P (insn) = 1;
9642 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9645 ix86_internal_arg_pointer (void)
9647 return virtual_incoming_args_rtx;
9650 struct scratch_reg {
9655 /* Return a short-lived scratch register for use on function entry.
9656 In 32-bit mode, it is valid only after the registers are saved
9657 in the prologue. This register must be released by means of
9658 release_scratch_register_on_entry once it is dead. */
9661 get_scratch_register_on_entry (struct scratch_reg *sr)
9669 /* We always use R11 in 64-bit mode. */
9674 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9676 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9677 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9678 int regparm = ix86_function_regparm (fntype, decl);
9680 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9682 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9683 for the static chain register. */
9684 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9685 && drap_regno != AX_REG)
9687 else if (regparm < 2 && drap_regno != DX_REG)
9689 /* ecx is the static chain register. */
9690 else if (regparm < 3 && !fastcall_p && !static_chain_p
9691 && drap_regno != CX_REG)
9693 else if (ix86_save_reg (BX_REG, true))
9695 /* esi is the static chain register. */
9696 else if (!(regparm == 3 && static_chain_p)
9697 && ix86_save_reg (SI_REG, true))
9699 else if (ix86_save_reg (DI_REG, true))
9703 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9708 sr->reg = gen_rtx_REG (Pmode, regno);
9711 rtx insn = emit_insn (gen_push (sr->reg));
9712 RTX_FRAME_RELATED_P (insn) = 1;
9716 /* Release a scratch register obtained from the preceding function. */
9719 release_scratch_register_on_entry (struct scratch_reg *sr)
9723 rtx x, insn = emit_insn (gen_pop (sr->reg));
9725 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9726 RTX_FRAME_RELATED_P (insn) = 1;
9727 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9728 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9729 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9733 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9735 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9738 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9740 /* We skip the probe for the first interval + a small dope of 4 words and
9741 probe that many bytes past the specified size to maintain a protection
9742 area at the bottom of the stack. */
9743 const int dope = 4 * UNITS_PER_WORD;
9744 rtx size_rtx = GEN_INT (size);
9746 /* See if we have a constant small number of probes to generate. If so,
9747 that's the easy case. The run-time loop is made up of 11 insns in the
9748 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9749 for n # of intervals. */
9750 if (size <= 5 * PROBE_INTERVAL)
9752 HOST_WIDE_INT i, adjust;
9753 bool first_probe = true;
9755 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9756 values of N from 1 until it exceeds SIZE. If only one probe is
9757 needed, this will not generate any code. Then adjust and probe
9758 to PROBE_INTERVAL + SIZE. */
9759 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9763 adjust = 2 * PROBE_INTERVAL + dope;
9764 first_probe = false;
9767 adjust = PROBE_INTERVAL;
9769 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9770 plus_constant (stack_pointer_rtx, -adjust)));
9771 emit_stack_probe (stack_pointer_rtx);
9775 adjust = size + PROBE_INTERVAL + dope;
9777 adjust = size + PROBE_INTERVAL - i;
9779 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9780 plus_constant (stack_pointer_rtx, -adjust)));
9781 emit_stack_probe (stack_pointer_rtx);
9783 /* Adjust back to account for the additional first interval. */
9784 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9785 plus_constant (stack_pointer_rtx,
9786 PROBE_INTERVAL + dope)));
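#if 0
/* Illustrative, self-contained sketch (not part of GCC): for small
   constant sizes the probes above are fully unrolled, touching the stack
   once per interval and once past the requested size (the dope handling
   is omitted here).  */
#include <stdio.h>

static void
toy_probe_schedule (long size, long interval)
{
  long i;
  for (i = interval; i < size; i += interval)
    printf ("adjust sp, probe at original sp - %ld\n", i);
  printf ("adjust sp, probe at original sp - %ld\n", size + interval);
}

int
main (void)
{
  toy_probe_schedule (10000, 4096);  /* probes at 4096, 8192, 14096 */
  return 0;
}
#endif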
9789 /* Otherwise, do the same as above, but in a loop. Note that we must be
9790 extra careful with variables wrapping around because we might be at
9791 the very top (or the very bottom) of the address space and we have
9792 to be able to handle this case properly; in particular, we use an
9793 equality test for the loop condition. */
9796 HOST_WIDE_INT rounded_size;
9797 struct scratch_reg sr;
9799 get_scratch_register_on_entry (&sr);
9802 /* Step 1: round SIZE to the previous multiple of the interval. */
9804 rounded_size = size & -PROBE_INTERVAL;
9807 /* Step 2: compute initial and final value of the loop counter. */
9809 /* SP = SP_0 + PROBE_INTERVAL. */
9810 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9811 plus_constant (stack_pointer_rtx,
9812 - (PROBE_INTERVAL + dope))));
9814 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9815 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9816 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9817 gen_rtx_PLUS (Pmode, sr.reg,
9818 stack_pointer_rtx)));
9823 while (SP != LAST_ADDR)
9825 SP = SP + PROBE_INTERVAL
9829 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9830 values of N from 1 until it is equal to ROUNDED_SIZE. */
9832 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9835 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9836 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9838 if (size != rounded_size)
9840 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9841 plus_constant (stack_pointer_rtx,
9842 rounded_size - size)));
9843 emit_stack_probe (stack_pointer_rtx);
9846 /* Adjust back to account for the additional first interval. */
9847 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9848 plus_constant (stack_pointer_rtx,
9849 PROBE_INTERVAL + dope)));
9851 release_scratch_register_on_entry (&sr);
9854 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9855 cfun->machine->fs.sp_offset += size;
9857 /* Make sure nothing is scheduled before we are done. */
9858 emit_insn (gen_blockage ());
9861 /* Adjust the stack pointer up to REG while probing it. */
9864 output_adjust_stack_and_probe (rtx reg)
9866 static int labelno = 0;
9867 char loop_lab[32], end_lab[32];
9870 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9871 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9873 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9875 /* Jump to END_LAB if SP == LAST_ADDR. */
9876 xops[0] = stack_pointer_rtx;
9878 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9879 fputs ("\tje\t", asm_out_file);
9880 assemble_name_raw (asm_out_file, end_lab);
9881 fputc ('\n', asm_out_file);
9883 /* SP = SP + PROBE_INTERVAL. */
9884 xops[1] = GEN_INT (PROBE_INTERVAL);
9885 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9888 xops[1] = const0_rtx;
9889 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9891 fprintf (asm_out_file, "\tjmp\t");
9892 assemble_name_raw (asm_out_file, loop_lab);
9893 fputc ('\n', asm_out_file);
9895 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9900 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9901 inclusive. These are offsets from the current stack pointer. */
9904 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9906 /* See if we have a constant small number of probes to generate. If so,
9907 that's the easy case. The run-time loop is made up of 7 insns in the
9908 generic case while the compile-time loop is made up of n insns for n #
9909 of intervals. */
9910 if (size <= 7 * PROBE_INTERVAL)
9914 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9915 it exceeds SIZE. If only one probe is needed, this will not
9916 generate any code. Then probe at FIRST + SIZE. */
9917 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9918 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9920 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9923 /* Otherwise, do the same as above, but in a loop. Note that we must be
9924 extra careful with variables wrapping around because we might be at
9925 the very top (or the very bottom) of the address space and we have
9926 to be able to handle this case properly; in particular, we use an
9927 equality test for the loop condition. */
9930 HOST_WIDE_INT rounded_size, last;
9931 struct scratch_reg sr;
9933 get_scratch_register_on_entry (&sr);
9936 /* Step 1: round SIZE to the previous multiple of the interval. */
9938 rounded_size = size & -PROBE_INTERVAL;
9941 /* Step 2: compute initial and final value of the loop counter. */
9943 /* TEST_OFFSET = FIRST. */
9944 emit_move_insn (sr.reg, GEN_INT (-first));
9946 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9947 last = first + rounded_size;
9952 while (TEST_ADDR != LAST_ADDR)
9954 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9958 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9959 until it is equal to ROUNDED_SIZE. */
9961 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9964 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9965 that SIZE is equal to ROUNDED_SIZE. */
9967 if (size != rounded_size)
9968 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9971 rounded_size - size));
9973 release_scratch_register_on_entry (&sr);
9976 /* Make sure nothing is scheduled before we are done. */
9977 emit_insn (gen_blockage ());
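#if 0
/* Illustrative, self-contained sketch (not part of GCC) of why the loops
   above test for equality: the probe address may wrap modulo the address
   space, where "<" misbehaves but "!=" still stops after exactly
   rounded_size / interval steps, since rounded_size is a multiple of the
   interval.  */
#include <stdint.h>
#include <stdio.h>

static void
toy_probe_loop (uintptr_t test_addr, uintptr_t rounded_size,
                uintptr_t interval)
{
  uintptr_t last_addr = test_addr - rounded_size;  /* may wrap; still fine */

  while (test_addr != last_addr)
    {
      test_addr -= interval;
      printf ("probe at %#lx\n", (unsigned long) test_addr);
    }
}
#endif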
9980 /* Probe a range of stack addresses from REG to END, inclusive. These are
9981 offsets from the current stack pointer. */
9984 output_probe_stack_range (rtx reg, rtx end)
9986 static int labelno = 0;
9987 char loop_lab[32], end_lab[32];
9990 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9991 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9993 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9995 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9998 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9999 fputs ("\tje\t", asm_out_file);
10000 assemble_name_raw (asm_out_file, end_lab);
10001 fputc ('\n', asm_out_file);
10003 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10004 xops[1] = GEN_INT (PROBE_INTERVAL);
10005 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10007 /* Probe at TEST_ADDR. */
10008 xops[0] = stack_pointer_rtx;
10010 xops[2] = const0_rtx;
10011 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10013 fprintf (asm_out_file, "\tjmp\t");
10014 assemble_name_raw (asm_out_file, loop_lab);
10015 fputc ('\n', asm_out_file);
10017 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10022 /* Finalize the stack_realign_needed flag, which will guide the
10023 prologue/epilogue to be generated in the correct form. */
10025 ix86_finalize_stack_realign_flags (void)
10027 /* Check if stack realignment is really needed after reload, and
10028 store the result in cfun. */
10029 unsigned int incoming_stack_boundary
10030 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10031 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10032 unsigned int stack_realign = (incoming_stack_boundary
10033 < (current_function_is_leaf
10034 ? crtl->max_used_stack_slot_alignment
10035 : crtl->stack_alignment_needed));
10037 if (crtl->stack_realign_finalized)
10039 /* After stack_realign_needed is finalized, we can no longer
10040 update its value. */
10041 gcc_assert (crtl->stack_realign_needed == stack_realign);
10045 crtl->stack_realign_needed = stack_realign;
10046 crtl->stack_realign_finalized = true;
10050 /* Expand the prologue into a bunch of separate insns. */
10053 ix86_expand_prologue (void)
10055 struct machine_function *m = cfun->machine;
10058 struct ix86_frame frame;
10059 HOST_WIDE_INT allocate;
10060 bool int_registers_saved;
10062 ix86_finalize_stack_realign_flags ();
10064 /* DRAP should not coexist with stack_realign_fp. */
10065 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10067 memset (&m->fs, 0, sizeof (m->fs));
10069 /* Initialize CFA state for before the prologue. */
10070 m->fs.cfa_reg = stack_pointer_rtx;
10071 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10073 /* Track SP offset to the CFA. We continue tracking this after we've
10074 swapped the CFA register away from SP. In the case of re-alignment
10075 this is fudged; we're interested in offsets within the local frame. */
10076 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10077 m->fs.sp_valid = true;
10079 ix86_compute_frame_layout (&frame);
10081 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10083 /* We should have already generated an error for any use of
10084 ms_hook on a nested function. */
10085 gcc_checking_assert (!ix86_static_chain_on_stack);
10087 /* Check if profiling is active and we shall use the profiling-before-
10088 prologue variant. If so, sorry. */
10089 if (crtl->profile && flag_fentry != 0)
10090 sorry ("ms_hook_prologue attribute isn't compatible "
10091 "with -mfentry for 32-bit");
10093 /* In ix86_asm_output_function_label we emitted:
10094 8b ff movl.s %edi,%edi
10096 8b ec movl.s %esp,%ebp
10098 This matches the hookable function prologue in Win32 API
10099 functions in Microsoft Windows XP Service Pack 2 and newer.
10100 Wine uses this to enable Windows apps to hook the Win32 API
10101 functions provided by Wine.
10103 What that means is that we've already set up the frame pointer. */
10105 if (frame_pointer_needed
10106 && !(crtl->drap_reg && crtl->stack_realign_needed))
10110 /* We've decided to use the frame pointer already set up.
10111 Describe this to the unwinder by pretending that both
10112 push and mov insns happen right here.
10114 Putting the unwind info here at the end of the ms_hook
10115 is done so that we can make absolutely certain we get
10116 the required byte sequence at the start of the function,
10117 rather than relying on an assembler that can produce
10118 the exact encoding required.
10120 However it does mean (in the unpatched case) that we have
10121 a 1 insn window where the asynchronous unwind info is
10122 incorrect. However, if we placed the unwind info at
10123 its correct location we would have incorrect unwind info
10124 in the patched case. Which is probably all moot since
10125 I don't expect Wine generates dwarf2 unwind info for the
10126 system libraries that use this feature. */
10128 insn = emit_insn (gen_blockage ());
10130 push = gen_push (hard_frame_pointer_rtx);
10131 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10132 stack_pointer_rtx);
10133 RTX_FRAME_RELATED_P (push) = 1;
10134 RTX_FRAME_RELATED_P (mov) = 1;
10136 RTX_FRAME_RELATED_P (insn) = 1;
10137 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10138 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10140 /* Note that gen_push incremented m->fs.cfa_offset, even
10141 though we didn't emit the push insn here. */
10142 m->fs.cfa_reg = hard_frame_pointer_rtx;
10143 m->fs.fp_offset = m->fs.cfa_offset;
10144 m->fs.fp_valid = true;
10148 /* The frame pointer is not needed so pop %ebp again.
10149 This leaves us with a pristine state. */
10150 emit_insn (gen_pop (hard_frame_pointer_rtx));
10154 /* The first insn of a function that accepts its static chain on the
10155 stack is to push the register that would be filled in by a direct
10156 call. This insn will be skipped by the trampoline. */
10157 else if (ix86_static_chain_on_stack)
10159 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10160 emit_insn (gen_blockage ());
10162 /* We don't want to interpret this push insn as a register save,
10163 only as a stack adjustment. The real copy of the register as
10164 a save will be done later, if needed. */
10165 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10166 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10167 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10168 RTX_FRAME_RELATED_P (insn) = 1;
10171 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10172 DRAP is needed and stack realignment is really needed after reload. */
10173 if (stack_realign_drap)
10175 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10177 /* Only need to push parameter pointer reg if it is caller saved. */
10178 if (!call_used_regs[REGNO (crtl->drap_reg)])
10180 /* Push the arg pointer reg. */
10181 insn = emit_insn (gen_push (crtl->drap_reg));
10182 RTX_FRAME_RELATED_P (insn) = 1;
10185 /* Grab the argument pointer. */
10186 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10187 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10188 RTX_FRAME_RELATED_P (insn) = 1;
10189 m->fs.cfa_reg = crtl->drap_reg;
10190 m->fs.cfa_offset = 0;
10192 /* Align the stack. */
10193 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10195 GEN_INT (-align_bytes)));
10196 RTX_FRAME_RELATED_P (insn) = 1;
10198 /* Replicate the return address on the stack so that the return
10199 address can be reached via the (argp - 1) slot. This is needed
10200 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10201 expand_builtin_return_addr, etc. */
10202 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10203 t = gen_frame_mem (Pmode, t);
10204 insn = emit_insn (gen_push (t));
10205 RTX_FRAME_RELATED_P (insn) = 1;
10207 /* For the purposes of frame and register save area addressing,
10208 we've started over with a new frame. */
10209 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10210 m->fs.realigned = true;
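#if 0
/* Illustrative, self-contained sketch (not part of GCC): AND-ing with
   -ALIGN (a power of two) rounds the stack pointer *down* to an aligned
   address, the right direction for a downward-growing stack.  */
#include <assert.h>
#include <stdint.h>

static uintptr_t
toy_align_sp_down (uintptr_t sp, uintptr_t align)
{
  return sp & -align;
}

int
main (void)
{
  assert (toy_align_sp_down (0x1fff, 16) == 0x1ff0);
  assert (toy_align_sp_down (0x2000, 16) == 0x2000);  /* already aligned */
  return 0;
}
#endif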
10213 if (frame_pointer_needed && !m->fs.fp_valid)
10215 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10216 slower on all targets. Also sdb doesn't like it. */
10217 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10218 RTX_FRAME_RELATED_P (insn) = 1;
10220 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10222 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10223 RTX_FRAME_RELATED_P (insn) = 1;
10225 if (m->fs.cfa_reg == stack_pointer_rtx)
10226 m->fs.cfa_reg = hard_frame_pointer_rtx;
10227 m->fs.fp_offset = m->fs.sp_offset;
10228 m->fs.fp_valid = true;
10232 int_registers_saved = (frame.nregs == 0);
10234 if (!int_registers_saved)
10236 /* If saving registers via PUSH, do so now. */
10237 if (!frame.save_regs_using_mov)
10239 ix86_emit_save_regs ();
10240 int_registers_saved = true;
10241 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10244 /* When using red zone we may start register saving before allocating
10245 the stack frame saving one cycle of the prologue. However, avoid
10246 doing this if we have to probe the stack; at least on x86_64 the
10247 stack probe can turn into a call that clobbers a red zone location. */
10248 else if (ix86_using_red_zone ()
10249 && (! TARGET_STACK_PROBE
10250 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10252 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10253 int_registers_saved = true;
10257 if (stack_realign_fp)
10259 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10260 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10262 /* The computation of the size of the re-aligned stack frame means
10263 that we must allocate the size of the register save area before
10264 performing the actual alignment. Otherwise we cannot guarantee
10265 that there's enough storage above the realignment point. */
10266 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10267 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10268 GEN_INT (m->fs.sp_offset
10269 - frame.sse_reg_save_offset),
10272 /* Align the stack. */
10273 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10274 stack_pointer_rtx,
10275 GEN_INT (-align_bytes)));
10277 /* For the purposes of register save area addressing, the stack
10278 pointer is no longer valid. As for the value of sp_offset,
10279 see ix86_compute_frame_layout, which we need to match in order
10280 to pass verification of stack_pointer_offset at the end. */
10281 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10282 m->fs.sp_valid = false;
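/* Worked example of the update above: with sp_offset == 12 and
   align_bytes == 16, the new sp_offset is (12 + 16) & -16 == 16.
   The exact runtime adjustment made by the AND is unknown at compile
   time, so this mirrors the conservative value that
   ix86_compute_frame_layout assumed.  */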
10285 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10287 if (flag_stack_usage)
10289 /* We start to count from ARG_POINTER. */
10290 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10292 /* If it was realigned, take into account the fake frame. */
10293 if (stack_realign_drap)
10295 if (ix86_static_chain_on_stack)
10296 stack_size += UNITS_PER_WORD;
10298 if (!call_used_regs[REGNO (crtl->drap_reg)])
10299 stack_size += UNITS_PER_WORD;
10301 /* This over-estimates by 1 minimal-stack-alignment-unit but
10302 mitigates that by counting in the new return address slot. */
10303 current_function_dynamic_stack_size
10304 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10307 current_function_static_stack_size = stack_size;
10310 /* The stack has already been decremented by the instruction calling us
10311 so we need to probe unconditionally to preserve the protection area. */
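/* For reference: the protection area is STACK_CHECK_PROTECT bytes, and
   the probes emitted below touch the stack at page-sized intervals
   (typically 4096 bytes) so the OS guard page cannot be skipped over.
   This is a descriptive note; the exact probe insns are target- and
   option-dependent.  */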
10312 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10314 /* We expect the registers to be saved when probes are used. */
10315 gcc_assert (int_registers_saved);
10317 if (STACK_CHECK_MOVING_SP)
10319 ix86_adjust_stack_and_probe (allocate);
10324 HOST_WIDE_INT size = allocate;
10326 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10327 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10329 if (TARGET_STACK_PROBE)
10330 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10331 else
10332 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10338 else if (!ix86_target_stack_probe ()
10339 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10341 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10342 GEN_INT (-allocate), -1,
10343 m->fs.cfa_reg == stack_pointer_rtx);
10347 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10348 rtx r10 = NULL;
10349 rtx (*adjust_stack_insn) (rtx, rtx, rtx);
10351 bool eax_live = false;
10352 bool r10_live = false;
10354 if (TARGET_64BIT)
10355 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10356 if (!TARGET_64BIT_MS_ABI)
10357 eax_live = ix86_eax_live_at_start_p ();
10359 if (eax_live)
10360 {
10361 emit_insn (gen_push (eax));
10362 allocate -= UNITS_PER_WORD;
10363 }
10364 if (r10_live)
10365 {
10366 r10 = gen_rtx_REG (Pmode, R10_REG);
10367 emit_insn (gen_push (r10));
10368 allocate -= UNITS_PER_WORD;
10369 }
10371 emit_move_insn (eax, GEN_INT (allocate));
10372 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10374 /* Use the fact that AX still contains ALLOCATE. */
10375 adjust_stack_insn = (TARGET_64BIT
10376 ? gen_pro_epilogue_adjust_stack_di_sub
10377 : gen_pro_epilogue_adjust_stack_si_sub);
10379 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10380 stack_pointer_rtx, eax));
10382 /* Note that SEH directives need to continue tracking the stack
10383 pointer even after the frame pointer has been set up. */
10384 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10386 if (m->fs.cfa_reg == stack_pointer_rtx)
10387 m->fs.cfa_offset += allocate;
10389 RTX_FRAME_RELATED_P (insn) = 1;
10390 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10391 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10392 plus_constant (stack_pointer_rtx,
10393 -allocate)));
10395 m->fs.sp_offset += allocate;
10397 if (r10_live && eax_live)
10399 t = choose_baseaddr (m->fs.sp_offset - allocate);
10400 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10401 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10402 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10404 else if (eax_live || r10_live)
10406 t = choose_baseaddr (m->fs.sp_offset - allocate);
10407 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
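/* For reference, the worker-based allocation path above boils down to
   a sketch like the following on 64-bit targets (the probe worker's
   actual name is target-specific, e.g. a chkstk-style routine on
   Windows):

       movq  $ALLOCATE, %rax
       call  <stack probe worker>   probes the new pages
       subq  %rax, %rsp             %rax still holds ALLOCATE

   with %rax and/or %r10 saved around it when live at function start.  */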
10410 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10412 /* If we haven't already set up the frame pointer, do so now. */
10413 if (frame_pointer_needed && !m->fs.fp_valid)
10415 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10416 GEN_INT (frame.stack_pointer_offset
10417 - frame.hard_frame_pointer_offset));
10418 insn = emit_insn (insn);
10419 RTX_FRAME_RELATED_P (insn) = 1;
10420 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10422 if (m->fs.cfa_reg == stack_pointer_rtx)
10423 m->fs.cfa_reg = hard_frame_pointer_rtx;
10424 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10425 m->fs.fp_valid = true;
10428 if (!int_registers_saved)
10429 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10430 if (frame.nsseregs)
10431 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10433 pic_reg_used = false;
10434 if (pic_offset_table_rtx
10435 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10438 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10440 if (alt_pic_reg_used != INVALID_REGNUM)
10441 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10443 pic_reg_used = true;
10450 if (ix86_cmodel == CM_LARGE_PIC)
10452 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10453 rtx label = gen_label_rtx ();
10454 emit_label (label);
10455 LABEL_PRESERVE_P (label) = 1;
10456 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10457 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10458 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10459 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10460 pic_offset_table_rtx, tmp_reg));
10463 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10466 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10469 /* In the pic_reg_used case, make sure that the got load isn't deleted
10470 when mcount needs it. Blockage to avoid call movement across mcount
10471 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END note. */
10473 if (crtl->profile && !flag_fentry && pic_reg_used)
10474 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10476 if (crtl->drap_reg && !crtl->stack_realign_needed)
10478 /* vDRAP was set up, but after reload it turns out that stack
10479 realignment isn't necessary; here we emit the prologue to set up
10480 DRAP without the stack-realignment adjustment. */
10481 t = choose_baseaddr (0);
10482 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10485 /* Prevent instructions from being scheduled into register save push
10486 sequence when access to the redzone area is done through frame pointer.
10487 The offset between the frame pointer and the stack pointer is calculated
10488 relative to the value of the stack pointer at the end of the function
10489 prologue, and moving instructions that access redzone area via frame
10490 pointer inside push sequence violates this assumption. */
10491 if (frame_pointer_needed && frame.red_zone_size)
10492 emit_insn (gen_memory_blockage ());
10494 /* Emit cld instruction if stringops are used in the function. */
10495 if (TARGET_CLD && ix86_current_function_needs_cld)
10496 emit_insn (gen_cld ());
10498 /* SEH requires that the prologue end within 256 bytes of the start of
10499 the function. Prevent instruction schedules that would extend that. */
10500 if (TARGET_SEH)
10501 emit_insn (gen_blockage ());
10504 /* Emit code to restore REG using a POP insn. */
10506 static void
10507 ix86_emit_restore_reg_using_pop (rtx reg)
10509 struct machine_function *m = cfun->machine;
10510 rtx insn = emit_insn (gen_pop (reg));
10512 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10513 m->fs.sp_offset -= UNITS_PER_WORD;
10515 if (m->fs.cfa_reg == crtl->drap_reg
10516 && REGNO (reg) == REGNO (crtl->drap_reg))
10518 /* Previously we'd represented the CFA as an expression
10519 like *(%ebp - 8). We've just popped that value from
10520 the stack, which means we need to reset the CFA to
10521 the drap register. This will remain until we restore
10522 the stack pointer. */
10523 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10524 RTX_FRAME_RELATED_P (insn) = 1;
10526 /* This means that the DRAP register is valid for addressing too. */
10527 m->fs.drap_valid = true;
10531 if (m->fs.cfa_reg == stack_pointer_rtx)
10533 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10534 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10535 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10536 RTX_FRAME_RELATED_P (insn) = 1;
10538 m->fs.cfa_offset -= UNITS_PER_WORD;
10541 /* When the frame pointer is the CFA, and we pop it, we are
10542 swapping back to the stack pointer as the CFA. This happens
10543 for stack frames that don't allocate other data, so we assume
10544 the stack pointer is now pointing at the return address, i.e.
10545 the function entry state, which makes the offset be 1 word. */
10546 if (reg == hard_frame_pointer_rtx)
10548 m->fs.fp_valid = false;
10549 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10551 m->fs.cfa_reg = stack_pointer_rtx;
10552 m->fs.cfa_offset -= UNITS_PER_WORD;
10554 add_reg_note (insn, REG_CFA_DEF_CFA,
10555 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10556 GEN_INT (m->fs.cfa_offset)));
10557 RTX_FRAME_RELATED_P (insn) = 1;
10562 /* Emit code to restore saved registers using POP insns. */
10564 static void
10565 ix86_emit_restore_regs_using_pop (void)
10567 unsigned int regno;
10569 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10570 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10571 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10574 /* Emit code and notes for the LEAVE instruction. */
10576 static void
10577 ix86_emit_leave (void)
10579 struct machine_function *m = cfun->machine;
10580 rtx insn = emit_insn (ix86_gen_leave ());
10582 ix86_add_queued_cfa_restore_notes (insn);
10584 gcc_assert (m->fs.fp_valid);
10585 m->fs.sp_valid = true;
10586 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10587 m->fs.fp_valid = false;
10589 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10591 m->fs.cfa_reg = stack_pointer_rtx;
10592 m->fs.cfa_offset = m->fs.sp_offset;
10594 add_reg_note (insn, REG_CFA_DEF_CFA,
10595 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10596 RTX_FRAME_RELATED_P (insn) = 1;
10597 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10598 m->fs.fp_offset);
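/* Recall that the leave insn is, in effect, "movl %ebp, %esp; popl
   %ebp" in one instruction, which is why sp becomes valid again at
   fp_offset - UNITS_PER_WORD above and the frame pointer stops being
   valid for addressing.  */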
10602 /* Emit code to restore saved registers using MOV insns.
10603 First register is restored from CFA - CFA_OFFSET. */
10604 static void
10605 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10606 int maybe_eh_return)
10608 struct machine_function *m = cfun->machine;
10609 unsigned int regno;
10611 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10612 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10614 rtx reg = gen_rtx_REG (Pmode, regno);
10615 rtx insn, mem;
10617 mem = choose_baseaddr (cfa_offset);
10618 mem = gen_frame_mem (Pmode, mem);
10619 insn = emit_move_insn (reg, mem);
10621 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10623 /* Previously we'd represented the CFA as an expression
10624 like *(%ebp - 8). We've just popped that value from
10625 the stack, which means we need to reset the CFA to
10626 the drap register. This will remain until we restore
10627 the stack pointer. */
10628 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10629 RTX_FRAME_RELATED_P (insn) = 1;
10631 /* This means that the DRAP register is valid for addressing. */
10632 m->fs.drap_valid = true;
10635 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10637 cfa_offset -= UNITS_PER_WORD;
10641 /* Emit code to restore saved SSE registers using MOV insns.
10642 First register is restored from CFA - CFA_OFFSET. */
10643 static void
10644 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10645 int maybe_eh_return)
10647 unsigned int regno;
10649 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10650 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10652 rtx reg = gen_rtx_REG (V4SFmode, regno);
10653 rtx mem;
10655 mem = choose_baseaddr (cfa_offset);
10656 mem = gen_rtx_MEM (V4SFmode, mem);
10657 set_mem_align (mem, 128);
10658 emit_move_insn (reg, mem);
10660 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10666 /* Restore function stack, frame, and registers. */
10668 void
10669 ix86_expand_epilogue (int style)
10671 struct machine_function *m = cfun->machine;
10672 struct machine_frame_state frame_state_save = m->fs;
10673 struct ix86_frame frame;
10674 bool restore_regs_via_mov;
10675 bool using_drap;
10677 ix86_finalize_stack_realign_flags ();
10678 ix86_compute_frame_layout (&frame);
10680 m->fs.sp_valid = (!frame_pointer_needed
10681 || (current_function_sp_is_unchanging
10682 && !stack_realign_fp));
10683 gcc_assert (!m->fs.sp_valid
10684 || m->fs.sp_offset == frame.stack_pointer_offset);
10686 /* The FP must be valid if the frame pointer is present. */
10687 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10688 gcc_assert (!m->fs.fp_valid
10689 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10691 /* We must have *some* valid pointer to the stack frame. */
10692 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10694 /* The DRAP is never valid at this point. */
10695 gcc_assert (!m->fs.drap_valid);
10697 /* See the comment about red zone and frame
10698 pointer usage in ix86_expand_prologue. */
10699 if (frame_pointer_needed && frame.red_zone_size)
10700 emit_insn (gen_memory_blockage ());
10702 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10703 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10705 /* Determine the CFA offset of the end of the red-zone. */
10706 m->fs.red_zone_offset = 0;
10707 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10709 /* The red-zone begins below the return address. */
10710 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10712 /* When the register save area is in the aligned portion of
10713 the stack, determine the maximum runtime displacement that
10714 matches up with the aligned frame. */
10715 if (stack_realign_drap)
10716 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10720 /* Special care must be taken for the normal return case of a function
10721 using eh_return: the eax and edx registers are marked as saved, but
10722 not restored along this path. Adjust the save location to match. */
10723 if (crtl->calls_eh_return && style != 2)
10724 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10726 /* EH_RETURN requires the use of moves to function properly. */
10727 if (crtl->calls_eh_return)
10728 restore_regs_via_mov = true;
10729 /* SEH requires the use of pops to identify the epilogue. */
10730 else if (TARGET_SEH)
10731 restore_regs_via_mov = false;
10732 /* If we're only restoring one register and sp is not valid, then
10733 use a move instruction to restore the register, since it's
10734 less work than reloading sp and popping the register. */
10735 else if (!m->fs.sp_valid && frame.nregs <= 1)
10736 restore_regs_via_mov = true;
10737 else if (TARGET_EPILOGUE_USING_MOVE
10738 && cfun->machine->use_fast_prologue_epilogue
10739 && (frame.nregs > 1
10740 || m->fs.sp_offset != frame.reg_save_offset))
10741 restore_regs_via_mov = true;
10742 else if (frame_pointer_needed
10744 && m->fs.sp_offset != frame.reg_save_offset)
10745 restore_regs_via_mov = true;
10746 else if (frame_pointer_needed
10747 && TARGET_USE_LEAVE
10748 && cfun->machine->use_fast_prologue_epilogue
10749 && frame.nregs == 1)
10750 restore_regs_via_mov = true;
10752 restore_regs_via_mov = false;
10754 if (restore_regs_via_mov || frame.nsseregs)
10756 /* Ensure that the entire register save area is addressable via
10757 the stack pointer, if we will restore via sp. */
10758 if (TARGET_64BIT
10759 && m->fs.sp_offset > 0x7fffffff
10760 && !(m->fs.fp_valid || m->fs.drap_valid)
10761 && (frame.nsseregs + frame.nregs) != 0)
10763 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10764 GEN_INT (m->fs.sp_offset
10765 - frame.sse_reg_save_offset),
10767 m->fs.cfa_reg == stack_pointer_rtx);
10771 /* If there are any SSE registers to restore, then we have to do it
10772 via moves, since there's obviously no pop for SSE regs. */
10773 if (frame.nsseregs)
10774 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10777 if (restore_regs_via_mov)
10782 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10784 /* eh_return epilogues need %ecx added to the stack pointer. */
10785 if (style == 2)
10787 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10789 /* Stack align doesn't work with eh_return. */
10790 gcc_assert (!stack_realign_drap);
10791 /* Neither do regparm nested functions. */
10792 gcc_assert (!ix86_static_chain_on_stack);
10794 if (frame_pointer_needed)
10796 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10797 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10798 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10800 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10801 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10803 /* Note that we use SA as a temporary CFA, as the return
10804 address is at the proper place relative to it. We
10805 pretend this happens at the FP restore insn because
10806 prior to this insn the FP would be stored at the wrong
10807 offset relative to SA, and after this insn we have no
10808 other reasonable register to use for the CFA. We don't
10809 bother resetting the CFA to the SP for the duration of
10810 the return insn. */
10811 add_reg_note (insn, REG_CFA_DEF_CFA,
10812 plus_constant (sa, UNITS_PER_WORD));
10813 ix86_add_queued_cfa_restore_notes (insn);
10814 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10815 RTX_FRAME_RELATED_P (insn) = 1;
10817 m->fs.cfa_reg = sa;
10818 m->fs.cfa_offset = UNITS_PER_WORD;
10819 m->fs.fp_valid = false;
10821 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10822 const0_rtx, style, false);
10826 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10827 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10828 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10829 ix86_add_queued_cfa_restore_notes (insn);
10831 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10832 if (m->fs.cfa_offset != UNITS_PER_WORD)
10834 m->fs.cfa_offset = UNITS_PER_WORD;
10835 add_reg_note (insn, REG_CFA_DEF_CFA,
10836 plus_constant (stack_pointer_rtx,
10838 RTX_FRAME_RELATED_P (insn) = 1;
10841 m->fs.sp_offset = UNITS_PER_WORD;
10842 m->fs.sp_valid = true;
10847 /* SEH requires that the function end with (1) a stack adjustment
10848 if necessary, (2) a sequence of pops, and (3) a return or
10849 jump instruction. Prevent insns from the function body from
10850 being scheduled into this sequence. */
10853 /* Prevent a catch region from being adjacent to the standard
10854 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
10855 several other flags that would be interesting to test are
10856 set up yet at this point. */
10857 if (flag_non_call_exceptions)
10858 emit_insn (gen_nops (const1_rtx));
10860 emit_insn (gen_blockage ());
10863 /* First step is to deallocate the stack frame so that we can
10864 pop the registers. */
10865 if (!m->fs.sp_valid)
10867 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10868 GEN_INT (m->fs.fp_offset
10869 - frame.reg_save_offset),
10872 else if (m->fs.sp_offset != frame.reg_save_offset)
10874 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10875 GEN_INT (m->fs.sp_offset
10876 - frame.reg_save_offset),
10878 m->fs.cfa_reg == stack_pointer_rtx);
10881 ix86_emit_restore_regs_using_pop ();
10884 /* If we used a frame pointer and haven't already got rid of it,
10885 then pop it. */
10886 if (m->fs.fp_valid)
10888 /* If the stack pointer is valid and pointing at the frame
10889 pointer store address, then we only need a pop. */
10890 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10891 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10892 /* Leave results in shorter dependency chains on CPUs that are
10893 able to grok it fast. */
10894 else if (TARGET_USE_LEAVE
10895 || optimize_function_for_size_p (cfun)
10896 || !cfun->machine->use_fast_prologue_epilogue)
10897 ix86_emit_leave ();
10900 pro_epilogue_adjust_stack (stack_pointer_rtx,
10901 hard_frame_pointer_rtx,
10902 const0_rtx, style, !using_drap);
10903 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10909 int param_ptr_offset = UNITS_PER_WORD;
10912 gcc_assert (stack_realign_drap);
10914 if (ix86_static_chain_on_stack)
10915 param_ptr_offset += UNITS_PER_WORD;
10916 if (!call_used_regs[REGNO (crtl->drap_reg)])
10917 param_ptr_offset += UNITS_PER_WORD;
10919 insn = emit_insn (gen_rtx_SET
10920 (VOIDmode, stack_pointer_rtx,
10921 gen_rtx_PLUS (Pmode,
10923 GEN_INT (-param_ptr_offset))));
10924 m->fs.cfa_reg = stack_pointer_rtx;
10925 m->fs.cfa_offset = param_ptr_offset;
10926 m->fs.sp_offset = param_ptr_offset;
10927 m->fs.realigned = false;
10929 add_reg_note (insn, REG_CFA_DEF_CFA,
10930 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10931 GEN_INT (param_ptr_offset)));
10932 RTX_FRAME_RELATED_P (insn) = 1;
10934 if (!call_used_regs[REGNO (crtl->drap_reg)])
10935 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10938 /* At this point the stack pointer must be valid, and we must have
10939 restored all of the registers. We may not have deallocated the
10940 entire stack frame. We've delayed this until now because it may
10941 be possible to merge the local stack deallocation with the
10942 deallocation forced by ix86_static_chain_on_stack. */
10943 gcc_assert (m->fs.sp_valid);
10944 gcc_assert (!m->fs.fp_valid);
10945 gcc_assert (!m->fs.realigned);
10946 if (m->fs.sp_offset != UNITS_PER_WORD)
10948 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10949 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10953 /* Sibcall epilogues don't want a return instruction. */
10956 m->fs = frame_state_save;
10960 /* Emit vzeroupper if needed. */
10961 if (TARGET_VZEROUPPER
10962 && cfun->machine->use_avx256_p
10963 && !cfun->machine->caller_return_avx256_p)
10965 cfun->machine->use_vzeroupper_p = 1;
10966 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10969 if (crtl->args.pops_args && crtl->args.size)
10971 rtx popc = GEN_INT (crtl->args.pops_args);
10973 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10974 address, do explicit add, and jump indirectly to the caller. */
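/* Sketch of that fallback: "popl %ecx; addl $N, %esp; jmp *%ecx"
   in place of "ret $N", since ret's pop count is only a 16-bit
   immediate.  */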
10976 if (crtl->args.pops_args >= 65536)
10978 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10981 /* There is no "pascal" calling convention in any 64bit ABI. */
10982 gcc_assert (!TARGET_64BIT);
10984 insn = emit_insn (gen_pop (ecx));
10985 m->fs.cfa_offset -= UNITS_PER_WORD;
10986 m->fs.sp_offset -= UNITS_PER_WORD;
10988 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10989 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10990 add_reg_note (insn, REG_CFA_REGISTER,
10991 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10992 RTX_FRAME_RELATED_P (insn) = 1;
10994 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10996 emit_jump_insn (gen_return_indirect_internal (ecx));
10999 emit_jump_insn (gen_return_pop_internal (popc));
11002 emit_jump_insn (gen_return_internal ());
11004 /* Restore the state back to the state from the prologue,
11005 so that it's correct for the next epilogue. */
11006 m->fs = frame_state_save;
11009 /* Reset from the function's potential modifications. */
11011 static void
11012 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11013 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11015 if (pic_offset_table_rtx)
11016 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11018 /* Mach-O doesn't support labels at the end of objects, so if
11019 it looks like we might want one, insert a NOP. */
11021 rtx insn = get_last_insn ();
11024 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11025 insn = PREV_INSN (insn);
11029 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11030 fputs ("\tnop\n", file);
11036 /* Return a scratch register to use in the split stack prologue. The
11037 split stack prologue is used for -fsplit-stack. It consists of the
11038 first instructions in the function, even before the regular prologue.
11039 The scratch register can be any caller-saved register which is not
11040 used for parameters or for the static chain. */
11042 static unsigned int
11043 split_stack_prologue_scratch_regno (void)
11052 is_fastcall = (lookup_attribute ("fastcall",
11053 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11054 != NULL);
11055 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11059 if (DECL_STATIC_CHAIN (cfun->decl))
11061 sorry ("-fsplit-stack does not support fastcall with "
11062 "nested function");
11063 return INVALID_REGNUM;
11067 else if (regparm < 3)
11069 if (!DECL_STATIC_CHAIN (cfun->decl))
11075 sorry ("-fsplit-stack does not support 2 register "
11076 "parameters for a nested function");
11077 return INVALID_REGNUM;
11084 /* FIXME: We could make this work by pushing a register
11085 around the addition and comparison. */
11086 sorry ("-fsplit-stack does not support 3 register parameters");
11087 return INVALID_REGNUM;
11092 /* A SYMBOL_REF for the function which allocates new stack space for
11093 -fsplit-stack. */
11095 static GTY(()) rtx split_stack_fn;
11097 /* Handle -fsplit-stack. These are the first instructions in the
11098 function, even before the regular prologue. */
11100 void
11101 ix86_expand_split_stack_prologue (void)
11103 struct ix86_frame frame;
11104 HOST_WIDE_INT allocate;
11106 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11107 rtx scratch_reg = NULL_RTX;
11108 rtx varargs_label = NULL_RTX;
11110 gcc_assert (flag_split_stack && reload_completed);
11112 ix86_finalize_stack_realign_flags ();
11113 ix86_compute_frame_layout (&frame);
11114 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11116 /* This is the label we will branch to if we have enough stack
11117 space. We expect the basic block reordering pass to reverse this
11118 branch if optimizing, so that we branch in the unlikely case. */
11119 label = gen_label_rtx ();
11121 /* We need to compare the stack pointer minus the frame size with
11122 the stack boundary in the TCB. The stack boundary always gives
11123 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11124 can compare directly. Otherwise we need to do an addition. */
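/* As a sketch, on x86-64 with a small frame the emitted check looks
   roughly like (the TCB slot offset shown is illustrative, not
   authoritative):

       cmpq  %fs:<guard offset>, %rsp
       jae   .Lhave_enough_stack

   i.e. branch past the __morestack call when the stack pointer is at
   or above the limit recorded in the TCB.  */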
11126 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11127 UNSPEC_STACK_CHECK);
11128 limit = gen_rtx_CONST (Pmode, limit);
11129 limit = gen_rtx_MEM (Pmode, limit);
11130 if (allocate < SPLIT_STACK_AVAILABLE)
11131 current = stack_pointer_rtx;
11134 unsigned int scratch_regno;
11137 /* We need a scratch register to hold the stack pointer minus
11138 the required frame size. Since this is the very start of the
11139 function, the scratch register can be any caller-saved
11140 register which is not used for parameters. */
11141 offset = GEN_INT (- allocate);
11142 scratch_regno = split_stack_prologue_scratch_regno ();
11143 if (scratch_regno == INVALID_REGNUM)
11145 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11146 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11148 /* We don't use ix86_gen_add3 in this case because it will
11149 want to split to lea, but when not optimizing the insn
11150 will not be split after this point. */
11151 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11152 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11157 emit_move_insn (scratch_reg, offset);
11158 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11159 stack_pointer_rtx));
11161 current = scratch_reg;
11164 ix86_expand_branch (GEU, current, limit, label);
11165 jump_insn = get_last_insn ();
11166 JUMP_LABEL (jump_insn) = label;
11168 /* Mark the jump as very likely to be taken. */
11169 add_reg_note (jump_insn, REG_BR_PROB,
11170 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11172 /* Get more stack space. We pass in the desired stack space and the
11173 size of the arguments to copy to the new stack. In 32-bit mode
11174 we push the parameters; __morestack will return on a new stack
11175 anyhow. In 64-bit mode we pass the parameters in r10 and
11176 r11. */
11177 allocate_rtx = GEN_INT (allocate);
11178 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11179 call_fusage = NULL_RTX;
11184 reg = gen_rtx_REG (Pmode, R10_REG);
11186 /* If this function uses a static chain, it will be in %r10.
11187 Preserve it across the call to __morestack. */
11188 if (DECL_STATIC_CHAIN (cfun->decl))
11192 rax = gen_rtx_REG (Pmode, AX_REG);
11193 emit_move_insn (rax, reg);
11194 use_reg (&call_fusage, rax);
11197 emit_move_insn (reg, allocate_rtx);
11198 use_reg (&call_fusage, reg);
11199 reg = gen_rtx_REG (Pmode, R11_REG);
11200 emit_move_insn (reg, GEN_INT (args_size));
11201 use_reg (&call_fusage, reg);
11205 emit_insn (gen_push (GEN_INT (args_size)));
11206 emit_insn (gen_push (allocate_rtx));
11208 if (split_stack_fn == NULL_RTX)
11209 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11210 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
11211 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11213 add_function_usage_to (call_insn, call_fusage);
11215 /* In order to make call/return prediction work right, we now need
11216 to execute a return instruction. See
11217 libgcc/config/i386/morestack.S for the details on how this works.
11219 For flow purposes gcc must not see this as a return
11220 instruction--we need control flow to continue at the subsequent
11221 label. Therefore, we use an unspec. */
11222 gcc_assert (crtl->args.pops_args < 65536);
11223 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11225 /* If we are in 64-bit mode and this function uses a static chain,
11226 we saved %r10 in %rax before calling __morestack. */
11227 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11228 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11229 gen_rtx_REG (Pmode, AX_REG));
11231 /* If this function calls va_start, we need to store a pointer to
11232 the arguments on the old stack, because they may not have been
11233 all copied to the new stack. At this point the old stack can be
11234 found at the frame pointer value used by __morestack, because
11235 __morestack has set that up before calling back to us. Here we
11236 store that pointer in a scratch register, and in
11237 ix86_expand_prologue we store the scratch register in a stack
11238 slot. */
11239 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11241 unsigned int scratch_regno;
11245 scratch_regno = split_stack_prologue_scratch_regno ();
11246 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11247 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11249 /* If TARGET_64BIT, the old stack above the saved frame pointer holds:
11251 return address within this function
11252 return address of caller of this function
11253 stack arguments
11254 So we add three words to get to the stack arguments.
11256 If !TARGET_64BIT, it holds:
11258 return address within this function
11259 first argument to __morestack
11260 second argument to __morestack
11261 return address of caller of this function
11262 stack arguments
11263 So we add five words to get to the stack arguments. */
11265 words = TARGET_64BIT ? 3 : 5;
11266 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11267 gen_rtx_PLUS (Pmode, frame_reg,
11268 GEN_INT (words * UNITS_PER_WORD))));
11270 varargs_label = gen_label_rtx ();
11271 emit_jump_insn (gen_jump (varargs_label));
11272 JUMP_LABEL (get_last_insn ()) = varargs_label;
11277 emit_label (label);
11278 LABEL_NUSES (label) = 1;
11280 /* If this function calls va_start, we now have to set the scratch
11281 register for the case where we do not call __morestack. In this
11282 case we need to set it based on the stack pointer. */
11283 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11285 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11286 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11287 GEN_INT (UNITS_PER_WORD))));
11289 emit_label (varargs_label);
11290 LABEL_NUSES (varargs_label) = 1;
11294 /* We may have to tell the dataflow pass that the split stack prologue
11295 is initializing a scratch register. */
11297 static void
11298 ix86_live_on_entry (bitmap regs)
11300 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11302 gcc_assert (flag_split_stack);
11303 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11307 /* Extract the parts of an RTL expression that is a valid memory address
11308 for an instruction. Return 0 if the structure of the address is
11309 grossly off. Return -1 if the address contains ASHIFT, so it is not
11310 strictly valid, but still used for computing the length of the lea instruction. */
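/* For example (illustrative only), the address
       (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 16))
   decomposes into base = B, index = A, scale = 4, disp = 16, which
   corresponds to the x86 operand 16(B,A,4).  */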
11312 int
11313 ix86_decompose_address (rtx addr, struct ix86_address *out)
11315 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11316 rtx base_reg, index_reg;
11317 HOST_WIDE_INT scale = 1;
11318 rtx scale_rtx = NULL_RTX;
11321 enum ix86_address_seg seg = SEG_DEFAULT;
11323 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11325 else if (GET_CODE (addr) == PLUS)
11327 rtx addends[4], op;
11335 addends[n++] = XEXP (op, 1);
11338 while (GET_CODE (op) == PLUS);
11343 for (i = n; i >= 0; --i)
11346 switch (GET_CODE (op))
11351 index = XEXP (op, 0);
11352 scale_rtx = XEXP (op, 1);
11358 index = XEXP (op, 0);
11359 tmp = XEXP (op, 1);
11360 if (!CONST_INT_P (tmp))
11362 scale = INTVAL (tmp);
11363 if ((unsigned HOST_WIDE_INT) scale > 3)
11365 scale = 1 << scale;
11369 if (XINT (op, 1) == UNSPEC_TP
11370 && TARGET_TLS_DIRECT_SEG_REFS
11371 && seg == SEG_DEFAULT)
11372 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11401 else if (GET_CODE (addr) == MULT)
11403 index = XEXP (addr, 0); /* index*scale */
11404 scale_rtx = XEXP (addr, 1);
11406 else if (GET_CODE (addr) == ASHIFT)
11408 /* We're called for lea too, which implements ashift on occasion. */
11409 index = XEXP (addr, 0);
11410 tmp = XEXP (addr, 1);
11411 if (!CONST_INT_P (tmp))
11413 scale = INTVAL (tmp);
11414 if ((unsigned HOST_WIDE_INT) scale > 3)
11416 scale = 1 << scale;
11420 disp = addr; /* displacement */
11422 /* Extract the integral value of scale. */
11425 if (!CONST_INT_P (scale_rtx))
11427 scale = INTVAL (scale_rtx);
11430 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11431 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11433 /* Avoid useless 0 displacement. */
11434 if (disp == const0_rtx && (base || index))
11437 /* Allow arg pointer and stack pointer as index if there is no scaling. */
11438 if (base_reg && index_reg && scale == 1
11439 && (index_reg == arg_pointer_rtx
11440 || index_reg == frame_pointer_rtx
11441 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11444 tmp = base, base = index, index = tmp;
11445 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11448 /* Special case: %ebp cannot be encoded as a base without a displacement.
11449 Similarly %r13. */
11451 if (!disp && base_reg
11452 && (base_reg == hard_frame_pointer_rtx
11453 || base_reg == frame_pointer_rtx
11454 || base_reg == arg_pointer_rtx
11455 || (REG_P (base_reg)
11456 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11457 || REGNO (base_reg) == R13_REG))))
11458 disp = const0_rtx;
11460 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11461 Avoid this by transforming to [%esi+0].
11462 Reload calls address legitimization without cfun defined, so we need
11463 to test cfun for being non-NULL. */
11464 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11465 && base_reg && !index_reg && !disp
11466 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11467 disp = const0_rtx;
11469 /* Special case: encode reg+reg instead of reg*2. */
11470 if (!base && index && scale == 2)
11471 base = index, base_reg = index_reg, scale = 1;
11473 /* Special case: scaling cannot be encoded without base or displacement. */
11474 if (!base && !disp && index && scale != 1)
11478 out->index = index;
11480 out->scale = scale;
11486 /* Return cost of the memory address x.
11487 For i386, it is better to use a complex address than let gcc copy
11488 the address into a reg and make a new pseudo. But not if the address
11489 requires two regs - that would mean more pseudos with longer
11490 lifetimes. */
11491 static int
11492 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11494 struct ix86_address parts;
11496 int ok = ix86_decompose_address (x, &parts);
11500 if (parts.base && GET_CODE (parts.base) == SUBREG)
11501 parts.base = SUBREG_REG (parts.base);
11502 if (parts.index && GET_CODE (parts.index) == SUBREG)
11503 parts.index = SUBREG_REG (parts.index);
11505 /* Attempt to minimize number of registers in the address. */
11507 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11509 && (!REG_P (parts.index)
11510 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11514 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11516 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11517 && parts.base != parts.index)
11520 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
11521 00_xxx_100b, since its predecode logic can't detect the length of
11522 such instructions and they degenerate to vector decoding. Increase
11523 the cost of such addresses here. The penalty is minimally 2 cycles.
11524 It may be worthwhile to split such addresses or even refuse them
11525 entirely.
11526 The following addressing modes are affected:
11527 [base+scale*index]
11528 [scale*index+disp]
11529 [base+index]
11531 The first and last case may be avoidable by explicitly coding the
11532 zero into the memory address, but I don't have an AMD-K6 machine
11533 handy to check this theory. */
11536 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11537 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11538 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11544 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11545 this is used to form addresses to local data when -fPIC is in
11546 effect. */
11549 darwin_local_data_pic (rtx disp)
11551 return (GET_CODE (disp) == UNSPEC
11552 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11555 /* Determine if a given RTX is a valid constant. We already know this
11556 satisfies CONSTANT_P. */
11559 legitimate_constant_p (rtx x)
11561 switch (GET_CODE (x))
11566 if (GET_CODE (x) == PLUS)
11568 if (!CONST_INT_P (XEXP (x, 1)))
11573 if (TARGET_MACHO && darwin_local_data_pic (x))
11576 /* Only some unspecs are valid as "constants". */
11577 if (GET_CODE (x) == UNSPEC)
11578 switch (XINT (x, 1))
11581 case UNSPEC_GOTOFF:
11582 case UNSPEC_PLTOFF:
11583 return TARGET_64BIT;
11585 case UNSPEC_NTPOFF:
11586 x = XVECEXP (x, 0, 0);
11587 return (GET_CODE (x) == SYMBOL_REF
11588 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11589 case UNSPEC_DTPOFF:
11590 x = XVECEXP (x, 0, 0);
11591 return (GET_CODE (x) == SYMBOL_REF
11592 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11597 /* We must have drilled down to a symbol. */
11598 if (GET_CODE (x) == LABEL_REF)
11600 if (GET_CODE (x) != SYMBOL_REF)
11605 /* TLS symbols are never valid. */
11606 if (SYMBOL_REF_TLS_MODEL (x))
11609 /* DLLIMPORT symbols are never valid. */
11610 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11611 && SYMBOL_REF_DLLIMPORT_P (x))
11615 /* mdynamic-no-pic */
11616 if (MACHO_DYNAMIC_NO_PIC_P)
11617 return machopic_symbol_defined_p (x);
11622 if (GET_MODE (x) == TImode
11623 && x != CONST0_RTX (TImode)
11629 if (!standard_sse_constant_p (x))
11636 /* Otherwise we handle everything else in the move patterns. */
11640 /* Determine if it's legal to put X into the constant pool. This
11641 is not possible for the address of thread-local symbols, which
11642 is checked above. */
11645 ix86_cannot_force_const_mem (rtx x)
11647 /* We can always put integral constants and vectors in memory. */
11648 switch (GET_CODE (x))
11658 return !legitimate_constant_p (x);
11662 /* Nonzero if the constant value X is a legitimate general operand
11663 when generating PIC code. It is given that flag_pic is on and
11664 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11667 legitimate_pic_operand_p (rtx x)
11671 switch (GET_CODE (x))
11674 inner = XEXP (x, 0);
11675 if (GET_CODE (inner) == PLUS
11676 && CONST_INT_P (XEXP (inner, 1)))
11677 inner = XEXP (inner, 0);
11679 /* Only some unspecs are valid as "constants". */
11680 if (GET_CODE (inner) == UNSPEC)
11681 switch (XINT (inner, 1))
11684 case UNSPEC_GOTOFF:
11685 case UNSPEC_PLTOFF:
11686 return TARGET_64BIT;
11688 x = XVECEXP (inner, 0, 0);
11689 return (GET_CODE (x) == SYMBOL_REF
11690 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11691 case UNSPEC_MACHOPIC_OFFSET:
11692 return legitimate_pic_address_disp_p (x);
11700 return legitimate_pic_address_disp_p (x);
11707 /* Determine if a given CONST RTX is a valid memory displacement
11708 in PIC mode. */
11710 bool
11711 legitimate_pic_address_disp_p (rtx disp)
11715 /* In 64bit mode we can allow direct addresses of symbols and labels
11716 when they are not dynamic symbols. */
11719 rtx op0 = disp, op1;
11721 switch (GET_CODE (disp))
11727 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11729 op0 = XEXP (XEXP (disp, 0), 0);
11730 op1 = XEXP (XEXP (disp, 0), 1);
11731 if (!CONST_INT_P (op1)
11732 || INTVAL (op1) >= 16*1024*1024
11733 || INTVAL (op1) < -16*1024*1024)
11735 if (GET_CODE (op0) == LABEL_REF)
11737 if (GET_CODE (op0) != SYMBOL_REF)
11742 /* TLS references should always be enclosed in UNSPEC. */
11743 if (SYMBOL_REF_TLS_MODEL (op0))
11745 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11746 && ix86_cmodel != CM_LARGE_PIC)
11754 if (GET_CODE (disp) != CONST)
11756 disp = XEXP (disp, 0);
11760 /* It is unsafe to allow PLUS expressions here; this limited the
11761 allowed distance into GOT tables, and we should not need these
11762 anyway. */
11762 if (GET_CODE (disp) != UNSPEC
11763 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11764 && XINT (disp, 1) != UNSPEC_GOTOFF
11765 && XINT (disp, 1) != UNSPEC_PLTOFF))
11768 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11769 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11775 if (GET_CODE (disp) == PLUS)
11777 if (!CONST_INT_P (XEXP (disp, 1)))
11779 disp = XEXP (disp, 0);
11783 if (TARGET_MACHO && darwin_local_data_pic (disp))
11786 if (GET_CODE (disp) != UNSPEC)
11789 switch (XINT (disp, 1))
11794 /* We need to check for both symbols and labels because VxWorks loads
11795 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11796 details. */
11797 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11798 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11799 case UNSPEC_GOTOFF:
11800 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11801 While the ABI also specifies a 32bit relocation, we don't produce
11802 it in the small PIC model at all. */
11803 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11804 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11806 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11808 case UNSPEC_GOTTPOFF:
11809 case UNSPEC_GOTNTPOFF:
11810 case UNSPEC_INDNTPOFF:
11813 disp = XVECEXP (disp, 0, 0);
11814 return (GET_CODE (disp) == SYMBOL_REF
11815 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11816 case UNSPEC_NTPOFF:
11817 disp = XVECEXP (disp, 0, 0);
11818 return (GET_CODE (disp) == SYMBOL_REF
11819 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11820 case UNSPEC_DTPOFF:
11821 disp = XVECEXP (disp, 0, 0);
11822 return (GET_CODE (disp) == SYMBOL_REF
11823 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11829 /* Recognizes RTL expressions that are valid memory addresses for an
11830 instruction. The MODE argument is the machine mode for the MEM
11831 expression that wants to use this address.
11833 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11834 convert common non-canonical forms to canonical form so that they will
11835 be recognized. */
11837 static bool
11838 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11839 rtx addr, bool strict)
11841 struct ix86_address parts;
11842 rtx base, index, disp;
11843 HOST_WIDE_INT scale;
11845 if (ix86_decompose_address (addr, &parts) <= 0)
11846 /* Decomposition failed. */
11850 index = parts.index;
11852 scale = parts.scale;
11854 /* Validate base register.
11856 Don't allow SUBREG's that span more than a word here. It can lead to spill
11857 failures when the base is one word out of a two word structure, which is
11858 represented internally as a DImode int. */
11866 else if (GET_CODE (base) == SUBREG
11867 && REG_P (SUBREG_REG (base))
11868 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11870 reg = SUBREG_REG (base);
11872 /* Base is not a register. */
11875 if (GET_MODE (base) != Pmode)
11876 /* Base is not in Pmode. */
11879 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11880 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11881 /* Base is not valid. */
11885 /* Validate index register.
11887 Don't allow SUBREG's that span more than a word here -- same as above. */
11895 else if (GET_CODE (index) == SUBREG
11896 && REG_P (SUBREG_REG (index))
11897 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11899 reg = SUBREG_REG (index);
11901 /* Index is not a register. */
11904 if (GET_MODE (index) != Pmode)
11905 /* Index is not in Pmode. */
11908 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11909 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11910 /* Index is not valid. */
11914 /* Validate scale factor. */
11918 /* Scale without index. */
11921 if (scale != 2 && scale != 4 && scale != 8)
11922 /* Scale is not a valid multiplier. */
11926 /* Validate displacement. */
11929 if (GET_CODE (disp) == CONST
11930 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11931 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11932 switch (XINT (XEXP (disp, 0), 1))
11934 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11935 used. While the ABI also specifies 32bit relocations, we don't
11936 produce them at all and use IP-relative addressing instead. */
11938 case UNSPEC_GOTOFF:
11939 gcc_assert (flag_pic);
11941 goto is_legitimate_pic;
11943 /* 64bit address unspec. */
11946 case UNSPEC_GOTPCREL:
11947 gcc_assert (flag_pic);
11948 goto is_legitimate_pic;
11950 case UNSPEC_GOTTPOFF:
11951 case UNSPEC_GOTNTPOFF:
11952 case UNSPEC_INDNTPOFF:
11953 case UNSPEC_NTPOFF:
11954 case UNSPEC_DTPOFF:
11957 case UNSPEC_STACK_CHECK:
11958 gcc_assert (flag_split_stack);
11962 /* Invalid address unspec. */
11966 else if (SYMBOLIC_CONST (disp)
11970 && MACHOPIC_INDIRECT
11971 && !machopic_operand_p (disp)
11977 if (TARGET_64BIT && (index || base))
11979 /* foo@dtpoff(%rX) is ok. */
11980 if (GET_CODE (disp) != CONST
11981 || GET_CODE (XEXP (disp, 0)) != PLUS
11982 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11983 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11984 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11985 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11986 /* Non-constant pic memory reference. */
11989 else if ((!TARGET_MACHO || flag_pic)
11990 && ! legitimate_pic_address_disp_p (disp))
11991 /* Displacement is an invalid pic construct. */
11994 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
11995 /* Displacement must be referenced via non_lazy_pointer. */
11999 /* This code used to verify that a symbolic pic displacement
12000 includes the pic_offset_table_rtx register.
12002 While this is a good idea, unfortunately these constructs may
12003 be created by the "adds using lea" optimization for incorrect
12004 code like:
12006 int a;
12007 int foo (int i)
12008 {
12009 return *(&a + i);
12010 }
12012 This code is nonsensical, but results in addressing the
12013 GOT table with a pic_offset_table_rtx base. We can't
12014 just refuse it easily, since it gets matched by the
12015 "addsi3" pattern, which later gets split to lea in the
12016 case the output register differs from the input. While this
12017 could be handled by a separate addsi pattern for this case
12018 that never results in lea, disabling this test seems to be
12019 the easier and correct fix for the crash. */
12021 else if (GET_CODE (disp) != LABEL_REF
12022 && !CONST_INT_P (disp)
12023 && (GET_CODE (disp) != CONST
12024 || !legitimate_constant_p (disp))
12025 && (GET_CODE (disp) != SYMBOL_REF
12026 || !legitimate_constant_p (disp)))
12027 /* Displacement is not constant. */
12029 else if (TARGET_64BIT
12030 && !x86_64_immediate_operand (disp, VOIDmode))
12031 /* Displacement is out of range. */
12035 /* Everything looks valid. */
12039 /* Determine if a given RTX is a valid constant address. */
12042 constant_address_p (rtx x)
12044 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12047 /* Return a unique alias set for the GOT. */
12049 static alias_set_type
12050 ix86_GOT_alias_set (void)
12052 static alias_set_type set = -1;
12054 set = new_alias_set ();
12058 /* Return a legitimate reference for ORIG (an address) using the
12059 register REG. If REG is 0, a new pseudo is generated.
12061 There are two types of references that must be handled:
12063 1. Global data references must load the address from the GOT, via
12064 the PIC reg. An insn is emitted to do this load, and the reg is
12065 returned.
12067 2. Static data references, constant pool addresses, and code labels
12068 compute the address as an offset from the GOT, whose base is in
12069 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12070 differentiate them from global data objects. The returned
12071 address is the PIC reg + an unspec constant.
12073 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12074 reg also appears in the address. */
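/* Illustrative ia32 examples of the two cases (a sketch, assuming
   %ebx holds the PIC base): a global symbol FOO becomes a load from
   FOO@GOT(%ebx), represented as
       (mem (plus (reg %ebx) (const (unspec [FOO] UNSPEC_GOT))))
   while a local symbol BAR becomes the address BAR@GOTOFF(%ebx),
       (plus (reg %ebx) (const (unspec [BAR] UNSPEC_GOTOFF))).  */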
12077 legitimize_pic_address (rtx orig, rtx reg)
12080 rtx new_rtx = orig;
12084 if (TARGET_MACHO && !TARGET_64BIT)
12087 reg = gen_reg_rtx (Pmode);
12088 /* Use the generic Mach-O PIC machinery. */
12089 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12093 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12095 else if (TARGET_64BIT
12096 && ix86_cmodel != CM_SMALL_PIC
12097 && gotoff_operand (addr, Pmode))
12100 /* This symbol may be referenced via a displacement from the PIC
12101 base address (@GOTOFF). */
12103 if (reload_in_progress)
12104 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12105 if (GET_CODE (addr) == CONST)
12106 addr = XEXP (addr, 0);
12107 if (GET_CODE (addr) == PLUS)
12109 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12111 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12114 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12115 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12117 tmpreg = gen_reg_rtx (Pmode);
12120 emit_move_insn (tmpreg, new_rtx);
12124 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12125 tmpreg, 1, OPTAB_DIRECT);
12128 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12130 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12132 /* This symbol may be referenced via a displacement from the PIC
12133 base address (@GOTOFF). */
12135 if (reload_in_progress)
12136 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12137 if (GET_CODE (addr) == CONST)
12138 addr = XEXP (addr, 0);
12139 if (GET_CODE (addr) == PLUS)
12141 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12143 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12146 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12147 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12148 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12152 emit_move_insn (reg, new_rtx);
12156 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12157 /* We can't use @GOTOFF for text labels on VxWorks;
12158 see gotoff_operand. */
12159 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12161 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12163 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12164 return legitimize_dllimport_symbol (addr, true);
12165 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12166 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12167 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12169 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12170 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12174 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12177 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12178 new_rtx = gen_const_mem (Pmode, new_rtx);
12179 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12182 reg = gen_reg_rtx (Pmode);
12183 /* Use gen_movsi directly; otherwise the address is loaded
12184 into a register for CSE. We don't want to CSE these addresses;
12185 instead we CSE addresses from the GOT table, so skip this. */
12186 emit_insn (gen_movsi (reg, new_rtx));
12191 /* This symbol must be referenced via a load from the
12192 Global Offset Table (@GOT). */
12194 if (reload_in_progress)
12195 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12196 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12197 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12199 new_rtx = force_reg (Pmode, new_rtx);
12200 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12201 new_rtx = gen_const_mem (Pmode, new_rtx);
12202 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12205 reg = gen_reg_rtx (Pmode);
12206 emit_move_insn (reg, new_rtx);
12212 if (CONST_INT_P (addr)
12213 && !x86_64_immediate_operand (addr, VOIDmode))
12217 emit_move_insn (reg, addr);
12221 new_rtx = force_reg (Pmode, addr);
12223 else if (GET_CODE (addr) == CONST)
12225 addr = XEXP (addr, 0);
12227 /* We must match stuff we generate before. Assume the only
12228 unspecs that can get here are ours. Not that we could do
12229 anything with them anyway.... */
12230 if (GET_CODE (addr) == UNSPEC
12231 || (GET_CODE (addr) == PLUS
12232 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12234 gcc_assert (GET_CODE (addr) == PLUS);
12236 if (GET_CODE (addr) == PLUS)
12238 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12240 /* Check first to see if this is a constant offset from a @GOTOFF
12241 symbol reference. */
12242 if (gotoff_operand (op0, Pmode)
12243 && CONST_INT_P (op1))
12247 if (reload_in_progress)
12248 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12249 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12251 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12252 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12253 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12257 emit_move_insn (reg, new_rtx);
12263 if (INTVAL (op1) < -16*1024*1024
12264 || INTVAL (op1) >= 16*1024*1024)
12266 if (!x86_64_immediate_operand (op1, Pmode))
12267 op1 = force_reg (Pmode, op1);
12268 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12274 base = legitimize_pic_address (XEXP (addr, 0), reg);
12275 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12276 base == reg ? NULL_RTX : reg);
12278 if (CONST_INT_P (new_rtx))
12279 new_rtx = plus_constant (base, INTVAL (new_rtx));
12282 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12284 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12285 new_rtx = XEXP (new_rtx, 1);
12287 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12295 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12298 get_thread_pointer (int to_reg)
12302 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12306 reg = gen_reg_rtx (Pmode);
12307 insn = gen_rtx_SET (VOIDmode, reg, tp);
12308 insn = emit_insn (insn);
12313 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12314 false if we expect this to be used for a memory address and true if
12315 we expect to load the address into a register. */
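/* Rough shape of the lowerings below (a sketch, not exhaustive):
   local-exec under GNU TLS yields (plus TP X@NTPOFF), where TP is the
   UNSPEC_TP thread pointer (%gs:0 on ia32, %fs:0 on x86-64);
   initial-exec instead loads X's offset from the GOT (X@GOTTPOFF or
   X@GOTNTPOFF) and adds it to TP; the dynamic models call
   __tls_get_addr, or use the GNU2 descriptor scheme when enabled.  */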
12318 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12320 rtx dest, base, off, pic, tp;
12325 case TLS_MODEL_GLOBAL_DYNAMIC:
12326 dest = gen_reg_rtx (Pmode);
12327 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12329 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12331 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12334 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12335 insns = get_insns ();
12338 RTL_CONST_CALL_P (insns) = 1;
12339 emit_libcall_block (insns, dest, rax, x);
12341 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12342 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12344 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12346 if (TARGET_GNU2_TLS)
12348 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12350 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12354 case TLS_MODEL_LOCAL_DYNAMIC:
12355 base = gen_reg_rtx (Pmode);
12356 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12358 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12360 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12363 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12364 insns = get_insns ();
12367 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12368 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12369 RTL_CONST_CALL_P (insns) = 1;
12370 emit_libcall_block (insns, base, rax, note);
12372 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12373 emit_insn (gen_tls_local_dynamic_base_64 (base));
12375 emit_insn (gen_tls_local_dynamic_base_32 (base));
12377 if (TARGET_GNU2_TLS)
12379 rtx x = ix86_tls_module_base ();
12381 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12382 gen_rtx_MINUS (Pmode, x, tp));
12385 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12386 off = gen_rtx_CONST (Pmode, off);
12388 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12390 if (TARGET_GNU2_TLS)
12392 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12394 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12399 case TLS_MODEL_INITIAL_EXEC:
12403 type = UNSPEC_GOTNTPOFF;
12407 if (reload_in_progress)
12408 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12409 pic = pic_offset_table_rtx;
12410 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12412 else if (!TARGET_ANY_GNU_TLS)
12414 pic = gen_reg_rtx (Pmode);
12415 emit_insn (gen_set_got (pic));
12416 type = UNSPEC_GOTTPOFF;
12421 type = UNSPEC_INDNTPOFF;
12424 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12425 off = gen_rtx_CONST (Pmode, off);
12427 off = gen_rtx_PLUS (Pmode, pic, off);
12428 off = gen_const_mem (Pmode, off);
12429 set_mem_alias_set (off, ix86_GOT_alias_set ());
12431 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12433 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12434 off = force_reg (Pmode, off);
12435 return gen_rtx_PLUS (Pmode, base, off);
12439 base = get_thread_pointer (true);
12440 dest = gen_reg_rtx (Pmode);
12441 emit_insn (gen_subsi3 (dest, base, off));
12445 case TLS_MODEL_LOCAL_EXEC:
12446 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12447 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12448 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12449 off = gen_rtx_CONST (Pmode, off);
12451 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12453 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12454 return gen_rtx_PLUS (Pmode, base, off);
12458 base = get_thread_pointer (true);
12459 dest = gen_reg_rtx (Pmode);
12460 emit_insn (gen_subsi3 (dest, base, off));
12465 gcc_unreachable ();
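/* Added illustration, not from the original sources: a rough sketch of
   what the cases above expand to on GNU/Linux (exact sequences depend on
   the target and the relocations the assembler supports):

     global-dynamic (ia32):  leal  x@tlsgd(,%ebx,1), %eax
                             call  ___tls_get_addr
     initial-exec (x86-64):  movq  x@gottpoff(%rip), %rax
                             movq  %fs:(%rax), ...
     local-exec (x86-64):    movq  %fs:0, %rax
                             leaq  x@tpoff(%rax), %rax

   These are orientation sketches, not the literal compiler output.  */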
12471 /* Create or return the unique __imp_DECL dllimport symbol corresponding to symbol DECL. */
12474 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12475 htab_t dllimport_map;
12478 get_dllimport_decl (tree decl)
12480 struct tree_map *h, in;
12483 const char *prefix;
12484 size_t namelen, prefixlen;
12489 if (!dllimport_map)
12490 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12492 in.hash = htab_hash_pointer (decl);
12493 in.base.from = decl;
12494 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12495 h = (struct tree_map *) *loc;
12499 *loc = h = ggc_alloc_tree_map ();
12501 h->base.from = decl;
12502 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12503 VAR_DECL, NULL, ptr_type_node);
12504 DECL_ARTIFICIAL (to) = 1;
12505 DECL_IGNORED_P (to) = 1;
12506 DECL_EXTERNAL (to) = 1;
12507 TREE_READONLY (to) = 1;
12509 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12510 name = targetm.strip_name_encoding (name);
12511 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12512 ? "*__imp_" : "*__imp__";
12513 namelen = strlen (name);
12514 prefixlen = strlen (prefix);
12515 imp_name = (char *) alloca (namelen + prefixlen + 1);
12516 memcpy (imp_name, prefix, prefixlen);
12517 memcpy (imp_name + prefixlen, name, namelen + 1);
12519 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12520 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12521 SET_SYMBOL_REF_DECL (rtl, to);
12522 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12524 rtl = gen_const_mem (Pmode, rtl);
12525 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12527 SET_DECL_RTL (to, rtl);
12528 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
12533 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12534 true if we require the result be a register. */
12537 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12542 gcc_assert (SYMBOL_REF_DECL (symbol));
12543 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12545 x = DECL_RTL (imp_decl);
12547 x = force_reg (Pmode, x);
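/* Added illustration, not from the original sources: a reference to a
   dllimport'd variable `bar' (hypothetical name) is rewritten to go
   through the import pointer the linker provides, roughly

	movl	__imp__bar, %eax	# load &bar from the import table
	movl	(%eax), %edx		# then use the value

   where the actual `__imp_' vs. `__imp__' prefix depends on the
   target's user_label_prefix, as computed in get_dllimport_decl.  */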
12551 /* Try machine-dependent ways of modifying an illegitimate address
12552 to be legitimate. If we find one, return the new, valid address.
12553 This macro is used in only one place: `memory_address' in explow.c.
12555 OLDX is the address as it was before break_out_memory_refs was called.
12556 In some cases it is useful to look at this to decide what needs to be done.
12558 It is always safe for this macro to do nothing. It exists to recognize
12559 opportunities to optimize the output.
12561 For the 80386, we handle X+REG by loading X into a register R and
12562 using R+REG. R will go in a general reg and indexing will be used.
12563 However, if REG is a broken-out memory address or multiplication,
12564 nothing needs to be done because REG can certainly go in a general reg.
12566 When -fpic is used, special handling is needed for symbolic references.
12567 See comments by legitimize_pic_address in i386.c for details. */
12570 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12571 enum machine_mode mode)
12576 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12578 return legitimize_tls_address (x, (enum tls_model) log, false);
12579 if (GET_CODE (x) == CONST
12580 && GET_CODE (XEXP (x, 0)) == PLUS
12581 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12582 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12584 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12585 (enum tls_model) log, false);
12586 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12589 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12591 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12592 return legitimize_dllimport_symbol (x, true);
12593 if (GET_CODE (x) == CONST
12594 && GET_CODE (XEXP (x, 0)) == PLUS
12595 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12596 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12598 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12599 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12603 if (flag_pic && SYMBOLIC_CONST (x))
12604 return legitimize_pic_address (x, 0);
12607 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12608 return machopic_indirect_data_reference (x, 0);
12611 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12612 if (GET_CODE (x) == ASHIFT
12613 && CONST_INT_P (XEXP (x, 1))
12614 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12617 log = INTVAL (XEXP (x, 1));
12618 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12619 GEN_INT (1 << log));
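/* Added illustration, not from the original sources: rewriting
   (ashift reg 2) as (mult reg 4) puts the address into the canonical
   scaled-index shape, which maps onto hardware addressing modes such as

	movl	(%ebx,%ecx,4), %eax

   where 4 is the index scale.  */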
12622 if (GET_CODE (x) == PLUS)
12624 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12626 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12627 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12628 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12631 log = INTVAL (XEXP (XEXP (x, 0), 1));
12632 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12633 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12634 GEN_INT (1 << log));
12637 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12638 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12639 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12642 log = INTVAL (XEXP (XEXP (x, 1), 1));
12643 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12644 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12645 GEN_INT (1 << log));
12648 /* Put multiply first if it isn't already. */
12649 if (GET_CODE (XEXP (x, 1)) == MULT)
12651 rtx tmp = XEXP (x, 0);
12652 XEXP (x, 0) = XEXP (x, 1);
12657 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12658 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12659 created by virtual register instantiation, register elimination, and
12660 similar optimizations. */
12661 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12664 x = gen_rtx_PLUS (Pmode,
12665 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12666 XEXP (XEXP (x, 1), 0)),
12667 XEXP (XEXP (x, 1), 1));
12671 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12672 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12673 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12674 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12675 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12676 && CONSTANT_P (XEXP (x, 1)))
12679 rtx other = NULL_RTX;
12681 if (CONST_INT_P (XEXP (x, 1)))
12683 constant = XEXP (x, 1);
12684 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12686 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12688 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12689 other = XEXP (x, 1);
12697 x = gen_rtx_PLUS (Pmode,
12698 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12699 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12700 plus_constant (other, INTVAL (constant)));
12704 if (changed && ix86_legitimate_address_p (mode, x, false))
12707 if (GET_CODE (XEXP (x, 0)) == MULT)
12710 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12713 if (GET_CODE (XEXP (x, 1)) == MULT)
12716 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12720 && REG_P (XEXP (x, 1))
12721 && REG_P (XEXP (x, 0)))
12724 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12727 x = legitimize_pic_address (x, 0);
12730 if (changed && ix86_legitimate_address_p (mode, x, false))
12733 if (REG_P (XEXP (x, 0)))
12735 rtx temp = gen_reg_rtx (Pmode);
12736 rtx val = force_operand (XEXP (x, 1), temp);
12738 emit_move_insn (temp, val);
12740 XEXP (x, 1) = temp;
12744 else if (REG_P (XEXP (x, 1)))
12746 rtx temp = gen_reg_rtx (Pmode);
12747 rtx val = force_operand (XEXP (x, 0), temp);
12749 emit_move_insn (temp, val);
12751 XEXP (x, 0) = temp;
12759 /* Print an integer constant expression in assembler syntax. Addition
12760 and subtraction are the only arithmetic that may appear in these
12761 expressions. FILE is the stdio stream to write to, X is the rtx, and
12762 CODE is the operand print code from the output string. */
12765 output_pic_addr_const (FILE *file, rtx x, int code)
12769 switch (GET_CODE (x))
12772 gcc_assert (flag_pic);
12777 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12778 output_addr_const (file, x);
12781 const char *name = XSTR (x, 0);
12783 /* Mark the decl as referenced so that cgraph will
12784 output the function. */
12785 if (SYMBOL_REF_DECL (x))
12786 mark_decl_referenced (SYMBOL_REF_DECL (x));
12789 if (MACHOPIC_INDIRECT
12790 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12791 name = machopic_indirection_name (x, /*stub_p=*/true);
12793 assemble_name (file, name);
12795 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12796 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12797 fputs ("@PLT", file);
12804 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12805 assemble_name (asm_out_file, buf);
12809 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12813 /* This used to output parentheses around the expression,
12814 but that does not work on the 386 (either ATT or BSD assembler). */
12815 output_pic_addr_const (file, XEXP (x, 0), code);
12819 if (GET_MODE (x) == VOIDmode)
12821 /* We can use %d if the number is <32 bits and positive. */
12822 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12823 fprintf (file, "0x%lx%08lx",
12824 (unsigned long) CONST_DOUBLE_HIGH (x),
12825 (unsigned long) CONST_DOUBLE_LOW (x));
12827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12830 /* We can't handle floating point constants;
12831 TARGET_PRINT_OPERAND must handle them. */
12832 output_operand_lossage ("floating constant misused");
12836 /* Some assemblers need integer constants to appear first. */
12837 if (CONST_INT_P (XEXP (x, 0)))
12839 output_pic_addr_const (file, XEXP (x, 0), code);
12841 output_pic_addr_const (file, XEXP (x, 1), code);
12845 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12846 output_pic_addr_const (file, XEXP (x, 1), code);
12848 output_pic_addr_const (file, XEXP (x, 0), code);
12854 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12855 output_pic_addr_const (file, XEXP (x, 0), code);
12857 output_pic_addr_const (file, XEXP (x, 1), code);
12859 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12863 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12865 bool f = i386_asm_output_addr_const_extra (file, x);
12870 gcc_assert (XVECLEN (x, 0) == 1);
12871 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12872 switch (XINT (x, 1))
12875 fputs ("@GOT", file);
12877 case UNSPEC_GOTOFF:
12878 fputs ("@GOTOFF", file);
12880 case UNSPEC_PLTOFF:
12881 fputs ("@PLTOFF", file);
12883 case UNSPEC_GOTPCREL:
12884 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12885 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12887 case UNSPEC_GOTTPOFF:
12888 /* FIXME: This might be @TPOFF in Sun ld too. */
12889 fputs ("@gottpoff", file);
12892 fputs ("@tpoff", file);
12894 case UNSPEC_NTPOFF:
12896 fputs ("@tpoff", file);
12898 fputs ("@ntpoff", file);
12900 case UNSPEC_DTPOFF:
12901 fputs ("@dtpoff", file);
12903 case UNSPEC_GOTNTPOFF:
12905 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12906 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12908 fputs ("@gotntpoff", file);
12910 case UNSPEC_INDNTPOFF:
12911 fputs ("@indntpoff", file);
12914 case UNSPEC_MACHOPIC_OFFSET:
12916 machopic_output_function_base_name (file);
12920 output_operand_lossage ("invalid UNSPEC as operand");
12926 output_operand_lossage ("invalid expression as operand");
12930 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12931 We need to emit DTP-relative relocations. */
12933 static void ATTRIBUTE_UNUSED
12934 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12936 fputs (ASM_LONG, file);
12937 output_addr_const (file, x);
12938 fputs ("@dtpoff", file);
12944 fputs (", 0", file);
12947 gcc_unreachable ();
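/* Added illustration, not from the original sources: for SIZE == 4 the
   routine above emits something like

	.long	x@dtpoff

   and for SIZE == 8 it pads the second word with zero:

	.long	x@dtpoff, 0

   producing a DTP-relative value in the debug info.  */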
12951 /* Return true if X is a representation of the PIC register. This copes
12952 with calls from ix86_find_base_term, where the register might have
12953 been replaced by a cselib value. */
12956 ix86_pic_register_p (rtx x)
12958 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12959 return (pic_offset_table_rtx
12960 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12962 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12965 /* Helper function for ix86_delegitimize_address.
12966 Attempt to delegitimize TLS local-exec accesses. */
12969 ix86_delegitimize_tls_address (rtx orig_x)
12971 rtx x = orig_x, unspec;
12972 struct ix86_address addr;
12974 if (!TARGET_TLS_DIRECT_SEG_REFS)
12978 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12980 if (ix86_decompose_address (x, &addr) == 0
12981 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12982 || addr.disp == NULL_RTX
12983 || GET_CODE (addr.disp) != CONST)
12985 unspec = XEXP (addr.disp, 0);
12986 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12987 unspec = XEXP (unspec, 0);
12988 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12990 x = XVECEXP (unspec, 0, 0);
12991 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12992 if (unspec != XEXP (addr.disp, 0))
12993 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12996 rtx idx = addr.index;
12997 if (addr.scale != 1)
12998 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12999 x = gen_rtx_PLUS (Pmode, idx, x);
13002 x = gen_rtx_PLUS (Pmode, addr.base, x);
13003 if (MEM_P (orig_x))
13004 x = replace_equiv_address_nv (orig_x, x);
13008 /* In the name of slightly smaller debug output, and to cater to
13009 general assembler lossage, recognize PIC+GOTOFF and turn it back
13010 into a direct symbol reference.
13012 On Darwin, this is necessary to avoid a crash, because Darwin
13013 has a different PIC label for each routine but the DWARF debugging
13014 information is not associated with any particular routine, so it's
13015 necessary to remove references to the PIC label from RTL stored by
13016 the DWARF output code. */
13019 ix86_delegitimize_address (rtx x)
13021 rtx orig_x = delegitimize_mem_from_attrs (x);
13022 /* addend is NULL or some rtx if x is something+GOTOFF where
13023 something doesn't include the PIC register. */
13024 rtx addend = NULL_RTX;
13025 /* reg_addend is NULL or a multiple of some register. */
13026 rtx reg_addend = NULL_RTX;
13027 /* const_addend is NULL or a const_int. */
13028 rtx const_addend = NULL_RTX;
13029 /* This is the result, or NULL. */
13030 rtx result = NULL_RTX;
13039 if (GET_CODE (x) != CONST
13040 || GET_CODE (XEXP (x, 0)) != UNSPEC
13041 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13042 || !MEM_P (orig_x))
13043 return ix86_delegitimize_tls_address (orig_x);
13044 x = XVECEXP (XEXP (x, 0), 0, 0);
13045 if (GET_MODE (orig_x) != Pmode)
13046 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13050 if (GET_CODE (x) != PLUS
13051 || GET_CODE (XEXP (x, 1)) != CONST)
13052 return ix86_delegitimize_tls_address (orig_x);
13054 if (ix86_pic_register_p (XEXP (x, 0)))
13055 /* %ebx + GOT/GOTOFF */
13057 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13059 /* %ebx + %reg * scale + GOT/GOTOFF */
13060 reg_addend = XEXP (x, 0);
13061 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13062 reg_addend = XEXP (reg_addend, 1);
13063 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13064 reg_addend = XEXP (reg_addend, 0);
13067 reg_addend = NULL_RTX;
13068 addend = XEXP (x, 0);
13072 addend = XEXP (x, 0);
13074 x = XEXP (XEXP (x, 1), 0);
13075 if (GET_CODE (x) == PLUS
13076 && CONST_INT_P (XEXP (x, 1)))
13078 const_addend = XEXP (x, 1);
13082 if (GET_CODE (x) == UNSPEC
13083 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13084 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13085 result = XVECEXP (x, 0, 0);
13087 if (TARGET_MACHO && darwin_local_data_pic (x)
13088 && !MEM_P (orig_x))
13089 result = XVECEXP (x, 0, 0);
13092 return ix86_delegitimize_tls_address (orig_x);
13095 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13097 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13100 /* If the rest of original X doesn't involve the PIC register, add
13101 addend and subtract pic_offset_table_rtx. This can happen e.g.
13103 leal (%ebx, %ecx, 4), %ecx
13105 movl foo@GOTOFF(%ecx), %edx
13106 in which case we return (%ecx - %ebx) + foo. */
13107 if (pic_offset_table_rtx)
13108 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13109 pic_offset_table_rtx),
13114 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13115 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13119 /* If X is a machine specific address (i.e. a symbol or label being
13120 referenced as a displacement from the GOT implemented using an
13121 UNSPEC), then return the base term. Otherwise return X. */
13124 ix86_find_base_term (rtx x)
13130 if (GET_CODE (x) != CONST)
13132 term = XEXP (x, 0);
13133 if (GET_CODE (term) == PLUS
13134 && (CONST_INT_P (XEXP (term, 1))
13135 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13136 term = XEXP (term, 0);
13137 if (GET_CODE (term) != UNSPEC
13138 || XINT (term, 1) != UNSPEC_GOTPCREL)
13141 return XVECEXP (term, 0, 0);
13144 return ix86_delegitimize_address (x);
13148 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13149 int fp, FILE *file)
13151 const char *suffix;
13153 if (mode == CCFPmode || mode == CCFPUmode)
13155 code = ix86_fp_compare_code_to_integer (code);
13159 code = reverse_condition (code);
13210 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13214 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13215 Those same assemblers have the same but opposite lossage on cmov. */
13216 if (mode == CCmode)
13217 suffix = fp ? "nbe" : "a";
13218 else if (mode == CCCmode)
13221 gcc_unreachable ();
13237 gcc_unreachable ();
13241 gcc_assert (mode == CCmode || mode == CCCmode);
13258 gcc_unreachable ();
13262 /* ??? As above. */
13263 gcc_assert (mode == CCmode || mode == CCCmode);
13264 suffix = fp ? "nb" : "ae";
13267 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13271 /* ??? As above. */
13272 if (mode == CCmode)
13274 else if (mode == CCCmode)
13275 suffix = fp ? "nb" : "ae";
13277 gcc_unreachable ();
13280 suffix = fp ? "u" : "p";
13283 suffix = fp ? "nu" : "np";
13286 gcc_unreachable ();
13288 fputs (suffix, file);
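/* Added illustration, not from the original sources: sample suffixes
   produced above for integer flags with REVERSE == 0:

     EQ -> "e"    NE  -> "ne"
     GT -> "g"    GTU -> "a"
     LT -> "l"    LTU -> "b"

   so e.g. a setcc template like "set%C1\t%0" ends up as "setg" for a
   signed greater-than comparison.  */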
13291 /* Print the name of register X to FILE based on its machine mode and number.
13292 If CODE is 'w', pretend the mode is HImode.
13293 If CODE is 'b', pretend the mode is QImode.
13294 If CODE is 'k', pretend the mode is SImode.
13295 If CODE is 'q', pretend the mode is DImode.
13296 If CODE is 'x', pretend the mode is V4SFmode.
13297 If CODE is 't', pretend the mode is V8SFmode.
13298 If CODE is 'h', pretend the reg is the 'high' byte register.
13299 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13300 If CODE is 'd', duplicate the operand for AVX instruction.
13304 print_reg (rtx x, int code, FILE *file)
13307 bool duplicated = code == 'd' && TARGET_AVX;
13309 gcc_assert (x == pc_rtx
13310 || (REGNO (x) != ARG_POINTER_REGNUM
13311 && REGNO (x) != FRAME_POINTER_REGNUM
13312 && REGNO (x) != FLAGS_REG
13313 && REGNO (x) != FPSR_REG
13314 && REGNO (x) != FPCR_REG));
13316 if (ASSEMBLER_DIALECT == ASM_ATT)
13321 gcc_assert (TARGET_64BIT);
13322 fputs ("rip", file);
13326 if (code == 'w' || MMX_REG_P (x))
13328 else if (code == 'b')
13330 else if (code == 'k')
13332 else if (code == 'q')
13334 else if (code == 'y')
13336 else if (code == 'h')
13338 else if (code == 'x')
13340 else if (code == 't')
13343 code = GET_MODE_SIZE (GET_MODE (x));
13345 /* Irritatingly, AMD extended registers use a different naming
13346 convention from the normal registers. */
13347 if (REX_INT_REG_P (x))
13349 gcc_assert (TARGET_64BIT);
13353 error ("extended registers have no high halves");
13356 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13359 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13362 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13365 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13368 error ("unsupported operand size for extended register");
13378 if (STACK_TOP_P (x))
13387 if (! ANY_FP_REG_P (x))
13388 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13393 reg = hi_reg_name[REGNO (x)];
13396 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13398 reg = qi_reg_name[REGNO (x)];
13401 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13403 reg = qi_high_reg_name[REGNO (x)];
13408 gcc_assert (!duplicated);
13410 fputs (hi_reg_name[REGNO (x)] + 1, file);
13415 gcc_unreachable ();
13421 if (ASSEMBLER_DIALECT == ASM_ATT)
13422 fprintf (file, ", %%%s", reg);
13424 fprintf (file, ", %s", reg);
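/* Added illustration, not from the original sources: for hard register
   ax, the code/size selection above prints

     code 'b' -> %al     code 'w' -> %ax
     code 'k' -> %eax    code 'q' -> %rax (64-bit only)
     code 'h' -> %ah

   in ATT syntax; Intel syntax omits the '%'.  */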
13428 /* Locate some local-dynamic symbol still in use by this function
13429 so that we can print its name in some tls_local_dynamic_base pattern. */
13433 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13437 if (GET_CODE (x) == SYMBOL_REF
13438 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13440 cfun->machine->some_ld_name = XSTR (x, 0);
13447 static const char *
13448 get_some_local_dynamic_name (void)
13452 if (cfun->machine->some_ld_name)
13453 return cfun->machine->some_ld_name;
13455 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13456 if (NONDEBUG_INSN_P (insn)
13457 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13458 return cfun->machine->some_ld_name;
13463 /* Meaning of CODE:
13464 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13465 C -- print opcode suffix for set/cmov insn.
13466 c -- like C, but print reversed condition
13467 F,f -- likewise, but for floating-point.
13468 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing.
13470 R -- print the prefix for register names.
13471 z -- print the opcode suffix for the size of the current operand.
13472 Z -- likewise, with special suffixes for x87 instructions.
13473 * -- print a star (in certain assembler syntax)
13474 A -- print an absolute memory reference.
13475 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13476 s -- print a shift double count, followed by the assembler's argument delimiter.
13478 b -- print the QImode name of the register for the indicated operand.
13479 %b0 would print %al if operands[0] is reg 0.
13480 w -- likewise, print the HImode name of the register.
13481 k -- likewise, print the SImode name of the register.
13482 q -- likewise, print the DImode name of the register.
13483 x -- likewise, print the V4SFmode name of the register.
13484 t -- likewise, print the V8SFmode name of the register.
13485 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13486 y -- print "st(0)" instead of "st" as a register.
13487 d -- print duplicated register operand for AVX instruction.
13488 D -- print condition for SSE cmp instruction.
13489 P -- if PIC, print an @PLT suffix.
13490 X -- don't print any sort of PIC '@' suffix for a symbol.
13491 & -- print some in-use local-dynamic symbol name.
13492 H -- print a memory address offset by 8; used for sse high-parts
13493 Y -- print condition for XOP pcom* instruction.
13494 + -- print a branch hint as 'cs' or 'ds' prefix
13495 ; -- print a semicolon (after prefixes due to bug in older gas).
13496 @ -- print a segment register of thread base pointer load
13500 ix86_print_operand (FILE *file, rtx x, int code)
13507 if (ASSEMBLER_DIALECT == ASM_ATT)
13513 const char *name = get_some_local_dynamic_name ();
13515 output_operand_lossage ("'%%&' used without any "
13516 "local dynamic TLS references");
13518 assemble_name (file, name);
13523 switch (ASSEMBLER_DIALECT)
13530 /* Intel syntax. For absolute addresses, registers should not
13531 be surrounded by braces. */
13535 ix86_print_operand (file, x, 0);
13542 gcc_unreachable ();
13545 ix86_print_operand (file, x, 0);
13550 if (ASSEMBLER_DIALECT == ASM_ATT)
13555 if (ASSEMBLER_DIALECT == ASM_ATT)
13560 if (ASSEMBLER_DIALECT == ASM_ATT)
13565 if (ASSEMBLER_DIALECT == ASM_ATT)
13570 if (ASSEMBLER_DIALECT == ASM_ATT)
13575 if (ASSEMBLER_DIALECT == ASM_ATT)
13580 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13582 /* Opcodes don't get size suffixes if using Intel opcodes. */
13583 if (ASSEMBLER_DIALECT == ASM_INTEL)
13586 switch (GET_MODE_SIZE (GET_MODE (x)))
13605 output_operand_lossage
13606 ("invalid operand size for operand code '%c'", code);
13611 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13613 (0, "non-integer operand used with operand code '%c'", code);
13617 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13618 if (ASSEMBLER_DIALECT == ASM_INTEL)
13621 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13623 switch (GET_MODE_SIZE (GET_MODE (x)))
13626 #ifdef HAVE_AS_IX86_FILDS
13636 #ifdef HAVE_AS_IX86_FILDQ
13639 fputs ("ll", file);
13647 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13649 /* 387 opcodes don't get size suffixes
13650 if the operands are registers. */
13651 if (STACK_REG_P (x))
13654 switch (GET_MODE_SIZE (GET_MODE (x)))
13675 output_operand_lossage
13676 ("invalid operand type used with operand code '%c'", code);
13680 output_operand_lossage
13681 ("invalid operand size for operand code '%c'", code);
13698 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13700 ix86_print_operand (file, x, 0);
13701 fputs (", ", file);
13706 /* A little bit of brain damage here: the SSE compare instructions
13707 use completely different names for the comparisons than the
13708 fp conditional moves do. */
13711 switch (GET_CODE (x))
13714 fputs ("eq", file);
13717 fputs ("eq_us", file);
13720 fputs ("lt", file);
13723 fputs ("nge", file);
13726 fputs ("le", file);
13729 fputs ("ngt", file);
13732 fputs ("unord", file);
13735 fputs ("neq", file);
13738 fputs ("neq_oq", file);
13741 fputs ("ge", file);
13744 fputs ("nlt", file);
13747 fputs ("gt", file);
13750 fputs ("nle", file);
13753 fputs ("ord", file);
13756 output_operand_lossage ("operand is not a condition code, "
13757 "invalid operand code 'D'");
13763 switch (GET_CODE (x))
13767 fputs ("eq", file);
13771 fputs ("lt", file);
13775 fputs ("le", file);
13778 fputs ("unord", file);
13782 fputs ("neq", file);
13786 fputs ("nlt", file);
13790 fputs ("nle", file);
13793 fputs ("ord", file);
13796 output_operand_lossage ("operand is not a condition code, "
13797 "invalid operand code 'D'");
13803 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13804 if (ASSEMBLER_DIALECT == ASM_ATT)
13806 switch (GET_MODE (x))
13808 case HImode: putc ('w', file); break;
13810 case SFmode: putc ('l', file); break;
13812 case DFmode: putc ('q', file); break;
13813 default: gcc_unreachable ();
13820 if (!COMPARISON_P (x))
13822 output_operand_lossage ("operand is neither a constant nor a "
13823 "condition code, invalid operand code "
13827 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13830 if (!COMPARISON_P (x))
13832 output_operand_lossage ("operand is neither a constant nor a "
13833 "condition code, invalid operand code "
13837 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13838 if (ASSEMBLER_DIALECT == ASM_ATT)
13841 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13844 /* Like above, but reverse condition */
13846 /* Check to see if argument to %c is really a constant
13847 and not a condition code which needs to be reversed. */
13848 if (!COMPARISON_P (x))
13850 output_operand_lossage ("operand is neither a constant nor a "
13851 "condition code, invalid operand "
13855 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13858 if (!COMPARISON_P (x))
13860 output_operand_lossage ("operand is neither a constant nor a "
13861 "condition code, invalid operand "
13865 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13866 if (ASSEMBLER_DIALECT == ASM_ATT)
13869 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13873 /* It doesn't actually matter what mode we use here, as we're
13874 only going to use this for printing. */
13875 x = adjust_address_nv (x, DImode, 8);
13883 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13886 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13889 int pred_val = INTVAL (XEXP (x, 0));
13891 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13892 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13894 int taken = pred_val > REG_BR_PROB_BASE / 2;
13895 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13897 /* Emit hints only in the case default branch prediction
13898 heuristics would fail. */
13899 if (taken != cputaken)
13901 /* We use 3e (DS) prefix for taken branches and
13902 2e (CS) prefix for not taken branches. */
13904 fputs ("ds ; ", file);
13906 fputs ("cs ; ", file);
13914 switch (GET_CODE (x))
13917 fputs ("neq", file);
13920 fputs ("eq", file);
13924 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13928 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13932 fputs ("le", file);
13936 fputs ("lt", file);
13939 fputs ("unord", file);
13942 fputs ("ord", file);
13945 fputs ("ueq", file);
13948 fputs ("nlt", file);
13951 fputs ("nle", file);
13954 fputs ("ule", file);
13957 fputs ("ult", file);
13960 fputs ("une", file);
13963 output_operand_lossage ("operand is not a condition code, "
13964 "invalid operand code 'Y'");
13970 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13976 if (ASSEMBLER_DIALECT == ASM_ATT)
13979 /* The kernel uses a different segment register for performance
13980 reasons; a system call would not have to trash the userspace
13981 segment register, which would be expensive. */
13982 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13983 fputs ("fs", file);
13985 fputs ("gs", file);
13989 output_operand_lossage ("invalid operand code '%c'", code);
13994 print_reg (x, code, file);
13996 else if (MEM_P (x))
13998 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13999 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14000 && GET_MODE (x) != BLKmode)
14003 switch (GET_MODE_SIZE (GET_MODE (x)))
14005 case 1: size = "BYTE"; break;
14006 case 2: size = "WORD"; break;
14007 case 4: size = "DWORD"; break;
14008 case 8: size = "QWORD"; break;
14009 case 12: size = "TBYTE"; break;
14011 if (GET_MODE (x) == XFmode)
14016 case 32: size = "YMMWORD"; break;
14018 gcc_unreachable ();
14021 /* Check for explicit size override (codes 'b', 'w' and 'k') */
14024 else if (code == 'w')
14026 else if (code == 'k')
14029 fputs (size, file);
14030 fputs (" PTR ", file);
14034 /* Avoid (%rip) for call operands. */
14035 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14036 && !CONST_INT_P (x))
14037 output_addr_const (file, x);
14038 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14039 output_operand_lossage ("invalid constraints for operand");
14041 output_address (x);
14044 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14049 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14050 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14052 if (ASSEMBLER_DIALECT == ASM_ATT)
14054 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14056 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14058 fprintf (file, "0x%08x", (unsigned int) l);
14061 /* These float cases don't actually occur as immediate operands. */
14062 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14066 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14067 fputs (dstr, file);
14070 else if (GET_CODE (x) == CONST_DOUBLE
14071 && GET_MODE (x) == XFmode)
14075 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14076 fputs (dstr, file);
14081 /* We have patterns that allow zero sets of memory, for instance.
14082 In 64-bit mode, we should probably support all 8-byte vectors,
14083 since we can in fact encode that into an immediate. */
14084 if (GET_CODE (x) == CONST_VECTOR)
14086 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14092 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14094 if (ASSEMBLER_DIALECT == ASM_ATT)
14097 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14098 || GET_CODE (x) == LABEL_REF)
14100 if (ASSEMBLER_DIALECT == ASM_ATT)
14103 fputs ("OFFSET FLAT:", file);
14106 if (CONST_INT_P (x))
14107 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14108 else if (flag_pic || MACHOPIC_INDIRECT)
14109 output_pic_addr_const (file, x, code);
14111 output_addr_const (file, x);
14116 ix86_print_operand_punct_valid_p (unsigned char code)
14118 return (code == '@' || code == '*' || code == '+'
14119 || code == '&' || code == ';');
14122 /* Print a memory operand whose address is ADDR. */
14125 ix86_print_operand_address (FILE *file, rtx addr)
14127 struct ix86_address parts;
14128 rtx base, index, disp;
14130 int ok = ix86_decompose_address (addr, &parts);
14135 index = parts.index;
14137 scale = parts.scale;
14145 if (ASSEMBLER_DIALECT == ASM_ATT)
14147 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14150 gcc_unreachable ();
14153 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14154 if (TARGET_64BIT && !base && !index)
14158 if (GET_CODE (disp) == CONST
14159 && GET_CODE (XEXP (disp, 0)) == PLUS
14160 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14161 symbol = XEXP (XEXP (disp, 0), 0);
14163 if (GET_CODE (symbol) == LABEL_REF
14164 || (GET_CODE (symbol) == SYMBOL_REF
14165 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14168 if (!base && !index)
14170 /* Displacement only requires special attention. */
14172 if (CONST_INT_P (disp))
14174 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14175 fputs ("ds:", file);
14176 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14179 output_pic_addr_const (file, disp, 0);
14181 output_addr_const (file, disp);
14185 if (ASSEMBLER_DIALECT == ASM_ATT)
14190 output_pic_addr_const (file, disp, 0);
14191 else if (GET_CODE (disp) == LABEL_REF)
14192 output_asm_label (disp);
14194 output_addr_const (file, disp);
14199 print_reg (base, 0, file);
14203 print_reg (index, 0, file);
14205 fprintf (file, ",%d", scale);
14211 rtx offset = NULL_RTX;
14215 /* Pull out the offset of a symbol; print any symbol itself. */
14216 if (GET_CODE (disp) == CONST
14217 && GET_CODE (XEXP (disp, 0)) == PLUS
14218 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14220 offset = XEXP (XEXP (disp, 0), 1);
14221 disp = gen_rtx_CONST (VOIDmode,
14222 XEXP (XEXP (disp, 0), 0));
14226 output_pic_addr_const (file, disp, 0);
14227 else if (GET_CODE (disp) == LABEL_REF)
14228 output_asm_label (disp);
14229 else if (CONST_INT_P (disp))
14232 output_addr_const (file, disp);
14238 print_reg (base, 0, file);
14241 if (INTVAL (offset) >= 0)
14243 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14254 print_reg (index, 0, file);
14256 fprintf (file, "*%d", scale);
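/* Added illustration, not from the original sources: the same address
   base + index * 4 + disp is printed by the two branches above as

     ATT:    -4(%ebp,%ecx,4)
     Intel:  [ebp-4+ecx*4]

   i.e. displacement outside the parentheses for ATT, everything inside
   the brackets for Intel.  */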
14263 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14266 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14270 if (GET_CODE (x) != UNSPEC)
14273 op = XVECEXP (x, 0, 0);
14274 switch (XINT (x, 1))
14276 case UNSPEC_GOTTPOFF:
14277 output_addr_const (file, op);
14278 /* FIXME: This might be @TPOFF in Sun ld. */
14279 fputs ("@gottpoff", file);
14282 output_addr_const (file, op);
14283 fputs ("@tpoff", file);
14285 case UNSPEC_NTPOFF:
14286 output_addr_const (file, op);
14288 fputs ("@tpoff", file);
14290 fputs ("@ntpoff", file);
14292 case UNSPEC_DTPOFF:
14293 output_addr_const (file, op);
14294 fputs ("@dtpoff", file);
14296 case UNSPEC_GOTNTPOFF:
14297 output_addr_const (file, op);
14299 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14300 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14302 fputs ("@gotntpoff", file);
14304 case UNSPEC_INDNTPOFF:
14305 output_addr_const (file, op);
14306 fputs ("@indntpoff", file);
14309 case UNSPEC_MACHOPIC_OFFSET:
14310 output_addr_const (file, op);
14312 machopic_output_function_base_name (file);
14316 case UNSPEC_STACK_CHECK:
14320 gcc_assert (flag_split_stack);
14322 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14323 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14325 gcc_unreachable ();
14328 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14339 /* Split one or more double-mode RTL references into pairs of half-mode
14340 references. The RTL can be REG, offsettable MEM, integer constant, or
14341 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14342 split and "num" is its length. lo_half and hi_half are output arrays
14343 that parallel "operands". */
14346 split_double_mode (enum machine_mode mode, rtx operands[],
14347 int num, rtx lo_half[], rtx hi_half[])
14349 enum machine_mode half_mode;
14355 half_mode = DImode;
14358 half_mode = SImode;
14361 gcc_unreachable ();
14364 byte = GET_MODE_SIZE (half_mode);
14368 rtx op = operands[num];
14370 /* simplify_subreg refuses to split volatile memory addresses,
14371 but we still have to handle them. */
14374 lo_half[num] = adjust_address (op, half_mode, 0);
14375 hi_half[num] = adjust_address (op, half_mode, byte);
14379 lo_half[num] = simplify_gen_subreg (half_mode, op,
14380 GET_MODE (op) == VOIDmode
14381 ? mode : GET_MODE (op), 0);
14382 hi_half[num] = simplify_gen_subreg (half_mode, op,
14383 GET_MODE (op) == VOIDmode
14384 ? mode : GET_MODE (op), byte);
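/* Added illustration, not from the original sources: splitting a DImode
   pseudo on a 32-bit target yields two SImode halves at byte offsets 0
   and 4, e.g. (subreg:SI (reg:DI 60) 0) and (subreg:SI (reg:DI 60) 4);
   an offsettable MEM is split with adjust_address instead.  */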
14389 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14390 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14391 is the expression of the binary operation. The output may either be
14392 emitted here, or returned to the caller, like all output_* functions.
14394 There is no guarantee that the operands are the same mode, as they
14395 might be within FLOAT or FLOAT_EXTEND expressions. */
14397 #ifndef SYSV386_COMPAT
14398 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14399 wants to fix the assemblers because that causes incompatibility
14400 with gcc. No-one wants to fix gcc because that causes
14401 incompatibility with assemblers... You can use the option of
14402 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14403 #define SYSV386_COMPAT 1
14407 output_387_binary_op (rtx insn, rtx *operands)
14409 static char buf[40];
14412 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14414 #ifdef ENABLE_CHECKING
14415 /* Even if we do not want to check the inputs, this documents the input
14416 constraints, which helps in understanding the following code. */
14417 if (STACK_REG_P (operands[0])
14418 && ((REG_P (operands[1])
14419 && REGNO (operands[0]) == REGNO (operands[1])
14420 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14421 || (REG_P (operands[2])
14422 && REGNO (operands[0]) == REGNO (operands[2])
14423 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14424 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14427 gcc_assert (is_sse);
14430 switch (GET_CODE (operands[3]))
14433 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14434 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14442 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14443 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14451 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14452 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14460 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14461 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14469 gcc_unreachable ();
14476 strcpy (buf, ssep);
14477 if (GET_MODE (operands[0]) == SFmode)
14478 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14480 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14484 strcpy (buf, ssep + 1);
14485 if (GET_MODE (operands[0]) == SFmode)
14486 strcat (buf, "ss\t{%2, %0|%0, %2}");
14488 strcat (buf, "sd\t{%2, %0|%0, %2}");
14494 switch (GET_CODE (operands[3]))
14498 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14500 rtx temp = operands[2];
14501 operands[2] = operands[1];
14502 operands[1] = temp;
14505 /* We now know operands[0] == operands[1]. */
14507 if (MEM_P (operands[2]))
14513 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14515 if (STACK_TOP_P (operands[0]))
14516 /* How is it that we are storing to a dead operand[2]?
14517 Well, presumably operands[1] is dead too. We can't
14518 store the result to st(0) as st(0) gets popped on this
14519 instruction. Instead store to operands[2] (which I
14520 think has to be st(1)). st(1) will be popped later.
14521 gcc <= 2.8.1 didn't have this check and generated
14522 assembly code that the Unixware assembler rejected. */
14523 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14525 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14529 if (STACK_TOP_P (operands[0]))
14530 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14532 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14537 if (MEM_P (operands[1]))
14543 if (MEM_P (operands[2]))
14549 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14552 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14553 derived assemblers, confusingly reverse the direction of
14554 the operation for fsub{r} and fdiv{r} when the
14555 destination register is not st(0). The Intel assembler
14556 doesn't have this brain damage. Read !SYSV386_COMPAT to
14557 figure out what the hardware really does. */
14558 if (STACK_TOP_P (operands[0]))
14559 p = "{p\t%0, %2|rp\t%2, %0}";
14561 p = "{rp\t%2, %0|p\t%0, %2}";
14563 if (STACK_TOP_P (operands[0]))
14564 /* As above for fmul/fadd, we can't store to st(0). */
14565 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14567 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14572 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14575 if (STACK_TOP_P (operands[0]))
14576 p = "{rp\t%0, %1|p\t%1, %0}";
14578 p = "{p\t%1, %0|rp\t%0, %1}";
14580 if (STACK_TOP_P (operands[0]))
14581 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14583 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14588 if (STACK_TOP_P (operands[0]))
14590 if (STACK_TOP_P (operands[1]))
14591 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14593 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14596 else if (STACK_TOP_P (operands[1]))
14599 p = "{\t%1, %0|r\t%0, %1}";
14601 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14607 p = "{r\t%2, %0|\t%0, %2}";
14609 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14615 gcc_unreachable ();
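/* Added illustration, not from the original sources: for an SFmode add
   with SSE operands the code above returns "addss\t{%2, %0|%0, %2}"
   (the three-operand "vaddss\t{%2, %1, %0|%0, %1, %2}" under AVX),
   while the x87 cases pick between e.g. "fadd" and "faddp" templates
   depending on which operand is st(0) and whether it dies.  */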
14622 /* Return needed mode for entity in optimize_mode_switching pass. */
14625 ix86_mode_needed (int entity, rtx insn)
14627 enum attr_i387_cw mode;
14629 /* The mode UNINITIALIZED is used to store the control word after a
14630 function call or ASM pattern. The mode ANY specifies that the function
14631 has no requirements on the control word and makes no changes to the
14632 bits we are interested in. */
14635 || (NONJUMP_INSN_P (insn)
14636 && (asm_noperands (PATTERN (insn)) >= 0
14637 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14638 return I387_CW_UNINITIALIZED;
14640 if (recog_memoized (insn) < 0)
14641 return I387_CW_ANY;
14643 mode = get_attr_i387_cw (insn);
14648 if (mode == I387_CW_TRUNC)
14653 if (mode == I387_CW_FLOOR)
14658 if (mode == I387_CW_CEIL)
14663 if (mode == I387_CW_MASK_PM)
14668 gcc_unreachable ();
14671 return I387_CW_ANY;
14674 /* Output code to initialize control word copies used by trunc?f?i and
14675 rounding patterns. CURRENT_MODE is set to current control word,
14676 while NEW_MODE is set to new control word. */
14679 emit_i387_cw_initialization (int mode)
14681 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14684 enum ix86_stack_slot slot;
14686 rtx reg = gen_reg_rtx (HImode);
14688 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14689 emit_move_insn (reg, copy_rtx (stored_mode));
14691 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14692 || optimize_function_for_size_p (cfun))
14696 case I387_CW_TRUNC:
14697 /* round toward zero (truncate) */
14698 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14699 slot = SLOT_CW_TRUNC;
14702 case I387_CW_FLOOR:
14703 /* round down toward -oo */
14704 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14705 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14706 slot = SLOT_CW_FLOOR;
14710 /* round up toward +oo */
14711 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14712 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14713 slot = SLOT_CW_CEIL;
14716 case I387_CW_MASK_PM:
14717 /* mask precision exception for nearbyint() */
14718 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14719 slot = SLOT_CW_MASK_PM;
14723 gcc_unreachable ();
14730 case I387_CW_TRUNC:
14731 /* round toward zero (truncate) */
14732 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14733 slot = SLOT_CW_TRUNC;
14736 case I387_CW_FLOOR:
14737 /* round down toward -oo */
14738 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14739 slot = SLOT_CW_FLOOR;
14743 /* round up toward +oo */
14744 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14745 slot = SLOT_CW_CEIL;
14748 case I387_CW_MASK_PM:
14749 /* mask precision exception for nearbyint() */
14750 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14751 slot = SLOT_CW_MASK_PM;
14755 gcc_unreachable ();
14759 gcc_assert (slot < MAX_386_STACK_LOCALS);
14761 new_mode = assign_386_stack_local (HImode, slot);
14762 emit_move_insn (new_mode, reg);
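/* Added note, not from the original sources: bits 10-11 of the x87
   control word select the rounding mode (00 nearest, 01 down, 10 up,
   11 toward zero) and bit 5 masks the precision exception.  Hence the
   masks used above: 0x0c00 forces truncation, 0x0400 rounds down,
   0x0800 rounds up, and 0x0020 sets the PM mask bit.  */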
14765 /* Output code for INSN to convert a float to a signed int. OPERANDS
14766 are the insn operands. The output may be [HSD]Imode and the input
14767 operand may be [SDX]Fmode. */
14770 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14772 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14773 int dimode_p = GET_MODE (operands[0]) == DImode;
14774 int round_mode = get_attr_i387_cw (insn);
14776 /* Jump through a hoop or two for DImode, since the hardware has no
14777 non-popping instruction. We used to do this a different way, but
14778 that was somewhat fragile and broke with post-reload splitters. */
14779 if ((dimode_p || fisttp) && !stack_top_dies)
14780 output_asm_insn ("fld\t%y1", operands);
14782 gcc_assert (STACK_TOP_P (operands[1]));
14783 gcc_assert (MEM_P (operands[0]));
14784 gcc_assert (GET_MODE (operands[1]) != TFmode);
14787 output_asm_insn ("fisttp%Z0\t%0", operands);
14790 if (round_mode != I387_CW_ANY)
14791 output_asm_insn ("fldcw\t%3", operands);
14792 if (stack_top_dies || dimode_p)
14793 output_asm_insn ("fistp%Z0\t%0", operands);
14795 output_asm_insn ("fist%Z0\t%0", operands);
14796 if (round_mode != I387_CW_ANY)
14797 output_asm_insn ("fldcw\t%2", operands);
14803 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14804 have the values zero or one, indicates the ffreep insn's operand
14805 from the OPERANDS array. */
14807 static const char *
14808 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14810 if (TARGET_USE_FFREEP)
14811 #ifdef HAVE_AS_IX86_FFREEP
14812 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14815 static char retval[32];
14816 int regno = REGNO (operands[opno]);
14818 gcc_assert (FP_REGNO_P (regno));
14820 regno -= FIRST_STACK_REG;
14822 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
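/* Added note, not from the original sources: ffreep st(i) encodes as
   the two bytes 0xdf 0xc0+i.  Emitted through ASM_SHORT on this
   little-endian target, the word 0xc<i>df lays down exactly those
   bytes; e.g. (assuming ASM_SHORT is ".short") ".short 0xc1df"
   assembles to 0xdf 0xc1, i.e. ffreep %st(1).  */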
14827 return opno ? "fstp\t%y1" : "fstp\t%y0";
14831 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14832 should be used. UNORDERED_P is true when fucom should be used. */
14835 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14837 int stack_top_dies;
14838 rtx cmp_op0, cmp_op1;
14839 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14843 cmp_op0 = operands[0];
14844 cmp_op1 = operands[1];
14848 cmp_op0 = operands[1];
14849 cmp_op1 = operands[2];
14854 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14855 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14856 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14857 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14859 if (GET_MODE (operands[0]) == SFmode)
14861 return &ucomiss[TARGET_AVX ? 0 : 1];
14863 return &comiss[TARGET_AVX ? 0 : 1];
14866 return &ucomisd[TARGET_AVX ? 0 : 1];
14868 return &comisd[TARGET_AVX ? 0 : 1];
14871 gcc_assert (STACK_TOP_P (cmp_op0));
14873 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14875 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14877 if (stack_top_dies)
14879 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14880 return output_387_ffreep (operands, 1);
14883 return "ftst\n\tfnstsw\t%0";
14886 if (STACK_REG_P (cmp_op1)
14888 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14889 && REGNO (cmp_op1) != FIRST_STACK_REG)
14891 /* If the top of the 387 stack dies, and the other operand
14892 is also a stack register that dies, then this must be an
14893 `fcompp' float compare. */
14897 /* There is no double popping fcomi variant. Fortunately,
14898 eflags is immune from the fstp's cc clobbering. */
14900 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14902 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14903 return output_387_ffreep (operands, 0);
14908 return "fucompp\n\tfnstsw\t%0";
14910 return "fcompp\n\tfnstsw\t%0";
14915 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14917 static const char * const alt[16] =
14919 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14920 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14921 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14922 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14924 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14925 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14929 "fcomi\t{%y1, %0|%0, %y1}",
14930 "fcomip\t{%y1, %0|%0, %y1}",
14931 "fucomi\t{%y1, %0|%0, %y1}",
14932 "fucomip\t{%y1, %0|%0, %y1}",
14943 mask = eflags_p << 3;
14944 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14945 mask |= unordered_p << 1;
14946 mask |= stack_top_dies;
14948 gcc_assert (mask < 16);
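/* Added illustration, not from the original sources: with eflags_p = 1,
   a float cmp_op1, unordered_p = 1 and a dying stack top, the mask is
   8 + 0 + 2 + 1 = 11, which selects "fucomip\t{%y1, %0|%0, %y1}" from
   the table above.  */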
14957 ix86_output_addr_vec_elt (FILE *file, int value)
14959 const char *directive = ASM_LONG;
14963 directive = ASM_QUAD;
14965 gcc_assert (!TARGET_64BIT);
14968 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14972 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14974 const char *directive = ASM_LONG;
14977 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14978 directive = ASM_QUAD;
14980 gcc_assert (!TARGET_64BIT);
14982 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14983 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14984 fprintf (file, "%s%s%d-%s%d\n",
14985 directive, LPREFIX, value, LPREFIX, rel);
14986 else if (HAVE_AS_GOTOFF_IN_DATA)
14987 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14989 else if (TARGET_MACHO)
14991 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14992 machopic_output_function_base_name (file);
14997 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14998 GOT_SYMBOL_NAME, LPREFIX, value);
15001 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
15005 ix86_expand_clear (rtx dest)
15009 /* We play register width games, which are only valid after reload. */
15010 gcc_assert (reload_completed);
15012 /* Avoid HImode and its attendant prefix byte. */
15013 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
15014 dest = gen_rtx_REG (SImode, REGNO (dest));
15015 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
15017 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15018 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
15020 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15021 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
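/* Added note, not from the original sources: the xor form clobbers the
   flags register, so the SET is wrapped in a PARALLEL with an explicit
   (clobber (reg:CC FLAGS_REG)); the emitted code is simply

	xorl	%eax, %eax

   for a clear of eax.  */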
15027 /* X is an unchanging MEM. If it is a constant pool reference, return
15028 the constant pool rtx, else NULL. */
15031 maybe_get_pool_constant (rtx x)
15033 x = ix86_delegitimize_address (XEXP (x, 0));
15035 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
15036 return get_pool_constant (x);
15042 ix86_expand_move (enum machine_mode mode, rtx operands[])
15045 enum tls_model model;
15050 if (GET_CODE (op1) == SYMBOL_REF)
15052 model = SYMBOL_REF_TLS_MODEL (op1);
15055 op1 = legitimize_tls_address (op1, model, true);
15056 op1 = force_operand (op1, op0);
15060 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15061 && SYMBOL_REF_DLLIMPORT_P (op1))
15062 op1 = legitimize_dllimport_symbol (op1, false);
15064 else if (GET_CODE (op1) == CONST
15065 && GET_CODE (XEXP (op1, 0)) == PLUS
15066 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15068 rtx addend = XEXP (XEXP (op1, 0), 1);
15069 rtx symbol = XEXP (XEXP (op1, 0), 0);
15072 model = SYMBOL_REF_TLS_MODEL (symbol);
15074 tmp = legitimize_tls_address (symbol, model, true);
15075 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15076 && SYMBOL_REF_DLLIMPORT_P (symbol))
15077 tmp = legitimize_dllimport_symbol (symbol, true);
15081 tmp = force_operand (tmp, NULL);
15082 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15083 op0, 1, OPTAB_DIRECT);
15089 if ((flag_pic || MACHOPIC_INDIRECT)
15090 && mode == Pmode && symbolic_operand (op1, Pmode))
15092 if (TARGET_MACHO && !TARGET_64BIT)
15095 /* dynamic-no-pic */
15096 if (MACHOPIC_INDIRECT)
15098 rtx temp = ((reload_in_progress
15099 || ((op0 && REG_P (op0))
15101 ? op0 : gen_reg_rtx (Pmode));
15102 op1 = machopic_indirect_data_reference (op1, temp);
15104 op1 = machopic_legitimize_pic_address (op1, mode,
15105 temp == op1 ? 0 : temp);
15107 if (op0 != op1 && GET_CODE (op0) != MEM)
15109 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15113 if (GET_CODE (op0) == MEM)
15114 op1 = force_reg (Pmode, op1);
15118 if (GET_CODE (temp) != REG)
15119 temp = gen_reg_rtx (Pmode);
15120 temp = legitimize_pic_address (op1, temp);
15125 /* dynamic-no-pic */
15131 op1 = force_reg (Pmode, op1);
15132 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15134 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15135 op1 = legitimize_pic_address (op1, reg);
15144 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15145 || !push_operand (op0, mode))
15147 op1 = force_reg (mode, op1);
15149 if (push_operand (op0, mode)
15150 && ! general_no_elim_operand (op1, mode))
15151 op1 = copy_to_mode_reg (mode, op1);
15153 /* Force large constants in 64bit compilation into a register
15154 to get them CSEed. */
15155 if (can_create_pseudo_p ()
15156 && (mode == DImode) && TARGET_64BIT
15157 && immediate_operand (op1, mode)
15158 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15159 && !register_operand (op0, mode)
15161 op1 = copy_to_mode_reg (mode, op1);
15163 if (can_create_pseudo_p ()
15164 && FLOAT_MODE_P (mode)
15165 && GET_CODE (op1) == CONST_DOUBLE)
15167 /* If we are loading a floating point constant to a register,
15168 force the value to memory now, since we'll get better code
15169 out of the back end.  */
15171 op1 = validize_mem (force_const_mem (mode, op1));
15172 if (!register_operand (op0, mode))
15174 rtx temp = gen_reg_rtx (mode);
15175 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15176 emit_move_insn (op0, temp);
15182 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15186 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15188 rtx op0 = operands[0], op1 = operands[1];
15189 unsigned int align = GET_MODE_ALIGNMENT (mode);
15191 /* Force constants other than zero into memory. We do not know how
15192 the instructions used to build constants modify the upper 64 bits
15193 of the register; once we have that information we may be able
15194 to handle some of them more efficiently. */
15195 if (can_create_pseudo_p ()
15196 && register_operand (op0, mode)
15197 && (CONSTANT_P (op1)
15198 || (GET_CODE (op1) == SUBREG
15199 && CONSTANT_P (SUBREG_REG (op1))))
15200 && !standard_sse_constant_p (op1))
15201 op1 = validize_mem (force_const_mem (mode, op1));
15203 /* We need to check memory alignment for SSE mode since attributes
15204 can make operands unaligned. */
15205 if (can_create_pseudo_p ()
15206 && SSE_REG_MODE_P (mode)
15207 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15208 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15212 /* ix86_expand_vector_move_misalign() does not like constants ... */
15213 if (CONSTANT_P (op1)
15214 || (GET_CODE (op1) == SUBREG
15215 && CONSTANT_P (SUBREG_REG (op1))))
15216 op1 = validize_mem (force_const_mem (mode, op1));
15218 /* ... nor both arguments in memory. */
15219 if (!register_operand (op0, mode)
15220 && !register_operand (op1, mode))
15221 op1 = force_reg (mode, op1);
15223 tmp[0] = op0; tmp[1] = op1;
15224 ix86_expand_vector_move_misalign (mode, tmp);
15228 /* Make operand1 a register if it isn't already. */
15229 if (can_create_pseudo_p ()
15230 && !register_operand (op0, mode)
15231 && !register_operand (op1, mode))
15233 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15237 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15240 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15241 straight to ix86_expand_vector_move. */
15242 /* Code generation for scalar reg-reg moves of single and double precision data:
15243 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
15247 if (x86_sse_partial_reg_dependency == true)
15252 Code generation for scalar loads of double precision data:
15253 if (x86_sse_split_regs == true)
15254 movlpd mem, reg (gas syntax)
15258 Code generation for unaligned packed loads of single precision data
15259 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
15260 if (x86_sse_unaligned_move_optimal)
15263 if (x86_sse_partial_reg_dependency == true)
15275 Code generation for unaligned packed loads of double precision data
15276 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
15277 if (x86_sse_unaligned_move_optimal)
15280 if (x86_sse_split_regs == true)
15293 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15302 switch (GET_MODE_CLASS (mode))
15304 case MODE_VECTOR_INT:
15306 switch (GET_MODE_SIZE (mode))
15309 /* If we're optimizing for size, movups is the smallest. */
15310 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15312 op0 = gen_lowpart (V4SFmode, op0);
15313 op1 = gen_lowpart (V4SFmode, op1);
15314 emit_insn (gen_avx_movups (op0, op1));
15317 op0 = gen_lowpart (V16QImode, op0);
15318 op1 = gen_lowpart (V16QImode, op1);
15319 emit_insn (gen_avx_movdqu (op0, op1));
15322 op0 = gen_lowpart (V32QImode, op0);
15323 op1 = gen_lowpart (V32QImode, op1);
15324 emit_insn (gen_avx_movdqu256 (op0, op1));
15327 gcc_unreachable ();
15330 case MODE_VECTOR_FLOAT:
15331 op0 = gen_lowpart (mode, op0);
15332 op1 = gen_lowpart (mode, op1);
15337 emit_insn (gen_avx_movups (op0, op1));
15340 emit_insn (gen_avx_movups256 (op0, op1));
15343 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15345 op0 = gen_lowpart (V4SFmode, op0);
15346 op1 = gen_lowpart (V4SFmode, op1);
15347 emit_insn (gen_avx_movups (op0, op1));
15350 emit_insn (gen_avx_movupd (op0, op1));
15353 emit_insn (gen_avx_movupd256 (op0, op1));
15356 gcc_unreachable ();
15361 gcc_unreachable ();
15369 /* If we're optimizing for size, movups is the smallest. */
15370 if (optimize_insn_for_size_p ()
15371 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15373 op0 = gen_lowpart (V4SFmode, op0);
15374 op1 = gen_lowpart (V4SFmode, op1);
15375 emit_insn (gen_sse_movups (op0, op1));
15379 /* ??? If we have typed data, then it would appear that using
15380 movdqu is the only way to get unaligned data loaded with
15381 integer registers.  */
15382 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15384 op0 = gen_lowpart (V16QImode, op0);
15385 op1 = gen_lowpart (V16QImode, op1);
15386 emit_insn (gen_sse2_movdqu (op0, op1));
15390 if (TARGET_SSE2 && mode == V2DFmode)
15394 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15396 op0 = gen_lowpart (V2DFmode, op0);
15397 op1 = gen_lowpart (V2DFmode, op1);
15398 emit_insn (gen_sse2_movupd (op0, op1));
15402 /* When SSE registers are split into halves, we can avoid
15403 writing to the top half twice. */
15404 if (TARGET_SSE_SPLIT_REGS)
15406 emit_clobber (op0);
15411 /* ??? Not sure about the best option for the Intel chips.
15412 The following would seem to satisfy; the register is
15413 entirely cleared, breaking the dependency chain. We
15414 then store to the upper half, with a dependency depth
15415 of one. A rumor has it that Intel recommends two movsd
15416 followed by an unpacklpd, but this is unconfirmed. And
15417 given that the dependency depth of the unpacklpd would
15418 still be one, I'm not sure why this would be better. */
15419 zero = CONST0_RTX (V2DFmode);
15422 m = adjust_address (op1, DFmode, 0);
15423 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15424 m = adjust_address (op1, DFmode, 8);
15425 emit_insn (gen_sse2_loadhpd (op0, op0, m));
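/* The emitted sequence is thus roughly
     movsd mem, reg       ; low half loaded, upper half cleared
     movhpd mem+8, reg    ; high half loaded
   (gas syntax), keeping the dependency depth at one as described
   in the comment above.  */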
15429 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15431 op0 = gen_lowpart (V4SFmode, op0);
15432 op1 = gen_lowpart (V4SFmode, op1);
15433 emit_insn (gen_sse_movups (op0, op1));
15437 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15438 emit_move_insn (op0, CONST0_RTX (mode));
15440 emit_clobber (op0);
15442 if (mode != V4SFmode)
15443 op0 = gen_lowpart (V4SFmode, op0);
15444 m = adjust_address (op1, V2SFmode, 0);
15445 emit_insn (gen_sse_loadlps (op0, op0, m));
15446 m = adjust_address (op1, V2SFmode, 8);
15447 emit_insn (gen_sse_loadhps (op0, op0, m));
15450 else if (MEM_P (op0))
15452 /* If we're optimizing for size, movups is the smallest. */
15453 if (optimize_insn_for_size_p ()
15454 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15456 op0 = gen_lowpart (V4SFmode, op0);
15457 op1 = gen_lowpart (V4SFmode, op1);
15458 emit_insn (gen_sse_movups (op0, op1));
15462 /* ??? Similar to above, only less clear because of
15463 "typeless stores".  */
15464 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15465 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15467 op0 = gen_lowpart (V16QImode, op0);
15468 op1 = gen_lowpart (V16QImode, op1);
15469 emit_insn (gen_sse2_movdqu (op0, op1));
15473 if (TARGET_SSE2 && mode == V2DFmode)
15475 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15477 op0 = gen_lowpart (V2DFmode, op0);
15478 op1 = gen_lowpart (V2DFmode, op1);
15479 emit_insn (gen_sse2_movupd (op0, op1));
15483 m = adjust_address (op0, DFmode, 0);
15484 emit_insn (gen_sse2_storelpd (m, op1));
15485 m = adjust_address (op0, DFmode, 8);
15486 emit_insn (gen_sse2_storehpd (m, op1));
15491 if (mode != V4SFmode)
15492 op1 = gen_lowpart (V4SFmode, op1);
15494 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15496 op0 = gen_lowpart (V4SFmode, op0);
15497 emit_insn (gen_sse_movups (op0, op1));
15501 m = adjust_address (op0, V2SFmode, 0);
15502 emit_insn (gen_sse_storelps (m, op1));
15503 m = adjust_address (op0, V2SFmode, 8);
15504 emit_insn (gen_sse_storehps (m, op1));
15509 gcc_unreachable ();
15512 /* Expand a push in MODE. This is some mode for which we do not support
15513 proper push instructions, at least from the registers that we expect
15514 the value to live in. */
15517 ix86_expand_push (enum machine_mode mode, rtx x)
15521 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15522 GEN_INT (-GET_MODE_SIZE (mode)),
15523 stack_pointer_rtx, 1, OPTAB_DIRECT);
15524 if (tmp != stack_pointer_rtx)
15525 emit_move_insn (stack_pointer_rtx, tmp);
15527 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
15529 /* When we push an operand onto the stack, it has to be aligned at least
15530 at the function argument boundary.  However since we don't have
15531 the argument type, we can't determine the actual argument
15532 boundary.  */
15533 emit_move_insn (tmp, x);
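/* I.e., the push is open-coded as roughly
     sub  $size, %esp
     mov  x, (%esp)
   instead of relying on a real push instruction.  */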
15536 /* Helper function of ix86_fixup_binary_operands to canonicalize
15537 operand order. Returns true if the operands should be swapped. */
15540 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15543 rtx dst = operands[0];
15544 rtx src1 = operands[1];
15545 rtx src2 = operands[2];
15547 /* If the operation is not commutative, we can't do anything. */
15548 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15551 /* Highest priority is that src1 should match dst. */
15552 if (rtx_equal_p (dst, src1))
15554 if (rtx_equal_p (dst, src2))
15557 /* Next highest priority is that immediate constants come second. */
15558 if (immediate_operand (src2, mode))
15560 if (immediate_operand (src1, mode))
15563 /* Lowest priority is that memory references should come second. */
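/* E.g., in "a = mem + c" the memory reference is moved to the second
   position, since the first source operand of the two-address form
   must be tied to the destination.  */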
15573 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15574 destination to use for the operation. If different from the true
15575 destination in operands[0], a copy operation will be required. */
15578 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15581 rtx dst = operands[0];
15582 rtx src1 = operands[1];
15583 rtx src2 = operands[2];
15585 /* Canonicalize operand order. */
15586 if (ix86_swap_binary_operands_p (code, mode, operands))
15590 /* It is invalid to swap operands of different modes. */
15591 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15598 /* Both source operands cannot be in memory. */
15599 if (MEM_P (src1) && MEM_P (src2))
15601 /* Optimization: Only read from memory once. */
15602 if (rtx_equal_p (src1, src2))
15604 src2 = force_reg (mode, src2);
15608 src2 = force_reg (mode, src2);
15611 /* If the destination is memory, and we do not have matching source
15612 operands, do things in registers. */
15613 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15614 dst = gen_reg_rtx (mode);
15616 /* Source 1 cannot be a constant. */
15617 if (CONSTANT_P (src1))
15618 src1 = force_reg (mode, src1);
15620 /* Source 1 cannot be a non-matching memory. */
15621 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15622 src1 = force_reg (mode, src1);
15624 operands[1] = src1;
15625 operands[2] = src2;
15629 /* Similarly, but assume that the destination has already been
15630 set up properly. */
15633 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15634 enum machine_mode mode, rtx operands[])
15636 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15637 gcc_assert (dst == operands[0]);
15640 /* Attempt to expand a binary operator. Make the expansion closer to the
15641 actual machine than just general_operand, which will allow 3 separate
15642 memory references (one output, two input) in a single insn. */
15645 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15648 rtx src1, src2, dst, op, clob;
15650 dst = ix86_fixup_binary_operands (code, mode, operands);
15651 src1 = operands[1];
15652 src2 = operands[2];
15654 /* Emit the instruction. */
15656 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15657 if (reload_in_progress)
15659 /* Reload doesn't know about the flags register, and doesn't know that
15660 it doesn't want to clobber it. We can only do this with PLUS. */
15661 gcc_assert (code == PLUS);
15664 else if (reload_completed
15666 && !rtx_equal_p (dst, src1))
15668 /* This is going to be an LEA; avoid splitting it later. */
15673 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15674 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15677 /* Fix up the destination if needed. */
15678 if (dst != operands[0])
15679 emit_move_insn (operands[0], dst);
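/* E.g., after reload a three-register "a = b + c" keeps the bare SET
   (no flags clobber) so it can later be emitted as a single
   "leal (%b,%c), %a" rather than a move followed by an add.  */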
15682 /* Return TRUE or FALSE depending on whether the binary operator meets the
15683 appropriate constraints. */
15686 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15689 rtx dst = operands[0];
15690 rtx src1 = operands[1];
15691 rtx src2 = operands[2];
15693 /* Both source operands cannot be in memory. */
15694 if (MEM_P (src1) && MEM_P (src2))
15697 /* Canonicalize operand order for commutative operators. */
15698 if (ix86_swap_binary_operands_p (code, mode, operands))
15705 /* If the destination is memory, we must have a matching source operand. */
15706 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15709 /* Source 1 cannot be a constant. */
15710 if (CONSTANT_P (src1))
15713 /* Source 1 cannot be a non-matching memory. */
15714 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15716 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15717 return (code == AND
15720 || (TARGET_64BIT && mode == DImode))
15721 && CONST_INT_P (src2)
15722 && (INTVAL (src2) == 0xff
15723 || INTVAL (src2) == 0xffff));
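/* I.e., "reg = mem & 0xff" is accepted despite the non-matching
   memory source because it can be emitted as a movz zero extension.  */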
15729 /* Attempt to expand a unary operator. Make the expansion closer to the
15730 actual machine than just general_operand, which will allow 2 separate
15731 memory references (one output, one input) in a single insn. */
15734 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15737 int matching_memory;
15738 rtx src, dst, op, clob;
15743 /* If the destination is memory, and we do not have matching source
15744 operands, do things in registers. */
15745 matching_memory = 0;
15748 if (rtx_equal_p (dst, src))
15749 matching_memory = 1;
15751 dst = gen_reg_rtx (mode);
15754 /* When source operand is memory, destination must match. */
15755 if (MEM_P (src) && !matching_memory)
15756 src = force_reg (mode, src);
15758 /* Emit the instruction. */
15760 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15761 if (reload_in_progress || code == NOT)
15763 /* Reload doesn't know about the flags register, and doesn't know that
15764 it doesn't want to clobber it. */
15765 gcc_assert (code == NOT);
15770 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15771 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15774 /* Fix up the destination if needed. */
15775 if (dst != operands[0])
15776 emit_move_insn (operands[0], dst);
15779 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
15780 divisor are within the range [0-255].  */
15783 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15786 rtx end_label, qimode_label;
15787 rtx insn, div, mod;
15788 rtx scratch, tmp0, tmp1, tmp2;
15789 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15790 rtx (*gen_zero_extend) (rtx, rtx);
15791 rtx (*gen_test_ccno_1) (rtx, rtx);
15796 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15797 gen_test_ccno_1 = gen_testsi_ccno_1;
15798 gen_zero_extend = gen_zero_extendqisi2;
15801 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15802 gen_test_ccno_1 = gen_testdi_ccno_1;
15803 gen_zero_extend = gen_zero_extendqidi2;
15806 gcc_unreachable ();
15809 end_label = gen_label_rtx ();
15810 qimode_label = gen_label_rtx ();
15812 scratch = gen_reg_rtx (mode);
15814 /* Use 8bit unsigned divmod if dividend and divisor are within
15815 the range [0-255].  */
15816 emit_move_insn (scratch, operands[2]);
15817 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15818 scratch, 1, OPTAB_DIRECT);
15819 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15820 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15821 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15822 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15823 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15825 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15826 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15827 JUMP_LABEL (insn) = qimode_label;
15829 /* Generate original signed/unsigned divmod.  */
15830 div = gen_divmod4_1 (operands[0], operands[1],
15831 operands[2], operands[3]);
15834 /* Branch to the end. */
15835 emit_jump_insn (gen_jump (end_label));
15838 /* Generate 8bit unsigned divide. */
15839 emit_label (qimode_label);
15840 /* Don't use operands[0] for result of 8bit divide since not all
15841 registers support QImode ZERO_EXTRACT. */
15842 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15843 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15844 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15845 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15849 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15850 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15854 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15855 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15858 /* Extract remainder from AH. */
15859 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15860 if (REG_P (operands[1]))
15861 insn = emit_move_insn (operands[1], tmp1);
15864 /* Need a new scratch register since the old one has the result
15865 of the 8bit divide.  */
15866 scratch = gen_reg_rtx (mode);
15867 emit_move_insn (scratch, tmp1);
15868 insn = emit_move_insn (operands[1], scratch);
15870 set_unique_reg_note (insn, REG_EQUAL, mod);
15872 /* Zero extend quotient from AL. */
15873 tmp1 = gen_lowpart (QImode, tmp0);
15874 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15875 set_unique_reg_note (insn, REG_EQUAL, div);
15877 emit_label (end_label);
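/* The generated code is roughly:
       mov     dividend, scratch
       or      divisor, scratch
       test    $-0x100, scratch    ; any bits above the low 8 set?
       je      .Lqimode
       <full-width div/idiv>       ; original signed/unsigned divmod
       jmp     .Lend
   .Lqimode:
       divb                        ; quotient in AL, remainder in AH
   .Lend:                                                          */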
15880 #define LEA_SEARCH_THRESHOLD 12
15882 /* Search backward for non-agu definition of register number REGNO1
15883 or register number REGNO2 in INSN's basic block until
15884 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15885 2. Reach BB boundary, or
15886 3. Reach agu definition.
15887 Returns the distance between the non-agu definition point and INSN.
15888 If no definition point, returns -1. */
15891 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15894 basic_block bb = BLOCK_FOR_INSN (insn);
15897 enum attr_type insn_type;
15899 if (insn != BB_HEAD (bb))
15901 rtx prev = PREV_INSN (insn);
15902 while (prev && distance < LEA_SEARCH_THRESHOLD)
15904 if (NONDEBUG_INSN_P (prev))
15907 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15908 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15909 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15910 && (regno1 == DF_REF_REGNO (*def_rec)
15911 || regno2 == DF_REF_REGNO (*def_rec)))
15913 insn_type = get_attr_type (prev);
15914 if (insn_type != TYPE_LEA)
15918 if (prev == BB_HEAD (bb))
15920 prev = PREV_INSN (prev);
15924 if (distance < LEA_SEARCH_THRESHOLD)
15928 bool simple_loop = false;
15930 FOR_EACH_EDGE (e, ei, bb->preds)
15933 simple_loop = true;
15939 rtx prev = BB_END (bb);
15942 && distance < LEA_SEARCH_THRESHOLD)
15944 if (NONDEBUG_INSN_P (prev))
15947 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15948 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15949 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15950 && (regno1 == DF_REF_REGNO (*def_rec)
15951 || regno2 == DF_REF_REGNO (*def_rec)))
15953 insn_type = get_attr_type (prev);
15954 if (insn_type != TYPE_LEA)
15958 prev = PREV_INSN (prev);
15966 /* get_attr_type may modify recog data. We want to make sure
15967 that recog data is valid for instruction INSN, on which
15968 distance_non_agu_define is called. INSN is unchanged here. */
15969 extract_insn_cached (insn);
15973 /* Return the distance between INSN and the next insn that uses
15974 register number REGNO0 in a memory address.  Return -1 if no such
15975 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
15978 distance_agu_use (unsigned int regno0, rtx insn)
15980 basic_block bb = BLOCK_FOR_INSN (insn);
15985 if (insn != BB_END (bb))
15987 rtx next = NEXT_INSN (insn);
15988 while (next && distance < LEA_SEARCH_THRESHOLD)
15990 if (NONDEBUG_INSN_P (next))
15994 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15995 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15996 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15997 && regno0 == DF_REF_REGNO (*use_rec))
15999 /* Return DISTANCE if OP0 is used in memory
16000 address in NEXT. */
16004 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16005 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16006 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16007 && regno0 == DF_REF_REGNO (*def_rec))
16009 /* Return -1 if OP0 is set in NEXT. */
16013 if (next == BB_END (bb))
16015 next = NEXT_INSN (next);
16019 if (distance < LEA_SEARCH_THRESHOLD)
16023 bool simple_loop = false;
16025 FOR_EACH_EDGE (e, ei, bb->succs)
16028 simple_loop = true;
16034 rtx next = BB_HEAD (bb);
16037 && distance < LEA_SEARCH_THRESHOLD)
16039 if (NONDEBUG_INSN_P (next))
16043 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16044 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16045 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16046 && regno0 == DF_REF_REGNO (*use_rec))
16048 /* Return DISTANCE if OP0 is used in memory
16049 address in NEXT. */
16053 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16054 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16055 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16056 && regno0 == DF_REF_REGNO (*def_rec))
16058 /* Return -1 if OP0 is set in NEXT. */
16063 next = NEXT_INSN (next);
16071 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
16072 there is a dilemma of choosing LEA or ADD.
16073 Negative value: ADD is more preferred than LEA.
16074 Zero: neutral.
16075 Positive value: LEA is more preferred than ADD.  */
16076 #define IX86_LEA_PRIORITY 2
16078 /* Return true if it is ok to optimize an ADD operation to LEA
16079 operation to avoid flag register consumption.  For most processors,
16080 ADD is faster than LEA.  For processors like ATOM, if the
16081 destination register of LEA holds an actual address which will be
16082 used soon, LEA is better; otherwise ADD is better.  */
16085 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16087 unsigned int regno0 = true_regnum (operands[0]);
16088 unsigned int regno1 = true_regnum (operands[1]);
16089 unsigned int regno2 = true_regnum (operands[2]);
16091 /* If a = b + c and (a != b && a != c), we must use the lea form.  */
16092 if (regno0 != regno1 && regno0 != regno2)
16095 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16099 int dist_define, dist_use;
16101 /* Return false if REGNO0 isn't used in a memory address.  */
16102 dist_use = distance_agu_use (regno0, insn);
16106 dist_define = distance_non_agu_define (regno1, regno2, insn);
16107 if (dist_define <= 0)
16110 /* If this insn has both a backward non-agu dependence and a forward
16111 agu dependence, the one with the shorter distance takes effect.  */
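/* E.g., with IX86_LEA_PRIORITY == 2, a non-agu definition 3 insns back
   and an agu use 6 insns ahead gives 3 + 2 < 6, so ADD is kept; were
   the use only 4 insns ahead, LEA would be chosen instead.  */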
16112 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
16119 /* Return true if the destination reg of SET_BODY is the shift count of
16120 USE_BODY.  */
16123 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16129 /* Retrieve destination of SET_BODY. */
16130 switch (GET_CODE (set_body))
16133 set_dest = SET_DEST (set_body);
16134 if (!set_dest || !REG_P (set_dest))
16138 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16139 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16147 /* Retrieve shift count of USE_BODY. */
16148 switch (GET_CODE (use_body))
16151 shift_rtx = XEXP (use_body, 1);
16154 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16155 if (ix86_dep_by_shift_count_body (set_body,
16156 XVECEXP (use_body, 0, i)))
16164 && (GET_CODE (shift_rtx) == ASHIFT
16165 || GET_CODE (shift_rtx) == LSHIFTRT
16166 || GET_CODE (shift_rtx) == ASHIFTRT
16167 || GET_CODE (shift_rtx) == ROTATE
16168 || GET_CODE (shift_rtx) == ROTATERT))
16170 rtx shift_count = XEXP (shift_rtx, 1);
16172 /* Return true if shift count is dest of SET_BODY. */
16173 if (REG_P (shift_count)
16174 && true_regnum (set_dest) == true_regnum (shift_count))
16181 /* Return true if the destination reg of SET_INSN is the shift count of
16182 USE_INSN.  */
16185 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16187 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16188 PATTERN (use_insn));
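/* E.g., this returns true when SET_INSN defines %ecx and USE_INSN is
   "sall %cl, %eax", whose variable shift count reads that definition.  */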
16191 /* Return TRUE or FALSE depending on whether the unary operator meets the
16192 appropriate constraints. */
16195 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16196 enum machine_mode mode ATTRIBUTE_UNUSED,
16197 rtx operands[2] ATTRIBUTE_UNUSED)
16199 /* If one of the operands is memory, source and destination must match.  */
16200 if ((MEM_P (operands[0])
16201 || MEM_P (operands[1]))
16202 && ! rtx_equal_p (operands[0], operands[1]))
16207 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16208 are ok, keeping in mind the possible movddup alternative. */
16211 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16213 if (MEM_P (operands[0]))
16214 return rtx_equal_p (operands[0], operands[1 + high]);
16215 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16216 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16220 /* Post-reload splitter for converting an SF or DFmode value in an
16221 SSE register into an unsigned SImode. */
16224 ix86_split_convert_uns_si_sse (rtx operands[])
16226 enum machine_mode vecmode;
16227 rtx value, large, zero_or_two31, input, two31, x;
16229 large = operands[1];
16230 zero_or_two31 = operands[2];
16231 input = operands[3];
16232 two31 = operands[4];
16233 vecmode = GET_MODE (large);
16234 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16236 /* Load up the value into the low element. We must ensure that the other
16237 elements are valid floats -- zero is the easiest such value. */
16240 if (vecmode == V4SFmode)
16241 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16243 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16247 input = gen_rtx_REG (vecmode, REGNO (input));
16248 emit_move_insn (value, CONST0_RTX (vecmode));
16249 if (vecmode == V4SFmode)
16250 emit_insn (gen_sse_movss (value, value, input));
16252 emit_insn (gen_sse2_movsd (value, value, input));
16255 emit_move_insn (large, two31);
16256 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16258 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16259 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16261 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16262 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16264 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16265 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16267 large = gen_rtx_REG (V4SImode, REGNO (large));
16268 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16270 x = gen_rtx_REG (V4SImode, REGNO (value));
16271 if (vecmode == V4SFmode)
16272 emit_insn (gen_sse2_cvttps2dq (x, value));
16274 emit_insn (gen_sse2_cvttpd2dq (x, value));
16277 emit_insn (gen_xorv4si3 (value, value, large));
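/* Net effect: inputs >= 2^31 have 2^31 subtracted before the signed
   cvttps2dq/cvttpd2dq conversion, and the sign bit -- rebuilt by the
   shift-left-by-31 above -- is XORed back in to restore the unsigned
   result.  */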
16280 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16281 Expects the 64-bit DImode to be supplied in a pair of integral
16282 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16283 -mfpmath=sse, !optimize_size only. */
16286 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16288 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16289 rtx int_xmm, fp_xmm;
16290 rtx biases, exponents;
16293 int_xmm = gen_reg_rtx (V4SImode);
16294 if (TARGET_INTER_UNIT_MOVES)
16295 emit_insn (gen_movdi_to_sse (int_xmm, input));
16296 else if (TARGET_SSE_SPLIT_REGS)
16298 emit_clobber (int_xmm);
16299 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16303 x = gen_reg_rtx (V2DImode);
16304 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16305 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16308 x = gen_rtx_CONST_VECTOR (V4SImode,
16309 gen_rtvec (4, GEN_INT (0x43300000UL),
16310 GEN_INT (0x45300000UL),
16311 const0_rtx, const0_rtx));
16312 exponents = validize_mem (force_const_mem (V4SImode, x));
16314 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16315 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16317 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16318 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16319 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16320 (0x1.0p84 + double(fp_value_hi_xmm)).
16321 Note these exponents differ by 32. */
16323 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16325 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16326 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16327 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16328 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16329 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16330 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16331 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16332 biases = validize_mem (force_const_mem (V2DFmode, biases));
16333 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16335 /* Add the upper and lower DFmode values together. */
16337 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16340 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16341 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16342 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
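/* Worked example: input 0x100000005 splits into lo = 5 and hi = 1;
   after the bias subtraction the two doubles are 5.0 and 1.0 * 2^32,
   and the final add (haddpd, or unpckhpd + addpd) yields
   4294967301.0.  */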
16345 ix86_expand_vector_extract (false, target, fp_xmm, 0);
16348 /* Not used, but eases macroization of patterns. */
16350 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16351 rtx input ATTRIBUTE_UNUSED)
16353 gcc_unreachable ();
16356 /* Convert an unsigned SImode value into a DFmode. Only currently used
16357 for SSE, but applicable anywhere. */
16360 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16362 REAL_VALUE_TYPE TWO31r;
16365 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16366 NULL, 1, OPTAB_DIRECT);
16368 fp = gen_reg_rtx (DFmode);
16369 emit_insn (gen_floatsidf2 (fp, x));
16371 real_ldexp (&TWO31r, &dconst1, 31);
16372 x = const_double_from_real_value (TWO31r, DFmode);
16374 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16376 emit_move_insn (target, x);
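/* I.e., target = (double) (int) (input - 2^31) + 2^31.0: the bias
   makes the value fit the signed conversion, and both the conversion
   and the final add are exact.  */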
16379 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16380 32-bit mode; otherwise we have a direct convert instruction. */
16383 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16385 REAL_VALUE_TYPE TWO32r;
16386 rtx fp_lo, fp_hi, x;
16388 fp_lo = gen_reg_rtx (DFmode);
16389 fp_hi = gen_reg_rtx (DFmode);
16391 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16393 real_ldexp (&TWO32r, &dconst1, 32);
16394 x = const_double_from_real_value (TWO32r, DFmode);
16395 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16397 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16399 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16402 emit_move_insn (target, x);
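/* I.e., target = (double) hi * 2^32 + (double) (unsigned) lo, where
   only the high word carries the sign.  */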
16405 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16406 For x86_32, -mfpmath=sse, !optimize_size only. */
16408 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16410 REAL_VALUE_TYPE ONE16r;
16411 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16413 real_ldexp (&ONE16r, &dconst1, 16);
16414 x = const_double_from_real_value (ONE16r, SFmode);
16415 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16416 NULL, 0, OPTAB_DIRECT);
16417 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16418 NULL, 0, OPTAB_DIRECT);
16419 fp_hi = gen_reg_rtx (SFmode);
16420 fp_lo = gen_reg_rtx (SFmode);
16421 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16422 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16423 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16425 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16427 if (!rtx_equal_p (target, fp_hi))
16428 emit_move_insn (target, fp_hi);
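/* I.e., target = (float) (input >> 16) * 0x1p16f
                + (float) (input & 0xffff); each 16-bit half converts
   exactly with the signed cvtsi2ss, so values with the sign bit set
   need no fixup.  */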
16431 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
16432 then replicate the value for all elements of the vector
16433 register.  */
16436 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16443 v = gen_rtvec (4, value, value, value, value);
16444 return gen_rtx_CONST_VECTOR (V4SImode, v);
16448 v = gen_rtvec (2, value, value);
16449 return gen_rtx_CONST_VECTOR (V2DImode, v);
16453 v = gen_rtvec (8, value, value, value, value,
16454 value, value, value, value);
16456 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16457 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16458 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16459 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16460 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16464 v = gen_rtvec (4, value, value, value, value);
16466 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16467 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16468 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16472 v = gen_rtvec (4, value, value, value, value);
16474 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16475 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16476 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16480 v = gen_rtvec (2, value, value);
16482 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16483 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16486 gcc_unreachable ();
16490 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16491 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16492 for an SSE register. If VECT is true, then replicate the mask for
16493 all elements of the vector register. If INVERT is true, then create
16494 a mask excluding the sign bit. */
16497 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16499 enum machine_mode vec_mode, imode;
16500 HOST_WIDE_INT hi, lo;
16505 /* Find the sign bit, sign extended to 2*HWI. */
16512 mode = GET_MODE_INNER (mode);
16514 lo = 0x80000000, hi = lo < 0;
16521 mode = GET_MODE_INNER (mode);
16523 if (HOST_BITS_PER_WIDE_INT >= 64)
16524 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16526 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16531 vec_mode = VOIDmode;
16532 if (HOST_BITS_PER_WIDE_INT >= 64)
16535 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16542 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16546 lo = ~lo, hi = ~hi;
16552 mask = immed_double_const (lo, hi, imode);
16554 vec = gen_rtvec (2, v, mask);
16555 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16556 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16563 gcc_unreachable ();
16567 lo = ~lo, hi = ~hi;
16569 /* Force this value into the low part of a fp vector constant. */
16570 mask = immed_double_const (lo, hi, imode);
16571 mask = gen_lowpart (mode, mask);
16573 if (vec_mode == VOIDmode)
16574 return force_reg (mode, mask);
16576 v = ix86_build_const_vector (vec_mode, vect, mask);
16577 return force_reg (vec_mode, v);
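/* E.g., for DFmode with VECT set and INVERT clear this is the V2DF
   constant { -0.0, -0.0 } (only bit 63 of each element set); with
   INVERT set it is the complement, ready for ANDing the sign away.  */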
16580 /* Generate code for floating point ABS or NEG. */
16583 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16586 rtx mask, set, dst, src;
16587 bool use_sse = false;
16588 bool vector_mode = VECTOR_MODE_P (mode);
16589 enum machine_mode vmode = mode;
16593 else if (mode == TFmode)
16595 else if (TARGET_SSE_MATH)
16597 use_sse = SSE_FLOAT_MODE_P (mode);
16598 if (mode == SFmode)
16600 else if (mode == DFmode)
16604 /* NEG and ABS performed with SSE use bitwise mask operations.
16605 Create the appropriate mask now. */
16607 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16614 set = gen_rtx_fmt_e (code, mode, src);
16615 set = gen_rtx_SET (VOIDmode, dst, set);
16622 use = gen_rtx_USE (VOIDmode, mask);
16624 par = gen_rtvec (2, set, use);
16627 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16628 par = gen_rtvec (3, set, use, clob);
16630 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
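/* With SSE this eventually becomes, e.g., "xorps mask, reg" for NEG
   or "andps inv-mask, reg" for ABS, using the sign-bit mask built
   above.  */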
16636 /* Expand a copysign operation. Special case operand 0 being a constant. */
16639 ix86_expand_copysign (rtx operands[])
16641 enum machine_mode mode, vmode;
16642 rtx dest, op0, op1, mask, nmask;
16644 dest = operands[0];
16648 mode = GET_MODE (dest);
16650 if (mode == SFmode)
16652 else if (mode == DFmode)
16657 if (GET_CODE (op0) == CONST_DOUBLE)
16659 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16661 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16662 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16664 if (mode == SFmode || mode == DFmode)
16666 if (op0 == CONST0_RTX (mode))
16667 op0 = CONST0_RTX (vmode);
16670 rtx v = ix86_build_const_vector (vmode, false, op0);
16672 op0 = force_reg (vmode, v);
16675 else if (op0 != CONST0_RTX (mode))
16676 op0 = force_reg (mode, op0);
16678 mask = ix86_build_signbit_mask (vmode, 0, 0);
16680 if (mode == SFmode)
16681 copysign_insn = gen_copysignsf3_const;
16682 else if (mode == DFmode)
16683 copysign_insn = gen_copysigndf3_const;
16685 copysign_insn = gen_copysigntf3_const;
16687 emit_insn (copysign_insn (dest, op0, op1, mask));
16691 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16693 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16694 mask = ix86_build_signbit_mask (vmode, 0, 0);
16696 if (mode == SFmode)
16697 copysign_insn = gen_copysignsf3_var;
16698 else if (mode == DFmode)
16699 copysign_insn = gen_copysigndf3_var;
16701 copysign_insn = gen_copysigntf3_var;
16703 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16707 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16708 be a constant, and so has already been expanded into a vector constant. */
16711 ix86_split_copysign_const (rtx operands[])
16713 enum machine_mode mode, vmode;
16714 rtx dest, op0, mask, x;
16716 dest = operands[0];
16718 mask = operands[3];
16720 mode = GET_MODE (dest);
16721 vmode = GET_MODE (mask);
16723 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16724 x = gen_rtx_AND (vmode, dest, mask);
16725 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16727 if (op0 != CONST0_RTX (vmode))
16729 x = gen_rtx_IOR (vmode, dest, op0);
16730 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16734 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16735 so we have to do two masks. */
16738 ix86_split_copysign_var (rtx operands[])
16740 enum machine_mode mode, vmode;
16741 rtx dest, scratch, op0, op1, mask, nmask, x;
16743 dest = operands[0];
16744 scratch = operands[1];
16747 nmask = operands[4];
16748 mask = operands[5];
16750 mode = GET_MODE (dest);
16751 vmode = GET_MODE (mask);
16753 if (rtx_equal_p (op0, op1))
16755 /* Shouldn't happen often (it's useless, obviously), but when it does
16756 we'd generate incorrect code if we continue below. */
16757 emit_move_insn (dest, op0);
16761 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16763 gcc_assert (REGNO (op1) == REGNO (scratch));
16765 x = gen_rtx_AND (vmode, scratch, mask);
16766 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16769 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16770 x = gen_rtx_NOT (vmode, dest);
16771 x = gen_rtx_AND (vmode, x, op0);
16772 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16776 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16778 x = gen_rtx_AND (vmode, scratch, mask);
16780 else /* alternative 2,4 */
16782 gcc_assert (REGNO (mask) == REGNO (scratch));
16783 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16784 x = gen_rtx_AND (vmode, scratch, op1);
16786 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16788 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16790 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16791 x = gen_rtx_AND (vmode, dest, nmask);
16793 else /* alternative 3,4 */
16795 gcc_assert (REGNO (nmask) == REGNO (dest));
16797 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16798 x = gen_rtx_AND (vmode, dest, op0);
16800 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16803 x = gen_rtx_IOR (vmode, dest, scratch);
16804 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
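/* Every alternative computes dest = (op0 & ~signmask) | (op1 & signmask),
   i.e. op0's magnitude combined with op1's sign.  */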
16807 /* Return TRUE or FALSE depending on whether the first SET in INSN
16808 has source and destination with matching CC modes, and that the
16809 CC mode is at least as constrained as REQ_MODE. */
16812 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16815 enum machine_mode set_mode;
16817 set = PATTERN (insn);
16818 if (GET_CODE (set) == PARALLEL)
16819 set = XVECEXP (set, 0, 0);
16820 gcc_assert (GET_CODE (set) == SET);
16821 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16823 set_mode = GET_MODE (SET_DEST (set));
16827 if (req_mode != CCNOmode
16828 && (req_mode != CCmode
16829 || XEXP (SET_SRC (set), 1) != const0_rtx))
16833 if (req_mode == CCGCmode)
16837 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16841 if (req_mode == CCZmode)
16852 gcc_unreachable ();
16855 return GET_MODE (SET_SRC (set)) == set_mode;
16858 /* Generate insn patterns to do an integer compare of OPERANDS. */
16861 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16863 enum machine_mode cmpmode;
16866 cmpmode = SELECT_CC_MODE (code, op0, op1);
16867 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16869 /* This is very simple, but making the interface the same as in the
16870 FP case makes the rest of the code easier. */
16871 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16872 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16874 /* Return the test that should be put into the flags user, i.e.
16875 the bcc, scc, or cmov instruction. */
16876 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16879 /* Figure out whether to use ordered or unordered fp comparisons.
16880 Return the appropriate mode to use. */
16883 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16885 /* ??? In order to make all comparisons reversible, we do all comparisons
16886 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16887 all forms of trapping and nontrapping comparisons, we can make inequality
16888 comparisons trapping again, since it results in better code when using
16889 FCOM based compares. */
16890 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16894 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16896 enum machine_mode mode = GET_MODE (op0);
16898 if (SCALAR_FLOAT_MODE_P (mode))
16900 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16901 return ix86_fp_compare_mode (code);
16906 /* Only zero flag is needed. */
16907 case EQ: /* ZF=0 */
16908 case NE: /* ZF!=0 */
16910 /* Codes needing carry flag. */
16911 case GEU: /* CF=0 */
16912 case LTU: /* CF=1 */
16913 /* Detect overflow checks. They need just the carry flag. */
16914 if (GET_CODE (op0) == PLUS
16915 && rtx_equal_p (op1, XEXP (op0, 0)))
16919 case GTU: /* CF=0 & ZF=0 */
16920 case LEU: /* CF=1 | ZF=1 */
16921 /* Detect overflow checks. They need just the carry flag. */
16922 if (GET_CODE (op0) == MINUS
16923 && rtx_equal_p (op1, XEXP (op0, 0)))
16927 /* Codes possibly doable only with sign flag when
16928 comparing against zero. */
16929 case GE: /* SF=OF or SF=0 */
16930 case LT: /* SF<>OF or SF=1 */
16931 if (op1 == const0_rtx)
16934 /* For other cases the carry flag is not required.  */
16936 /* Codes doable only with sign flag when comparing
16937 against zero, but for which we miss the jump instruction,
16938 so we need to use relational tests against the overflow flag,
16939 which thus needs to be zero.  */
16940 case GT: /* ZF=0 & SF=OF */
16941 case LE: /* ZF=1 | SF<>OF */
16942 if (op1 == const0_rtx)
16946 /* The strcmp pattern does (use flags), and combine may ask us
16947 for the proper mode.  */
16951 gcc_unreachable ();
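/* E.g., "if (a + b < a)" -- an LTU compare of a PLUS against one of
   its own operands -- is recognized above as an overflow check that
   needs only the carry flag.  */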
16955 /* Return the fixed registers used for condition codes. */
16958 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16965 /* If two condition code modes are compatible, return a condition code
16966 mode which is compatible with both.  Otherwise, return
16967 VOIDmode.  */
16969 static enum machine_mode
16970 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16975 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16978 if ((m1 == CCGCmode && m2 == CCGOCmode)
16979 || (m1 == CCGOCmode && m2 == CCGCmode))
16985 gcc_unreachable ();
17015 /* These are only compatible with themselves, which we already
17016 checked above.  */
17022 /* Return a comparison we can do that is equivalent to
17023 swap_condition (code), except possibly for orderedness.
17024 But never change orderedness if TARGET_IEEE_FP, returning
17025 UNKNOWN in that case if necessary.  */
17027 static enum rtx_code
17028 ix86_fp_swap_condition (enum rtx_code code)
17032 case GT: /* GTU - CF=0 & ZF=0 */
17033 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
17034 case GE: /* GEU - CF=0 */
17035 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
17036 case UNLT: /* LTU - CF=1 */
17037 return TARGET_IEEE_FP ? UNKNOWN : GT;
17038 case UNLE: /* LEU - CF=1 | ZF=1 */
17039 return TARGET_IEEE_FP ? UNKNOWN : GE;
17041 return swap_condition (code);
17045 /* Return the cost of comparison CODE using the best strategy for performance.
17046 All the following functions use the number of instructions as a cost metric.
17047 In the future this should be tweaked to compute bytes for optimize_size and
17048 take into account the performance of various instructions on various CPUs.  */
17051 ix86_fp_comparison_cost (enum rtx_code code)
17055 /* The cost of code using bit-twiddling on %ah. */
17072 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17076 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17079 gcc_unreachable ();
17082 switch (ix86_fp_comparison_strategy (code))
17084 case IX86_FPCMP_COMI:
17085 return arith_cost > 4 ? 3 : 2;
17086 case IX86_FPCMP_SAHF:
17087 return arith_cost > 4 ? 4 : 3;
17093 /* Return strategy to use for floating-point. We assume that fcomi is always
17094 preferable where available, since that is also true when looking at size
17095 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
17097 enum ix86_fpcmp_strategy
17098 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17100 /* Do fcomi/sahf based test when profitable. */
17103 return IX86_FPCMP_COMI;
17105 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17106 return IX86_FPCMP_SAHF;
17108 return IX86_FPCMP_ARITH;
17111 /* Swap, force into registers, or otherwise massage the two operands
17112 to a fp comparison. The operands are updated in place; the new
17113 comparison code is returned. */
17115 static enum rtx_code
17116 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17118 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17119 rtx op0 = *pop0, op1 = *pop1;
17120 enum machine_mode op_mode = GET_MODE (op0);
17121 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
17123 /* All of the unordered compare instructions only work on registers.
17124 The same is true of the fcomi compare instructions. The XFmode
17125 compare instructions require registers except when comparing
17126 against zero or when converting operand 1 from fixed point to
17127 floating point.  */
17130 && (fpcmp_mode == CCFPUmode
17131 || (op_mode == XFmode
17132 && ! (standard_80387_constant_p (op0) == 1
17133 || standard_80387_constant_p (op1) == 1)
17134 && GET_CODE (op1) != FLOAT)
17135 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17137 op0 = force_reg (op_mode, op0);
17138 op1 = force_reg (op_mode, op1);
17142 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17143 things around if they appear profitable, otherwise force op0
17144 into a register. */
17146 if (standard_80387_constant_p (op0) == 0
17148 && ! (standard_80387_constant_p (op1) == 0
17151 enum rtx_code new_code = ix86_fp_swap_condition (code);
17152 if (new_code != UNKNOWN)
17155 tmp = op0, op0 = op1, op1 = tmp;
17161 op0 = force_reg (op_mode, op0);
17163 if (CONSTANT_P (op1))
17165 int tmp = standard_80387_constant_p (op1);
17167 op1 = validize_mem (force_const_mem (op_mode, op1));
17171 op1 = force_reg (op_mode, op1);
17174 op1 = force_reg (op_mode, op1);
17178 /* Try to rearrange the comparison to make it cheaper. */
17179 if (ix86_fp_comparison_cost (code)
17180 > ix86_fp_comparison_cost (swap_condition (code))
17181 && (REG_P (op1) || can_create_pseudo_p ()))
17184 tmp = op0, op0 = op1, op1 = tmp;
17185 code = swap_condition (code);
17187 op0 = force_reg (op_mode, op0);
17195 /* Convert comparison codes we use to represent FP comparison to integer
17196 code that will result in a proper branch.  Return UNKNOWN if no such code
17197 is available.  */
17200 ix86_fp_compare_code_to_integer (enum rtx_code code)
17229 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17232 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17234 enum machine_mode fpcmp_mode, intcmp_mode;
17237 fpcmp_mode = ix86_fp_compare_mode (code);
17238 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17240 /* Do fcomi/sahf based test when profitable. */
17241 switch (ix86_fp_comparison_strategy (code))
17243 case IX86_FPCMP_COMI:
17244 intcmp_mode = fpcmp_mode;
17245 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17246 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17251 case IX86_FPCMP_SAHF:
17252 intcmp_mode = fpcmp_mode;
17253 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17254 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17258 scratch = gen_reg_rtx (HImode);
17259 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17260 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17263 case IX86_FPCMP_ARITH:
17264 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17265 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17266 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17268 scratch = gen_reg_rtx (HImode);
17269 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17271 /* In the unordered case, we have to check C2 for NaNs, which
17272 doesn't happen to work out to anything nice combination-wise.
17273 So do some bit twiddling on the value we've got in AH to come
17274 up with an appropriate set of condition codes. */
17276 intcmp_mode = CCNOmode;
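/* After fnstsw, AH holds C0 (0x01), C2 (0x04) and C3 (0x40); 0x45
   masks all three.  For an fcom-style compare, C0 means "below",
   C3 "equal", and C2 "unordered".  */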
17281 if (code == GT || !TARGET_IEEE_FP)
17283 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17288 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17289 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17290 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17291 intcmp_mode = CCmode;
17297 if (code == LT && TARGET_IEEE_FP)
17299 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17300 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17301 intcmp_mode = CCmode;
17306 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17312 if (code == GE || !TARGET_IEEE_FP)
17314 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17320 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17326 if (code == LE && TARGET_IEEE_FP)
17328 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17329 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17330 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17331 intcmp_mode = CCmode;
17336 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17342 if (code == EQ && TARGET_IEEE_FP)
17344 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17345 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17346 intcmp_mode = CCmode;
17351 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17357 if (code == NE && TARGET_IEEE_FP)
17359 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17360 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17366 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17372 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17376 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17381 gcc_unreachable ();
17389 /* Return the test that should be put into the flags user, i.e.
17390 the bcc, scc, or cmov instruction. */
17391 return gen_rtx_fmt_ee (code, VOIDmode,
17392 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17397 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17401 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17402 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17404 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17406 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17407 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17410 ret = ix86_expand_int_compare (code, op0, op1);
17416 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17418 enum machine_mode mode = GET_MODE (op0);
17430 tmp = ix86_expand_compare (code, op0, op1);
17431 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17432 gen_rtx_LABEL_REF (VOIDmode, label),
17434 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17441 /* Expand DImode branch into multiple compare+branch. */
17443 rtx lo[2], hi[2], label2;
17444 enum rtx_code code1, code2, code3;
17445 enum machine_mode submode;
17447 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17449 tmp = op0, op0 = op1, op1 = tmp;
17450 code = swap_condition (code);
17453 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17454 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17456 submode = mode == DImode ? SImode : DImode;
17458 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17459 avoid two branches. This costs one extra insn, so disable when
17460 optimizing for size. */
17462 if ((code == EQ || code == NE)
17463 && (!optimize_insn_for_size_p ()
17464 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17469 if (hi[1] != const0_rtx)
17470 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17471 NULL_RTX, 0, OPTAB_WIDEN);
17474 if (lo[1] != const0_rtx)
17475 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17476 NULL_RTX, 0, OPTAB_WIDEN);
17478 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17479 NULL_RTX, 0, OPTAB_WIDEN);
17481 ix86_expand_branch (code, tmp, const0_rtx, label);
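/* I.e., the DImode "a == b" becomes
     t1 = hi(a) ^ hi(b);  t2 = lo(a) ^ lo(b);  t = t1 | t2;
     branch on t == 0
   using a single conditional branch.  */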
17485 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17486 op1 is a constant, and the low word is zero, then we can just
17487 examine the high word.  Similarly for a low word of -1 and
17488 less-or-equal-than or greater-than.  */
17490 if (CONST_INT_P (hi[1]))
17493 case LT: case LTU: case GE: case GEU:
17494 if (lo[1] == const0_rtx)
17496 ix86_expand_branch (code, hi[0], hi[1], label);
17500 case LE: case LEU: case GT: case GTU:
17501 if (lo[1] == constm1_rtx)
17503 ix86_expand_branch (code, hi[0], hi[1], label);
17511 /* Otherwise, we need two or three jumps. */
17513 label2 = gen_label_rtx ();
17516 code2 = swap_condition (code);
17517 code3 = unsigned_condition (code);
17521 case LT: case GT: case LTU: case GTU:
17524 case LE: code1 = LT; code2 = GT; break;
17525 case GE: code1 = GT; code2 = LT; break;
17526 case LEU: code1 = LTU; code2 = GTU; break;
17527 case GEU: code1 = GTU; code2 = LTU; break;
17529 case EQ: code1 = UNKNOWN; code2 = NE; break;
17530 case NE: code2 = UNKNOWN; break;
17533 gcc_unreachable ();
17538 * if (hi(a) < hi(b)) goto true;
17539 * if (hi(a) > hi(b)) goto false;
17540 * if (lo(a) < lo(b)) goto true;
17544 if (code1 != UNKNOWN)
17545 ix86_expand_branch (code1, hi[0], hi[1], label);
17546 if (code2 != UNKNOWN)
17547 ix86_expand_branch (code2, hi[0], hi[1], label2);
17549 ix86_expand_branch (code3, lo[0], lo[1], label);
17551 if (code2 != UNKNOWN)
17552 emit_label (label2);
17557 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17562 /* Split branch based on floating point condition. */
17564 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17565 rtx target1, rtx target2, rtx tmp, rtx pushed)
17570 if (target2 != pc_rtx)
17573 code = reverse_condition_maybe_unordered (code);
17578 condition = ix86_expand_fp_compare (code, op1, op2,
17581 /* Remove pushed operand from stack. */
17583 ix86_free_from_memory (GET_MODE (pushed));
17585 i = emit_jump_insn (gen_rtx_SET
17587 gen_rtx_IF_THEN_ELSE (VOIDmode,
17588 condition, target1, target2)));
17589 if (split_branch_probability >= 0)
17590 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17594 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17598 gcc_assert (GET_MODE (dest) == QImode);
17600 ret = ix86_expand_compare (code, op0, op1);
17601 PUT_MODE (ret, QImode);
17602 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17605 /* Expand comparison setting or clearing carry flag. Return true when
17606 successful and set *POP for the operation. */
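/* Editor's note (addition): a "carry flag compare" is one whose truth
   value lives entirely in CF, e.g. unsigned below (LTU) after a cmp,
   where CF is set on borrow; callers can then consume CF directly with
   adc/sbb instead of materializing a 0/1 value.  */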
17608 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17610 enum machine_mode mode =
17611 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
17613 /* Do not handle double-mode compares that go through the special path. */
17614 if (mode == (TARGET_64BIT ? TImode : DImode))
17617 if (SCALAR_FLOAT_MODE_P (mode))
17619 rtx compare_op, compare_seq;
17621 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17623 /* Shortcut: the following common codes never translate
17624 into carry flag compares. */
17625 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17626 || code == ORDERED || code == UNORDERED)
17629 /* These comparisons require the zero flag; swap operands so they won't. */
17630 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17631 && !TARGET_IEEE_FP)
17636 code = swap_condition (code);
17639 /* Try to expand the comparison and verify that we end up with
17640 a carry flag based comparison. This fails to be true only when
17641 we decide to expand the comparison using arithmetic, which is
17642 not a common scenario. */
17644 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17645 compare_seq = get_insns ();
17648 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17649 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17650 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17652 code = GET_CODE (compare_op);
17654 if (code != LTU && code != GEU)
17657 emit_insn (compare_seq);
17662 if (!INTEGRAL_MODE_P (mode))
17671 /* Convert a==0 into (unsigned)a<1. */
17674 if (op1 != const0_rtx)
17675 return false;
17676 op1 = const1_rtx;
17677 code = (code == EQ ? LTU : GEU);
17678 break;
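/* Editor's example: "a == 0" thus becomes "(unsigned) a < 1", i.e. a
   single "cmp $1, a" that sets CF exactly when a == 0.  */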
17679 case GTU: case LEU:
17680 /* Convert a>b into b<a or a>=b+1. */
17683 if (CONST_INT_P (op1))
17685 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17686 /* Bail out on overflow. We could still swap the operands, but
17687 that would force loading of the constant into a register. */
17688 if (op1 == const0_rtx
17689 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17690 return false;
17691 code = (code == GTU ? GEU : LTU);
17698 code = (code == GTU ? LTU : GEU);
17701 case LT: case GE:
17702 /* Convert a>=0 into (unsigned)a<0x80000000. */
17705 if (mode == DImode || op1 != const0_rtx)
17706 return false;
17707 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17708 code = (code == LT ? GEU : LTU);
17709 break;
17711 case LE: case GT:
17712 if (mode == DImode || op1 != constm1_rtx)
17713 return false;
17714 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17715 code = (code == LE ? GEU : LTU);
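/* Editor's example: for SImode these cases turn sign tests into carry
   tests: "a >= 0" becomes the unsigned "a < 0x80000000" (CF set by
   "cmp $0x80000000, a" exactly when the sign bit is clear), and
   "a <= -1" likewise becomes the unsigned "a >= 0x80000000".  */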
17721 /* Swapping operands may cause a constant to appear as the first operand. */
17722 if (!nonimmediate_operand (op0, VOIDmode))
17724 if (!can_create_pseudo_p ())
17725 return false;
17726 op0 = force_reg (mode, op0);
17728 *pop = ix86_expand_compare (code, op0, op1);
17729 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17734 ix86_expand_int_movcc (rtx operands[])
17736 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17737 rtx compare_seq, compare_op;
17738 enum machine_mode mode = GET_MODE (operands[0]);
17739 bool sign_bit_compare_p = false;
17740 rtx op0 = XEXP (operands[1], 0);
17741 rtx op1 = XEXP (operands[1], 1);
17744 compare_op = ix86_expand_compare (code, op0, op1);
17745 compare_seq = get_insns ();
17748 compare_code = GET_CODE (compare_op);
17750 if ((op1 == const0_rtx && (code == GE || code == LT))
17751 || (op1 == constm1_rtx && (code == GT || code == LE)))
17752 sign_bit_compare_p = true;
17754 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17755 HImode insns, we'd be swallowed in word prefix ops. */
17757 if ((mode != HImode || TARGET_FAST_PREFIX)
17758 && (mode != (TARGET_64BIT ? TImode : DImode))
17759 && CONST_INT_P (operands[2])
17760 && CONST_INT_P (operands[3]))
17762 rtx out = operands[0];
17763 HOST_WIDE_INT ct = INTVAL (operands[2]);
17764 HOST_WIDE_INT cf = INTVAL (operands[3]);
17765 HOST_WIDE_INT diff;
17768 /* Sign bit compares are better done using shifts than using
17769 the sbb insn. */
17770 if (sign_bit_compare_p
17771 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17773 /* Detect overlap between destination and compare sources. */
17776 if (!sign_bit_compare_p)
17779 bool fpcmp = false;
17781 compare_code = GET_CODE (compare_op);
17783 flags = XEXP (compare_op, 0);
17785 if (GET_MODE (flags) == CCFPmode
17786 || GET_MODE (flags) == CCFPUmode)
17787 {
17788 fpcmp = true;
17789 compare_code
17790 = ix86_fp_compare_code_to_integer (compare_code);
17791 }
17793 /* To simplify the rest of the code, restrict to the GEU case. */
17794 if (compare_code == LTU)
17796 HOST_WIDE_INT tmp = ct;
17799 compare_code = reverse_condition (compare_code);
17800 code = reverse_condition (code);
17804 if (fpcmp)
17805 PUT_CODE (compare_op,
17806 reverse_condition_maybe_unordered
17807 (GET_CODE (compare_op)));
17808 else
17809 PUT_CODE (compare_op,
17810 reverse_condition (GET_CODE (compare_op)));
17814 if (reg_overlap_mentioned_p (out, op0)
17815 || reg_overlap_mentioned_p (out, op1))
17816 tmp = gen_reg_rtx (mode);
17818 if (mode == DImode)
17819 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17821 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17822 flags, compare_op));
17826 if (code == GT || code == GE)
17827 code = reverse_condition (code);
17830 HOST_WIDE_INT tmp = ct;
17835 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17848 tmp = expand_simple_binop (mode, PLUS,
17850 copy_rtx (tmp), 1, OPTAB_DIRECT);
17861 tmp = expand_simple_binop (mode, IOR,
17863 copy_rtx (tmp), 1, OPTAB_DIRECT);
17865 else if (diff == -1 && ct)
17875 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17877 tmp = expand_simple_binop (mode, PLUS,
17878 copy_rtx (tmp), GEN_INT (cf),
17879 copy_rtx (tmp), 1, OPTAB_DIRECT);
17883 /*
17884 * cmpl op0,op1
17885 * sbbl dest,dest
17886 * [notl dest]
17887 * andl cf - ct, dest
17888 * [addl dest, ct]
17889 */
17897 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17900 tmp = expand_simple_binop (mode, AND,
17902 gen_int_mode (cf - ct, mode),
17903 copy_rtx (tmp), 1, OPTAB_DIRECT);
17905 tmp = expand_simple_binop (mode, PLUS,
17906 copy_rtx (tmp), GEN_INT (ct),
17907 copy_rtx (tmp), 1, OPTAB_DIRECT);
17910 if (!rtx_equal_p (tmp, out))
17911 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17918 enum machine_mode cmp_mode = GET_MODE (op0);
17921 tmp = ct, ct = cf, cf = tmp;
17924 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17926 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17928 /* We may be reversing an unordered compare to a normal compare; that
17929 is not valid in general (we may convert a non-trapping condition
17930 into a trapping one). However, on i386 we currently emit all
17931 comparisons unordered. */
17932 compare_code = reverse_condition_maybe_unordered (compare_code);
17933 code = reverse_condition_maybe_unordered (code);
17937 compare_code = reverse_condition (compare_code);
17938 code = reverse_condition (code);
17942 compare_code = UNKNOWN;
17943 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17944 && CONST_INT_P (op1))
17946 if (op1 == const0_rtx
17947 && (code == LT || code == GE))
17948 compare_code = code;
17949 else if (op1 == constm1_rtx)
17951 if (code == LE)
17952 compare_code = LT;
17953 else if (code == GT)
17954 compare_code = GE;
17958 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17959 if (compare_code != UNKNOWN
17960 && GET_MODE (op0) == GET_MODE (out)
17961 && (cf == -1 || ct == -1))
17963 /* If lea code below could be used, only optimize
17964 if it results in a 2 insn sequence. */
17966 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17967 || diff == 3 || diff == 5 || diff == 9)
17968 || (compare_code == LT && ct == -1)
17969 || (compare_code == GE && cf == -1))
17971 /*
17972 * notl op1 (if necessary)
17973 * sarl $31, op1
17974 * orl cf, op1
17975 */
17980 code = reverse_condition (code);
17983 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17985 out = expand_simple_binop (mode, IOR,
17987 out, 1, OPTAB_DIRECT);
17988 if (out != operands[0])
17989 emit_move_insn (operands[0], out);
17996 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17997 || diff == 3 || diff == 5 || diff == 9)
17998 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
18000 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
18002 /*
18003 * xorl dest,dest
18004 * cmpl op1,op2
18005 * setcc dest
18006 * lea cf(dest*(ct-cf)),dest
18008 * Size 14.
18010 * This also catches the degenerate setcc-only case.
18011 */
18016 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18019 /* On x86_64 the lea instruction operates on Pmode, so we need
18020 to get the arithmetic done in the proper mode to match. */
18022 tmp = copy_rtx (out);
18026 out1 = copy_rtx (out);
18027 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
18031 tmp = gen_rtx_PLUS (mode, tmp, out1);
18037 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
18040 if (!rtx_equal_p (tmp, out))
18043 out = force_operand (tmp, copy_rtx (out));
18045 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18047 if (!rtx_equal_p (out, operands[0]))
18048 emit_move_insn (operands[0], copy_rtx (out));
18054 * General case: Jumpful:
18055 * xorl dest,dest cmpl op1, op2
18056 * cmpl op1, op2 movl ct, dest
18057 * setcc dest jcc 1f
18058 * decl dest movl cf, dest
18059 * andl (cf-ct),dest 1:
18060 * addl ct,dest
18062 * Size 20. Size 14.
18064 * This is reasonably steep, but branch mispredict costs are
18065 * high on modern cpus, so consider failing only if optimizing
18066 * for space. */
18069 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18070 && BRANCH_COST (optimize_insn_for_speed_p (),
18075 enum machine_mode cmp_mode = GET_MODE (op0);
18080 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18082 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18084 /* We may be reversing an unordered compare to a normal compare;
18085 that is not valid in general (we may convert a non-trapping
18086 condition into a trapping one). However, on i386 we currently
18087 emit all comparisons unordered. */
18088 code = reverse_condition_maybe_unordered (code);
18092 code = reverse_condition (code);
18093 if (compare_code != UNKNOWN)
18094 compare_code = reverse_condition (compare_code);
18098 if (compare_code != UNKNOWN)
18100 /* notl op1 (if needed)
18101 sarl $31, op1
18102 andl (cf-ct), op1
18103 addl ct, op1
18105 For x < 0 (resp. x <= -1) there will be no notl,
18106 so if possible swap the constants to get rid of the
18107 complement.
18108 True/false will be -1/0 while code below (store flag
18109 followed by decrement) is 0/-1, so the constants need
18110 to be exchanged once more. */
18112 if (compare_code == GE || !cf)
18114 code = reverse_condition (code);
18119 HOST_WIDE_INT tmp = cf;
18124 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18128 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18130 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18132 copy_rtx (out), 1, OPTAB_DIRECT);
18135 out = expand_simple_binop (mode, AND, copy_rtx (out),
18136 gen_int_mode (cf - ct, mode),
18137 copy_rtx (out), 1, OPTAB_DIRECT);
18139 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18140 copy_rtx (out), 1, OPTAB_DIRECT);
18141 if (!rtx_equal_p (out, operands[0]))
18142 emit_move_insn (operands[0], copy_rtx (out));
18148 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18150 /* Try a few more things with specific constants and a variable. */
18153 rtx var, orig_out, out, tmp;
18155 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18158 /* If one of the two operands is an interesting constant, load a
18159 constant with the above and mask it in with a logical operation. */
18161 if (CONST_INT_P (operands[2]))
18164 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18165 operands[3] = constm1_rtx, op = and_optab;
18166 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18167 operands[3] = const0_rtx, op = ior_optab;
18171 else if (CONST_INT_P (operands[3]))
18174 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18175 operands[2] = constm1_rtx, op = and_optab;
18176 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18177 operands[2] = const0_rtx, op = ior_optab;
18184 orig_out = operands[0];
18185 tmp = gen_reg_rtx (mode);
18188 /* Recurse to get the constant loaded. */
18189 if (ix86_expand_int_movcc (operands) == 0)
18192 /* Mask in the interesting variable. */
18193 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18195 if (!rtx_equal_p (out, orig_out))
18196 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18201 /*
18202 * For comparison with the above:
18203 * movl cf,dest
18204 * movl ct,tmp
18205 * cmpl op1,op2
18206 * cmovcc tmp,dest
18208 * Size 15.
18209 */
18212 if (! nonimmediate_operand (operands[2], mode))
18213 operands[2] = force_reg (mode, operands[2]);
18214 if (! nonimmediate_operand (operands[3], mode))
18215 operands[3] = force_reg (mode, operands[3]);
18217 if (! register_operand (operands[2], VOIDmode)
18219 || ! register_operand (operands[3], VOIDmode)))
18220 operands[2] = force_reg (mode, operands[2]);
18223 && ! register_operand (operands[3], VOIDmode))
18224 operands[3] = force_reg (mode, operands[3]);
18226 emit_insn (compare_seq);
18227 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18228 gen_rtx_IF_THEN_ELSE (mode,
18229 compare_op, operands[2],
18234 /* Swap, force into registers, or otherwise massage the two operands
18235 to an sse comparison with a mask result. Thus we differ a bit from
18236 ix86_prepare_fp_compare_args which expects to produce a flags result.
18238 The DEST operand exists to help determine whether to commute commutative
18239 operators. The POP0/POP1 operands are updated in place. The new
18240 comparison code is returned, or UNKNOWN if not implementable. */
18242 static enum rtx_code
18243 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18244 rtx *pop0, rtx *pop1)
18252 /* We have no LTGT as an operator. We could implement it with
18253 NE & ORDERED, but this requires an extra temporary. It's
18254 not clear that it's worth it. */
18261 /* These are supported directly. */
18268 /* For commutative operators, try to canonicalize the destination
18269 operand to be first in the comparison - this helps reload to
18270 avoid extra moves. */
18271 if (!dest || !rtx_equal_p (dest, *pop1))
18279 /* These are not supported directly. Swap the comparison operands
18280 to transform into something that is supported. */
18284 code = swap_condition (code);
18288 gcc_unreachable ();
18294 /* Detect conditional moves that exactly match min/max operational
18295 semantics. Note that this is IEEE safe, as long as we don't
18296 interchange the operands.
18298 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18299 and TRUE if the operation is successful and instructions are emitted. */
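/* Editor's note: e.g. "x < y ? x : y" in SFmode can map directly to
   minss. The SSE min/max instructions are not commutative (when the
   operands compare unordered they return the second operand), which is
   why the operands must not be interchanged.  */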
18302 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18303 rtx cmp_op1, rtx if_true, rtx if_false)
18305 enum machine_mode mode;
18311 else if (code == UNGE)
18314 if_true = if_false;
18320 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18322 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18327 mode = GET_MODE (dest);
18329 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18330 but MODE may be a vector mode and thus not appropriate. */
18331 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18333 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18336 if_true = force_reg (mode, if_true);
18337 v = gen_rtvec (2, if_true, if_false);
18338 tmp = gen_rtx_UNSPEC (mode, v, u);
18342 code = is_min ? SMIN : SMAX;
18343 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18346 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
18350 /* Expand an sse vector comparison. Return the register with the result. */
18353 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18354 rtx op_true, rtx op_false)
18356 enum machine_mode mode = GET_MODE (dest);
18359 cmp_op0 = force_reg (mode, cmp_op0);
18360 if (!nonimmediate_operand (cmp_op1, mode))
18361 cmp_op1 = force_reg (mode, cmp_op1);
18364 || reg_overlap_mentioned_p (dest, op_true)
18365 || reg_overlap_mentioned_p (dest, op_false))
18366 dest = gen_reg_rtx (mode);
18368 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18369 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18374 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18375 operations. This is used for both scalar and vector conditional moves. */
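/* Editor's sketch of the generic expansion used below when no native
   select instruction is available (CMP is an all-ones/all-zeros mask
   per element):

	t2   = op_true  &  cmp;
	t3   = op_false & ~cmp;
	dest = t2 | t3;
*/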
18378 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18380 enum machine_mode mode = GET_MODE (dest);
18383 if (op_false == CONST0_RTX (mode))
18385 op_true = force_reg (mode, op_true);
18386 x = gen_rtx_AND (mode, cmp, op_true);
18387 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18389 else if (op_true == CONST0_RTX (mode))
18391 op_false = force_reg (mode, op_false);
18392 x = gen_rtx_NOT (mode, cmp);
18393 x = gen_rtx_AND (mode, x, op_false);
18394 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18396 else if (TARGET_XOP)
18398 rtx pcmov = gen_rtx_SET (mode, dest,
18399 gen_rtx_IF_THEN_ELSE (mode, cmp,
18406 op_true = force_reg (mode, op_true);
18407 op_false = force_reg (mode, op_false);
18409 t2 = gen_reg_rtx (mode);
18411 t3 = gen_reg_rtx (mode);
18415 x = gen_rtx_AND (mode, op_true, cmp);
18416 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18418 x = gen_rtx_NOT (mode, cmp);
18419 x = gen_rtx_AND (mode, x, op_false);
18420 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18422 x = gen_rtx_IOR (mode, t3, t2);
18423 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18427 /* Expand a floating-point conditional move. Return true if successful. */
18430 ix86_expand_fp_movcc (rtx operands[])
18432 enum machine_mode mode = GET_MODE (operands[0]);
18433 enum rtx_code code = GET_CODE (operands[1]);
18434 rtx tmp, compare_op;
18435 rtx op0 = XEXP (operands[1], 0);
18436 rtx op1 = XEXP (operands[1], 1);
18438 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18440 enum machine_mode cmode;
18442 /* Since we have no cmove for sse registers, don't force bad register
18443 allocation just to gain access to it. Deny movcc when the
18444 comparison mode doesn't match the move mode. */
18445 cmode = GET_MODE (op0);
18446 if (cmode == VOIDmode)
18447 cmode = GET_MODE (op1);
18451 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18452 if (code == UNKNOWN)
18455 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18456 operands[2], operands[3]))
18459 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18460 operands[2], operands[3]);
18461 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18465 /* The floating point conditional move instructions don't directly
18466 support conditions resulting from a signed integer comparison. */
18468 compare_op = ix86_expand_compare (code, op0, op1);
18469 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18471 tmp = gen_reg_rtx (QImode);
18472 ix86_expand_setcc (tmp, code, op0, op1);
18474 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18477 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18478 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18479 operands[2], operands[3])));
18484 /* Expand a floating-point vector conditional move; a vcond operation
18485 rather than a movcc operation. */
18488 ix86_expand_fp_vcond (rtx operands[])
18490 enum rtx_code code = GET_CODE (operands[3]);
18493 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18494 &operands[4], &operands[5]);
18495 if (code == UNKNOWN)
18498 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18499 operands[5], operands[1], operands[2]))
18502 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18503 operands[1], operands[2]);
18504 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18508 /* Expand a signed/unsigned integral vector conditional move. */
18511 ix86_expand_int_vcond (rtx operands[])
18513 enum machine_mode mode = GET_MODE (operands[0]);
18514 enum rtx_code code = GET_CODE (operands[3]);
18515 bool negate = false;
18518 cop0 = operands[4];
18519 cop1 = operands[5];
18521 /* XOP supports all of the comparisons on all vector int types. */
18524 /* Canonicalize the comparison to EQ, GT, GTU. */
18535 code = reverse_condition (code);
18541 code = reverse_condition (code);
18547 code = swap_condition (code);
18548 x = cop0, cop0 = cop1, cop1 = x;
18552 gcc_unreachable ();
18555 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18556 if (mode == V2DImode)
18561 /* SSE4.1 supports EQ. */
18562 if (!TARGET_SSE4_1)
18568 /* SSE4.2 supports GT/GTU. */
18569 if (!TARGET_SSE4_2)
18574 gcc_unreachable ();
18578 /* Unsigned parallel compare is not supported by the hardware.
18579 Play some tricks to turn this into a signed comparison
18580 against 0. */
18583 cop0 = force_reg (mode, cop0);
18591 rtx (*gen_sub3) (rtx, rtx, rtx);
18593 /* Subtract (-(INT MAX) - 1) from both operands to make
18594 them signed. */
18595 mask = ix86_build_signbit_mask (mode, true, false);
18596 gen_sub3 = (mode == V4SImode
18597 ? gen_subv4si3 : gen_subv2di3);
18598 t1 = gen_reg_rtx (mode);
18599 emit_insn (gen_sub3 (t1, cop0, mask));
18601 t2 = gen_reg_rtx (mode);
18602 emit_insn (gen_sub3 (t2, cop1, mask));
18612 /* Perform a parallel unsigned saturating subtraction. */
18613 x = gen_reg_rtx (mode);
18614 emit_insn (gen_rtx_SET (VOIDmode, x,
18615 gen_rtx_US_MINUS (mode, cop0, cop1)));
18618 cop1 = CONST0_RTX (mode);
18624 gcc_unreachable ();
18629 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18630 operands[1+negate], operands[2-negate]);
18632 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18633 operands[2-negate]);
18637 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18638 true if we should do zero extension, else sign extension. HIGH_P is
18639 true if we want the N/2 high elements, else the low elements. */
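/* Editor's note: without SSE4.1 the widening below is done by
   interleaving with an "extension" vector: zeros for zero extension, or
   a per-element sign mask computed as (0 > x) for sign extension.  */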
18642 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18644 enum machine_mode imode = GET_MODE (operands[1]);
18645 rtx (*unpack)(rtx, rtx, rtx);
18652 unpack = gen_vec_interleave_highv16qi;
18654 unpack = gen_vec_interleave_lowv16qi;
18658 unpack = gen_vec_interleave_highv8hi;
18660 unpack = gen_vec_interleave_lowv8hi;
18664 unpack = gen_vec_interleave_highv4si;
18666 unpack = gen_vec_interleave_lowv4si;
18669 gcc_unreachable ();
18672 dest = gen_lowpart (imode, operands[0]);
18675 se = force_reg (imode, CONST0_RTX (imode));
18677 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18678 operands[1], pc_rtx, pc_rtx);
18680 emit_insn (unpack (dest, operands[1], se));
18683 /* This function performs the same task as ix86_expand_sse_unpack,
18684 but with SSE4.1 instructions. */
18687 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18689 enum machine_mode imode = GET_MODE (operands[1]);
18690 rtx (*unpack)(rtx, rtx);
18697 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18699 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18703 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18705 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18709 unpack = gen_sse4_1_zero_extendv2siv2di2;
18711 unpack = gen_sse4_1_sign_extendv2siv2di2;
18714 gcc_unreachable ();
18717 dest = operands[0];
18720 /* Shift higher 8 bytes to lower 8 bytes. */
18721 src = gen_reg_rtx (imode);
18722 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18723 gen_lowpart (V1TImode, operands[1]),
18729 emit_insn (unpack (dest, src));
18732 /* Expand conditional increment or decrement using adc/sbb instructions.
18733 The default case using setcc followed by the conditional move can be
18734 done by generic code. */
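/* Editor's sketch: e.g. the unsigned "if (a < b) c++;" can be emitted as

	cmpl b, a
	adcl $0, c

   consuming the carry produced by the compare instead of branching.  */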
18736 ix86_expand_int_addcc (rtx operands[])
18738 enum rtx_code code = GET_CODE (operands[1]);
18740 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18742 rtx val = const0_rtx;
18743 bool fpcmp = false;
18744 enum machine_mode mode;
18745 rtx op0 = XEXP (operands[1], 0);
18746 rtx op1 = XEXP (operands[1], 1);
18748 if (operands[3] != const1_rtx
18749 && operands[3] != constm1_rtx)
18751 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18753 code = GET_CODE (compare_op);
18755 flags = XEXP (compare_op, 0);
18757 if (GET_MODE (flags) == CCFPmode
18758 || GET_MODE (flags) == CCFPUmode)
18761 code = ix86_fp_compare_code_to_integer (code);
18768 PUT_CODE (compare_op,
18769 reverse_condition_maybe_unordered
18770 (GET_CODE (compare_op)));
18772 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18775 mode = GET_MODE (operands[0]);
18777 /* Construct either adc or sbb insn. */
18778 if ((code == LTU) == (operands[3] == constm1_rtx))
18783 insn = gen_subqi3_carry;
18786 insn = gen_subhi3_carry;
18789 insn = gen_subsi3_carry;
18792 insn = gen_subdi3_carry;
18795 gcc_unreachable ();
18803 insn = gen_addqi3_carry;
18806 insn = gen_addhi3_carry;
18809 insn = gen_addsi3_carry;
18812 insn = gen_adddi3_carry;
18815 gcc_unreachable ();
18818 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18824 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18825 but works for floating point parameters and non-offsettable memories.
18826 For pushes, it returns just stack offsets; the values will be saved
18827 in the right order. At most four parts are generated. */
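/* Editor's examples: on ia32 a DImode value splits into 2 SImode parts,
   XFmode into 3 and TFmode into 4; on x86_64 TFmode splits into 2 DImode
   parts and XFmode into a DImode part plus an SImode part.  */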
18830 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18835 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18837 size = (GET_MODE_SIZE (mode) + 4) / 8;
18839 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18840 gcc_assert (size >= 2 && size <= 4);
18842 /* Optimize constant pool references to immediates. This is used by fp
18843 moves, which force all constants to memory to allow combining. */
18844 if (MEM_P (operand) && MEM_READONLY_P (operand))
18846 rtx tmp = maybe_get_pool_constant (operand);
18851 if (MEM_P (operand) && !offsettable_memref_p (operand))
18853 /* The only non-offsettable memories we handle are pushes. */
18854 int ok = push_operand (operand, VOIDmode);
18858 operand = copy_rtx (operand);
18859 PUT_MODE (operand, Pmode);
18860 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18864 if (GET_CODE (operand) == CONST_VECTOR)
18866 enum machine_mode imode = int_mode_for_mode (mode);
18867 /* Caution: if we looked through a constant pool memory above,
18868 the operand may actually have a different mode now. That's
18869 ok, since we want to pun this all the way back to an integer. */
18870 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18871 gcc_assert (operand != NULL);
18877 if (mode == DImode)
18878 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18883 if (REG_P (operand))
18885 gcc_assert (reload_completed);
18886 for (i = 0; i < size; i++)
18887 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18889 else if (offsettable_memref_p (operand))
18891 operand = adjust_address (operand, SImode, 0);
18892 parts[0] = operand;
18893 for (i = 1; i < size; i++)
18894 parts[i] = adjust_address (operand, SImode, 4 * i);
18896 else if (GET_CODE (operand) == CONST_DOUBLE)
18901 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18905 real_to_target (l, &r, mode);
18906 parts[3] = gen_int_mode (l[3], SImode);
18907 parts[2] = gen_int_mode (l[2], SImode);
18910 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18911 parts[2] = gen_int_mode (l[2], SImode);
18914 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18917 gcc_unreachable ();
18919 parts[1] = gen_int_mode (l[1], SImode);
18920 parts[0] = gen_int_mode (l[0], SImode);
18923 gcc_unreachable ();
18928 if (mode == TImode)
18929 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18930 if (mode == XFmode || mode == TFmode)
18932 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18933 if (REG_P (operand))
18935 gcc_assert (reload_completed);
18936 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18937 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18939 else if (offsettable_memref_p (operand))
18941 operand = adjust_address (operand, DImode, 0);
18942 parts[0] = operand;
18943 parts[1] = adjust_address (operand, upper_mode, 8);
18945 else if (GET_CODE (operand) == CONST_DOUBLE)
18950 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18951 real_to_target (l, &r, mode);
18953 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18954 if (HOST_BITS_PER_WIDE_INT >= 64)
18957 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18958 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18961 parts[0] = immed_double_const (l[0], l[1], DImode);
18963 if (upper_mode == SImode)
18964 parts[1] = gen_int_mode (l[2], SImode);
18965 else if (HOST_BITS_PER_WIDE_INT >= 64)
18968 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18969 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18972 parts[1] = immed_double_const (l[2], l[3], DImode);
18975 gcc_unreachable ();
18982 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18983 Return false when normal moves are needed; true when all required
18984 insns have been emitted. Operands 2-4 contain the input values
18985 in the correct order; operands 5-7 contain the output values. */
18988 ix86_split_long_move (rtx operands[])
18993 int collisions = 0;
18994 enum machine_mode mode = GET_MODE (operands[0]);
18995 bool collisionparts[4];
18997 /* The DFmode expanders may ask us to move a double.
18998 For a 64-bit target this is a single move. By hiding the fact
18999 here we simplify the i386.md splitters. */
19000 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
19002 /* Optimize constant pool references to immediates. This is used by
19003 fp moves, which force all constants to memory to allow combining. */
19005 if (MEM_P (operands[1])
19006 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
19007 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
19008 operands[1] = get_pool_constant (XEXP (operands[1], 0));
19009 if (push_operand (operands[0], VOIDmode))
19011 operands[0] = copy_rtx (operands[0]);
19012 PUT_MODE (operands[0], Pmode);
19015 operands[0] = gen_lowpart (DImode, operands[0]);
19016 operands[1] = gen_lowpart (DImode, operands[1]);
19017 emit_move_insn (operands[0], operands[1]);
19021 /* The only non-offsettable memory we handle is the push. */
19022 if (push_operand (operands[0], VOIDmode))
19025 gcc_assert (!MEM_P (operands[0])
19026 || offsettable_memref_p (operands[0]));
19028 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
19029 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
19031 /* When emitting a push, take care with source operands on the stack. */
19032 if (push && MEM_P (operands[1])
19033 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
19035 rtx src_base = XEXP (part[1][nparts - 1], 0);
19037 /* Compensate for the stack decrement by 4. */
19038 if (!TARGET_64BIT && nparts == 3
19039 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
19040 src_base = plus_constant (src_base, 4);
19042 /* src_base refers to the stack pointer and is
19043 automatically decreased by emitted push. */
19044 for (i = 0; i < nparts; i++)
19045 part[1][i] = change_address (part[1][i],
19046 GET_MODE (part[1][i]), src_base);
19049 /* We need to do the copy in the right order in case an address
19050 register of the source overlaps the destination. */
19051 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19055 for (i = 0; i < nparts; i++)
19058 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19059 if (collisionparts[i])
19063 /* Collision in the middle part can be handled by reordering. */
19064 if (collisions == 1 && nparts == 3 && collisionparts [1])
19066 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19067 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19069 else if (collisions == 1
19071 && (collisionparts [1] || collisionparts [2]))
19073 if (collisionparts [1])
19075 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19076 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19080 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19081 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19085 /* If there are more collisions, we can't handle it by reordering.
19086 Do an lea to the last part and use only one colliding move. */
19087 else if (collisions > 1)
19093 base = part[0][nparts - 1];
19095 /* Handle the case when the last part isn't valid for lea.
19096 Happens in 64-bit mode storing the 12-byte XFmode. */
19097 if (GET_MODE (base) != Pmode)
19098 base = gen_rtx_REG (Pmode, REGNO (base));
19100 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19101 part[1][0] = replace_equiv_address (part[1][0], base);
19102 for (i = 1; i < nparts; i++)
19104 tmp = plus_constant (base, UNITS_PER_WORD * i);
19105 part[1][i] = replace_equiv_address (part[1][i], tmp);
19116 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19117 emit_insn (gen_addsi3 (stack_pointer_rtx,
19118 stack_pointer_rtx, GEN_INT (-4)));
19119 emit_move_insn (part[0][2], part[1][2]);
19121 else if (nparts == 4)
19123 emit_move_insn (part[0][3], part[1][3]);
19124 emit_move_insn (part[0][2], part[1][2]);
19129 /* In 64-bit mode we don't have a 32-bit push available. If this is a
19130 register, that is OK - we will just use the larger counterpart. We also
19131 retype the memory - this comes from an attempt to avoid the REX prefix
19132 when moving the second half of a TFmode value. */
19133 if (GET_MODE (part[1][1]) == SImode)
19135 switch (GET_CODE (part[1][1]))
19138 part[1][1] = adjust_address (part[1][1], DImode, 0);
19142 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19146 gcc_unreachable ();
19149 if (GET_MODE (part[1][0]) == SImode)
19150 part[1][0] = part[1][1];
19153 emit_move_insn (part[0][1], part[1][1]);
19154 emit_move_insn (part[0][0], part[1][0]);
19158 /* Choose the correct order so we do not overwrite the source before it is copied. */
19159 if ((REG_P (part[0][0])
19160 && REG_P (part[1][1])
19161 && (REGNO (part[0][0]) == REGNO (part[1][1])
19163 && REGNO (part[0][0]) == REGNO (part[1][2]))
19165 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19167 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19169 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19171 operands[2 + i] = part[0][j];
19172 operands[6 + i] = part[1][j];
19177 for (i = 0; i < nparts; i++)
19179 operands[2 + i] = part[0][i];
19180 operands[6 + i] = part[1][i];
19184 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19185 if (optimize_insn_for_size_p ())
19187 for (j = 0; j < nparts - 1; j++)
19188 if (CONST_INT_P (operands[6 + j])
19189 && operands[6 + j] != const0_rtx
19190 && REG_P (operands[2 + j]))
19191 for (i = j; i < nparts - 1; i++)
19192 if (CONST_INT_P (operands[7 + i])
19193 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19194 operands[7 + i] = operands[2 + j];
19197 for (i = 0; i < nparts; i++)
19198 emit_move_insn (operands[2 + i], operands[6 + i]);
19203 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19204 left shift by a constant, either using a single shift or
19205 a sequence of add instructions. */
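/* Editor's example: a shift left by 2 may come out as

	addl %eax, %eax
	addl %eax, %eax

   when two adds are cheaper (per ix86_cost) than one shift by constant.  */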
19208 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19210 rtx (*insn)(rtx, rtx, rtx);
19213 || (count * ix86_cost->add <= ix86_cost->shift_const
19214 && !optimize_insn_for_size_p ()))
19216 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19217 while (count-- > 0)
19218 emit_insn (insn (operand, operand, operand));
19222 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19223 emit_insn (insn (operand, operand, GEN_INT (count)));
19228 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19230 rtx (*gen_ashl3)(rtx, rtx, rtx);
19231 rtx (*gen_shld)(rtx, rtx, rtx);
19232 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19234 rtx low[2], high[2];
19237 if (CONST_INT_P (operands[2]))
19239 split_double_mode (mode, operands, 2, low, high);
19240 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19242 if (count >= half_width)
19244 emit_move_insn (high[0], low[1]);
19245 emit_move_insn (low[0], const0_rtx);
19247 if (count > half_width)
19248 ix86_expand_ashl_const (high[0], count - half_width, mode);
19252 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19254 if (!rtx_equal_p (operands[0], operands[1]))
19255 emit_move_insn (operands[0], operands[1]);
19257 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19258 ix86_expand_ashl_const (low[0], count, mode);
19263 split_double_mode (mode, operands, 1, low, high);
19265 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19267 if (operands[1] == const1_rtx)
19269 /* Assuming we've chosen QImode-capable registers, 1 << N
19270 can be done with two 32/64-bit shifts, no branches, no cmoves. */
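/* Editor's sketch of the branch-free sequence emitted below for a DImode
   shift on ia32:

	xorl  low, low          ; xorl high, high
	testb $32, %cl
	sete  low_byte          ; setne high_byte
	shll  %cl, low          ; shll %cl, high

   Exactly one of low/high receives the 1, and the shifts (masked to
   5 bits) then place it, so 1 << N is formed without branches.  */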
19271 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19273 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19275 ix86_expand_clear (low[0]);
19276 ix86_expand_clear (high[0]);
19277 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19279 d = gen_lowpart (QImode, low[0]);
19280 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19281 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19282 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19284 d = gen_lowpart (QImode, high[0]);
19285 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19286 s = gen_rtx_NE (QImode, flags, const0_rtx);
19287 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19290 /* Otherwise, we can get the same results by manually performing
19291 a bit extract operation on bit 5/6, and then performing the two
19292 shifts. The two methods of getting 0/1 into low/high are exactly
19293 the same size. Avoiding the shift in the bit extract case helps
19294 pentium4 a bit; no one else seems to care much either way. */
19297 enum machine_mode half_mode;
19298 rtx (*gen_lshr3)(rtx, rtx, rtx);
19299 rtx (*gen_and3)(rtx, rtx, rtx);
19300 rtx (*gen_xor3)(rtx, rtx, rtx);
19301 HOST_WIDE_INT bits;
19304 if (mode == DImode)
19306 half_mode = SImode;
19307 gen_lshr3 = gen_lshrsi3;
19308 gen_and3 = gen_andsi3;
19309 gen_xor3 = gen_xorsi3;
19314 half_mode = DImode;
19315 gen_lshr3 = gen_lshrdi3;
19316 gen_and3 = gen_anddi3;
19317 gen_xor3 = gen_xordi3;
19321 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19322 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19324 x = gen_lowpart (half_mode, operands[2]);
19325 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19327 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19328 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19329 emit_move_insn (low[0], high[0]);
19330 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19333 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19334 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
19338 if (operands[1] == constm1_rtx)
19340 /* For -1 << N, we can avoid the shld instruction, because we
19341 know that we're shifting 0...31/63 ones into a -1. */
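/* Editor's example: (-1) << 10 in DImode gives low = 0xfffffc00 and
   high = 0xffffffff; only the low half needs a real shift, the high
   half can simply start out as all ones.  */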
19342 emit_move_insn (low[0], constm1_rtx);
19343 if (optimize_insn_for_size_p ())
19344 emit_move_insn (high[0], low[0]);
19346 emit_move_insn (high[0], constm1_rtx);
19350 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19352 if (!rtx_equal_p (operands[0], operands[1]))
19353 emit_move_insn (operands[0], operands[1]);
19355 split_double_mode (mode, operands, 1, low, high);
19356 emit_insn (gen_shld (high[0], low[0], operands[2]));
19359 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19361 if (TARGET_CMOVE && scratch)
19363 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19364 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19366 ix86_expand_clear (scratch);
19367 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19371 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19372 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19374 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19379 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19381 rtx (*gen_ashr3)(rtx, rtx, rtx)
19382 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19383 rtx (*gen_shrd)(rtx, rtx, rtx);
19384 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19386 rtx low[2], high[2];
19389 if (CONST_INT_P (operands[2]))
19391 split_double_mode (mode, operands, 2, low, high);
19392 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19394 if (count == GET_MODE_BITSIZE (mode) - 1)
19396 emit_move_insn (high[0], high[1]);
19397 emit_insn (gen_ashr3 (high[0], high[0],
19398 GEN_INT (half_width - 1)));
19399 emit_move_insn (low[0], high[0]);
19402 else if (count >= half_width)
19404 emit_move_insn (low[0], high[1]);
19405 emit_move_insn (high[0], low[0]);
19406 emit_insn (gen_ashr3 (high[0], high[0],
19407 GEN_INT (half_width - 1)));
19409 if (count > half_width)
19410 emit_insn (gen_ashr3 (low[0], low[0],
19411 GEN_INT (count - half_width)));
19415 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19417 if (!rtx_equal_p (operands[0], operands[1]))
19418 emit_move_insn (operands[0], operands[1]);
19420 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19421 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19426 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19428 if (!rtx_equal_p (operands[0], operands[1]))
19429 emit_move_insn (operands[0], operands[1]);
19431 split_double_mode (mode, operands, 1, low, high);
19433 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19434 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19436 if (TARGET_CMOVE && scratch)
19438 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19439 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19441 emit_move_insn (scratch, high[0]);
19442 emit_insn (gen_ashr3 (scratch, scratch,
19443 GEN_INT (half_width - 1)));
19444 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19449 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19450 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19452 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19458 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19460 rtx (*gen_lshr3)(rtx, rtx, rtx)
19461 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19462 rtx (*gen_shrd)(rtx, rtx, rtx);
19463 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19465 rtx low[2], high[2];
19468 if (CONST_INT_P (operands[2]))
19470 split_double_mode (mode, operands, 2, low, high);
19471 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19473 if (count >= half_width)
19475 emit_move_insn (low[0], high[1]);
19476 ix86_expand_clear (high[0]);
19478 if (count > half_width)
19479 emit_insn (gen_lshr3 (low[0], low[0],
19480 GEN_INT (count - half_width)));
19484 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19486 if (!rtx_equal_p (operands[0], operands[1]))
19487 emit_move_insn (operands[0], operands[1]);
19489 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19490 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19495 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19497 if (!rtx_equal_p (operands[0], operands[1]))
19498 emit_move_insn (operands[0], operands[1]);
19500 split_double_mode (mode, operands, 1, low, high);
19502 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19503 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19505 if (TARGET_CMOVE && scratch)
19507 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19508 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19510 ix86_expand_clear (scratch);
19511 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19516 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19517 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19519 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19524 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19526 predict_jump (int prob)
19528 rtx insn = get_last_insn ();
19529 gcc_assert (JUMP_P (insn));
19530 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19533 /* Helper function for the string operations below. Test whether VARIABLE
19534 is aligned to VALUE bytes. If true, jump to the label. */
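/* Editor's sketch: the emitted test is essentially

	testl $VALUE, VARIABLE
	jz    label

   i.e. the jump is taken when the tested bit of VARIABLE is clear.  */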
19536 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19538 rtx label = gen_label_rtx ();
19539 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19540 if (GET_MODE (variable) == DImode)
19541 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19543 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19544 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19547 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19549 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19553 /* Adjust COUNTER by the VALUE. */
19555 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19557 rtx (*gen_add)(rtx, rtx, rtx)
19558 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19560 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19563 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
19565 ix86_zero_extend_to_Pmode (rtx exp)
19568 if (GET_MODE (exp) == VOIDmode)
19569 return force_reg (Pmode, exp);
19570 if (GET_MODE (exp) == Pmode)
19571 return copy_to_mode_reg (Pmode, exp);
19572 r = gen_reg_rtx (Pmode);
19573 emit_insn (gen_zero_extendsidi2 (r, exp));
19577 /* Divide COUNTREG by SCALE. */
19579 scale_counter (rtx countreg, int scale)
19585 if (CONST_INT_P (countreg))
19586 return GEN_INT (INTVAL (countreg) / scale);
19587 gcc_assert (REG_P (countreg));
19589 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19590 GEN_INT (exact_log2 (scale)),
19591 NULL, 1, OPTAB_DIRECT);
19595 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19596 DImode for constant loop counts. */
19598 static enum machine_mode
19599 counter_mode (rtx count_exp)
19601 if (GET_MODE (count_exp) != VOIDmode)
19602 return GET_MODE (count_exp);
19603 if (!CONST_INT_P (count_exp))
19605 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19610 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
19611 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
19612 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
19613 output the equivalent loop to set memory by VALUE (supposed to be in MODE).
19615 The size is rounded down to a whole number of chunks moved at once.
19616 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
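/* Editor's sketch of the generated loop in C form, assuming UNROLL
   chunks of MODE per iteration (chunk = GET_MODE_SIZE (MODE)):

	size = count & ~(chunk * unroll - 1);
	for (iter = 0; iter < size; iter += chunk * unroll)
	  ...copy or set UNROLL chunks at dest + iter...
	dest += iter;  src += iter;

   The tail of fewer than chunk * unroll bytes is left to the epilogue
   code elsewhere.  */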
19620 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19621 rtx destptr, rtx srcptr, rtx value,
19622 rtx count, enum machine_mode mode, int unroll,
19625 rtx out_label, top_label, iter, tmp;
19626 enum machine_mode iter_mode = counter_mode (count);
19627 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19628 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19634 top_label = gen_label_rtx ();
19635 out_label = gen_label_rtx ();
19636 iter = gen_reg_rtx (iter_mode);
19638 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19639 NULL, 1, OPTAB_DIRECT);
19640 /* Those two should combine. */
19641 if (piece_size == const1_rtx)
19643 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19645 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19647 emit_move_insn (iter, const0_rtx);
19649 emit_label (top_label);
19651 tmp = convert_modes (Pmode, iter_mode, iter, true);
19652 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19653 destmem = change_address (destmem, mode, x_addr);
19657 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19658 srcmem = change_address (srcmem, mode, y_addr);
19660 /* When unrolling for chips that reorder memory reads and writes,
19661 we can save registers by using a single temporary.
19662 Also, using 4 temporaries is overkill in 32-bit mode. */
19663 if (!TARGET_64BIT && 0)
19665 for (i = 0; i < unroll; i++)
19670 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19672 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19674 emit_move_insn (destmem, srcmem);
19680 gcc_assert (unroll <= 4);
19681 for (i = 0; i < unroll; i++)
19683 tmpreg[i] = gen_reg_rtx (mode);
19687 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19689 emit_move_insn (tmpreg[i], srcmem);
19691 for (i = 0; i < unroll; i++)
19696 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19698 emit_move_insn (destmem, tmpreg[i]);
19703 for (i = 0; i < unroll; i++)
19707 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19708 emit_move_insn (destmem, value);
19711 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19712 true, OPTAB_LIB_WIDEN);
19714 emit_move_insn (iter, tmp);
19716 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19718 if (expected_size != -1)
19720 expected_size /= GET_MODE_SIZE (mode) * unroll;
19721 if (expected_size == 0)
19723 else if (expected_size > REG_BR_PROB_BASE)
19724 predict_jump (REG_BR_PROB_BASE - 1);
19726 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19729 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19730 iter = ix86_zero_extend_to_Pmode (iter);
19731 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19732 true, OPTAB_LIB_WIDEN);
19733 if (tmp != destptr)
19734 emit_move_insn (destptr, tmp);
19737 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19738 true, OPTAB_LIB_WIDEN);
19740 emit_move_insn (srcptr, tmp);
19742 emit_label (out_label);
19745 /* Output a "rep; mov" instruction.
19746 Arguments have the same meaning as for the previous function. */
19748 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19749 rtx destptr, rtx srcptr,
19751 enum machine_mode mode)
19757 /* If the size is known, it is shorter to use rep movs. */
19758 if (mode == QImode && CONST_INT_P (count)
19759 && !(INTVAL (count) & 3))
19760 mode = SImode;
19762 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19763 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19764 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19765 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19766 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19767 if (mode != QImode)
19769 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19770 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19771 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19772 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19773 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19774 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19778 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19779 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19781 if (CONST_INT_P (count))
19783 count = GEN_INT (INTVAL (count)
19784 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19785 destmem = shallow_copy_rtx (destmem);
19786 srcmem = shallow_copy_rtx (srcmem);
19787 set_mem_size (destmem, count);
19788 set_mem_size (srcmem, count);
19792 if (MEM_SIZE (destmem))
19793 set_mem_size (destmem, NULL_RTX);
19794 if (MEM_SIZE (srcmem))
19795 set_mem_size (srcmem, NULL_RTX);
19797 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19801 /* Output a "rep; stos" instruction.
19802 Arguments have the same meaning as for the previous function. */
19804 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19805 rtx count, enum machine_mode mode,
19811 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19812 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19813 value = force_reg (mode, gen_lowpart (mode, value));
19814 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19815 if (mode != QImode)
19817 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19818 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19819 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19822 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19823 if (orig_value == const0_rtx && CONST_INT_P (count))
19825 count = GEN_INT (INTVAL (count)
19826 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19827 destmem = shallow_copy_rtx (destmem);
19828 set_mem_size (destmem, count);
19830 else if (MEM_SIZE (destmem))
19831 set_mem_size (destmem, NULL_RTX);
19832 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19836 emit_strmov (rtx destmem, rtx srcmem,
19837 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19839 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19840 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19841 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19844 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19846 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19847 rtx destptr, rtx srcptr, rtx count, int max_size)
19850 if (CONST_INT_P (count))
19852 HOST_WIDE_INT countval = INTVAL (count);
19855 if ((countval & 0x10) && max_size > 16)
19859 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19860 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19863 gcc_unreachable ();
19866 if ((countval & 0x08) && max_size > 8)
19869 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19872 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19873 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19877 if ((countval & 0x04) && max_size > 4)
19879 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19882 if ((countval & 0x02) && max_size > 2)
19884 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19887 if ((countval & 0x01) && max_size > 1)
19889 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19896 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19897 count, 1, OPTAB_DIRECT);
19898 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19899 count, QImode, 1, 4);
19903 /* When single string operations are available, we can cheaply increase
19904 dest and src pointers. Otherwise we save code size by maintaining an
19905 offset (zero is readily available from the preceding rep operation)
19906 and using x86 addressing modes. */
19907 if (TARGET_SINGLE_STRINGOP)
19911 rtx label = ix86_expand_aligntest (count, 4, true);
19912 src = change_address (srcmem, SImode, srcptr);
19913 dest = change_address (destmem, SImode, destptr);
19914 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19915 emit_label (label);
19916 LABEL_NUSES (label) = 1;
19920 rtx label = ix86_expand_aligntest (count, 2, true);
19921 src = change_address (srcmem, HImode, srcptr);
19922 dest = change_address (destmem, HImode, destptr);
19923 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19924 emit_label (label);
19925 LABEL_NUSES (label) = 1;
19929 rtx label = ix86_expand_aligntest (count, 1, true);
19930 src = change_address (srcmem, QImode, srcptr);
19931 dest = change_address (destmem, QImode, destptr);
19932 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19933 emit_label (label);
19934 LABEL_NUSES (label) = 1;
19939 rtx offset = force_reg (Pmode, const0_rtx);
19944 rtx label = ix86_expand_aligntest (count, 4, true);
19945 src = change_address (srcmem, SImode, srcptr);
19946 dest = change_address (destmem, SImode, destptr);
19947 emit_move_insn (dest, src);
19948 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19949 true, OPTAB_LIB_WIDEN);
19951 emit_move_insn (offset, tmp);
19952 emit_label (label);
19953 LABEL_NUSES (label) = 1;
19957 rtx label = ix86_expand_aligntest (count, 2, true);
19958 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19959 src = change_address (srcmem, HImode, tmp);
19960 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19961 dest = change_address (destmem, HImode, tmp);
19962 emit_move_insn (dest, src);
19963 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19964 true, OPTAB_LIB_WIDEN);
19966 emit_move_insn (offset, tmp);
19967 emit_label (label);
19968 LABEL_NUSES (label) = 1;
19972 rtx label = ix86_expand_aligntest (count, 1, true);
19973 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19974 src = change_address (srcmem, QImode, tmp);
19975 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19976 dest = change_address (destmem, QImode, tmp);
19977 emit_move_insn (dest, src);
19978 emit_label (label);
19979 LABEL_NUSES (label) = 1;
19984 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19986 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19987 rtx count, int max_size)
19990 expand_simple_binop (counter_mode (count), AND, count,
19991 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19992 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19993 gen_lowpart (QImode, value), count, QImode,
19997 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19999 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
20003 if (CONST_INT_P (count))
20005 HOST_WIDE_INT countval = INTVAL (count);
20008 if ((countval & 0x10) && max_size > 16)
if (TARGET_64BIT)
  {
    dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
    emit_insn (gen_strset (destptr, dest, value));
    dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
    emit_insn (gen_strset (destptr, dest, value));
  }
else
  gcc_unreachable ();
20021 if ((countval & 0x08) && max_size > 8)
if (TARGET_64BIT)
  {
    dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
    emit_insn (gen_strset (destptr, dest, value));
  }
else
  {
    dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
    emit_insn (gen_strset (destptr, dest, value));
    dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
    emit_insn (gen_strset (destptr, dest, value));
  }
20037 if ((countval & 0x04) && max_size > 4)
20039 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
20040 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20043 if ((countval & 0x02) && max_size > 2)
20045 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20046 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20049 if ((countval & 0x01) && max_size > 1)
20051 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20052 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20059 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20064 rtx label = ix86_expand_aligntest (count, 16, true);
if (TARGET_64BIT)
  {
    dest = change_address (destmem, DImode, destptr);
    emit_insn (gen_strset (destptr, dest, value));
    emit_insn (gen_strset (destptr, dest, value));
  }
else
  {
    dest = change_address (destmem, SImode, destptr);
    emit_insn (gen_strset (destptr, dest, value));
    emit_insn (gen_strset (destptr, dest, value));
    emit_insn (gen_strset (destptr, dest, value));
    emit_insn (gen_strset (destptr, dest, value));
  }
20079 emit_label (label);
20080 LABEL_NUSES (label) = 1;
20084 rtx label = ix86_expand_aligntest (count, 8, true);
if (TARGET_64BIT)
  {
    dest = change_address (destmem, DImode, destptr);
    emit_insn (gen_strset (destptr, dest, value));
  }
else
  {
    dest = change_address (destmem, SImode, destptr);
    emit_insn (gen_strset (destptr, dest, value));
    emit_insn (gen_strset (destptr, dest, value));
  }
20096 emit_label (label);
20097 LABEL_NUSES (label) = 1;
20101 rtx label = ix86_expand_aligntest (count, 4, true);
20102 dest = change_address (destmem, SImode, destptr);
20103 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20104 emit_label (label);
20105 LABEL_NUSES (label) = 1;
20109 rtx label = ix86_expand_aligntest (count, 2, true);
20110 dest = change_address (destmem, HImode, destptr);
20111 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20112 emit_label (label);
20113 LABEL_NUSES (label) = 1;
20117 rtx label = ix86_expand_aligntest (count, 1, true);
20118 dest = change_address (destmem, QImode, destptr);
20119 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20120 emit_label (label);
20121 LABEL_NUSES (label) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by
   ALIGN, to DESIRED_ALIGNMENT.  */
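/* For illustration: with ALIGN = 1 and DESIRED_ALIGNMENT = 8 the tests
   below peel off at most 1 + 2 + 4 = 7 bytes; e.g. a destination at
   0x1003 is advanced by one QImode and one SImode copy to 0x1008.  */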
20128 expand_movmem_prologue (rtx destmem, rtx srcmem,
20129 rtx destptr, rtx srcptr, rtx count,
20130 int align, int desired_alignment)
20132 if (align <= 1 && desired_alignment > 1)
20134 rtx label = ix86_expand_aligntest (destptr, 1, false);
20135 srcmem = change_address (srcmem, QImode, srcptr);
20136 destmem = change_address (destmem, QImode, destptr);
20137 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20138 ix86_adjust_counter (count, 1);
20139 emit_label (label);
20140 LABEL_NUSES (label) = 1;
20142 if (align <= 2 && desired_alignment > 2)
20144 rtx label = ix86_expand_aligntest (destptr, 2, false);
20145 srcmem = change_address (srcmem, HImode, srcptr);
20146 destmem = change_address (destmem, HImode, destptr);
20147 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20148 ix86_adjust_counter (count, 2);
20149 emit_label (label);
20150 LABEL_NUSES (label) = 1;
20152 if (align <= 4 && desired_alignment > 4)
20154 rtx label = ix86_expand_aligntest (destptr, 4, false);
20155 srcmem = change_address (srcmem, SImode, srcptr);
20156 destmem = change_address (destmem, SImode, destptr);
20157 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20158 ix86_adjust_counter (count, 4);
20159 emit_label (label);
20160 LABEL_NUSES (label) = 1;
20162 gcc_assert (desired_alignment <= 8);
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
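/* E.g. ALIGN_BYTES = 3 emits one QImode and one HImode copy (3 = 1 + 2),
   after which DST is aligned to a DESIRED_ALIGN of 4.  */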
20168 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20169 int desired_align, int align_bytes)
rtx src = *srcp;
rtx src_size, dst_size;
int off = 0;
20174 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20175 if (src_align_bytes >= 0)
20176 src_align_bytes = desired_align - src_align_bytes;
20177 src_size = MEM_SIZE (src);
20178 dst_size = MEM_SIZE (dst);
20179 if (align_bytes & 1)
20181 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20182 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20184 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20186 if (align_bytes & 2)
20188 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20189 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20190 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20191 set_mem_align (dst, 2 * BITS_PER_UNIT);
20192 if (src_align_bytes >= 0
20193 && (src_align_bytes & 1) == (align_bytes & 1)
20194 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20195 set_mem_align (src, 2 * BITS_PER_UNIT);
20197 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20199 if (align_bytes & 4)
20201 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20202 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20203 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20204 set_mem_align (dst, 4 * BITS_PER_UNIT);
20205 if (src_align_bytes >= 0)
20207 unsigned int src_align = 0;
if ((src_align_bytes & 3) == (align_bytes & 3))
  src_align = 4;
else if ((src_align_bytes & 1) == (align_bytes & 1))
  src_align = 2;
20212 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20213 set_mem_align (src, src_align * BITS_PER_UNIT);
20216 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20218 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20219 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20220 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20221 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20222 if (src_align_bytes >= 0)
20224 unsigned int src_align = 0;
if ((src_align_bytes & 7) == (align_bytes & 7))
  src_align = 8;
else if ((src_align_bytes & 3) == (align_bytes & 3))
  src_align = 4;
else if ((src_align_bytes & 1) == (align_bytes & 1))
  src_align = 2;
20231 if (src_align > (unsigned int) desired_align)
20232 src_align = desired_align;
20233 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20234 set_mem_align (src, src_align * BITS_PER_UNIT);
if (dst_size)
  set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
if (src_size)
  set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
*srcp = src;
return dst;
/* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
20247 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20248 int align, int desired_alignment)
20250 if (align <= 1 && desired_alignment > 1)
20252 rtx label = ix86_expand_aligntest (destptr, 1, false);
20253 destmem = change_address (destmem, QImode, destptr);
20254 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20255 ix86_adjust_counter (count, 1);
20256 emit_label (label);
20257 LABEL_NUSES (label) = 1;
20259 if (align <= 2 && desired_alignment > 2)
20261 rtx label = ix86_expand_aligntest (destptr, 2, false);
20262 destmem = change_address (destmem, HImode, destptr);
20263 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20264 ix86_adjust_counter (count, 2);
20265 emit_label (label);
20266 LABEL_NUSES (label) = 1;
20268 if (align <= 4 && desired_alignment > 4)
20270 rtx label = ix86_expand_aligntest (destptr, 4, false);
20271 destmem = change_address (destmem, SImode, destptr);
20272 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20273 ix86_adjust_counter (count, 4);
20274 emit_label (label);
20275 LABEL_NUSES (label) = 1;
20277 gcc_assert (desired_alignment <= 8);
/* Set enough bytes at DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES
   is how many bytes need to be stored.  */
20283 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20284 int desired_align, int align_bytes)
int off = 0;
rtx dst_size = MEM_SIZE (dst);
20288 if (align_bytes & 1)
20290 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20292 emit_insn (gen_strset (destreg, dst,
20293 gen_lowpart (QImode, value)));
20295 if (align_bytes & 2)
20297 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20298 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20299 set_mem_align (dst, 2 * BITS_PER_UNIT);
20301 emit_insn (gen_strset (destreg, dst,
20302 gen_lowpart (HImode, value)));
20304 if (align_bytes & 4)
20306 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20307 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20308 set_mem_align (dst, 4 * BITS_PER_UNIT);
20310 emit_insn (gen_strset (destreg, dst,
20311 gen_lowpart (SImode, value)));
20313 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20314 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20315 set_mem_align (dst, desired_align * BITS_PER_UNIT);
if (dst_size)
  set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
return dst;
20321 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20322 static enum stringop_alg
20323 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20324 int *dynamic_check)
20326 const struct stringop_algs * algs;
20327 bool optimize_for_speed;
20328 /* Algorithms using the rep prefix want at least edi and ecx;
20329 additionally, memset wants eax and memcpy wants esi. Don't
20330 consider such algorithms if the user has appropriated those
20331 registers for their own purposes. */
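/* E.g. compiling with -ffixed-ecx (or -ffixed-edi) makes every
   rep-prefixed variant unusable here, so ALG_USABLE_P below rejects
   them and we fall back to a loop or a libcall.  */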
bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                           || (memset
                               ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20336 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20337 || (alg != rep_prefix_1_byte \
20338 && alg != rep_prefix_4_byte \
20339 && alg != rep_prefix_8_byte))
20340 const struct processor_costs *cost;
20342 /* Even if the string operation call is cold, we still might spend a lot
20343 of time processing large blocks. */
20344 if (optimize_function_for_size_p (cfun)
20345 || (optimize_insn_for_size_p ()
20346 && expected_size != -1 && expected_size < 256))
20347 optimize_for_speed = false;
20349 optimize_for_speed = true;
20351 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20353 *dynamic_check = -1;
if (memset)
  algs = &cost->memset[TARGET_64BIT != 0];
else
  algs = &cost->memcpy[TARGET_64BIT != 0];
20358 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20359 return stringop_alg;
20360 /* rep; movq or rep; movl is the smallest variant. */
20361 else if (!optimize_for_speed)
20363 if (!count || (count & 3))
20364 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20366 return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive
   to set up.  */
20370 else if (expected_size != -1 && expected_size < 4)
20371 return loop_1_byte;
20372 else if (expected_size != -1)
20375 enum stringop_alg alg = libcall;
20376 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20378 /* We get here if the algorithms that were not libcall-based
20379 were rep-prefix based and we are unable to use rep prefixes
20380 based on global register usage. Break out of the loop and
20381 use the heuristic below. */
if (algs->size[i].max == 0)
  break;
20384 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20386 enum stringop_alg candidate = algs->size[i].alg;
if (candidate != libcall && ALG_USABLE_P (candidate))
  alg = candidate;
20390 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20391 last non-libcall inline algorithm. */
20392 if (TARGET_INLINE_ALL_STRINGOPS)
/* When the current size is best copied by a libcall, but we are
   still forced to inline, run the heuristic below that will pick
   code for medium-sized blocks.  */
if (alg != libcall)
  return alg;
else if (ALG_USABLE_P (candidate))
  return candidate;
20405 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand
   and take blocks of at most that size, guessing that the average size
   will be roughly half of the block.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
20414 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20415 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
enum stringop_alg alg;
int i;
int max = -1;
bool any_alg_usable_p = true;
20422 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20424 enum stringop_alg candidate = algs->size[i].alg;
20425 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20427 if (candidate != libcall && candidate
20428 && ALG_USABLE_P (candidate))
20429 max = algs->size[i].max;
20431 /* If there aren't any usable algorithms, then recursing on
20432 smaller sizes isn't going to find anything. Just return the
20433 simple byte-at-a-time copy loop. */
if (!any_alg_usable_p)
  {
    /* Pick something reasonable.  */
    if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      *dynamic_check = 128;
    return loop_1_byte;
  }
20443 alg = decide_alg (count, max / 2, memset, dynamic_check);
20444 gcc_assert (*dynamic_check == -1);
20445 gcc_assert (alg != libcall);
20446 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20447 *dynamic_check = max;
20450 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20451 #undef ALG_USABLE_P
20454 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20455 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20457 decide_alignment (int align,
20458 enum stringop_alg alg,
20461 int desired_align = 0;
20465 gcc_unreachable ();
20467 case unrolled_loop:
20468 desired_align = GET_MODE_SIZE (Pmode);
20470 case rep_prefix_8_byte:
20473 case rep_prefix_4_byte:
/* PentiumPro has special logic triggering for 8-byte aligned blocks,
   copying a whole cache line at once.  */
20476 if (TARGET_PENTIUMPRO)
20481 case rep_prefix_1_byte:
/* PentiumPro has special logic triggering for 8-byte aligned blocks,
   copying a whole cache line at once.  */
20484 if (TARGET_PENTIUMPRO)
20498 if (desired_align < align)
20499 desired_align = align;
20500 if (expected_size != -1 && expected_size < 4)
20501 desired_align = align;
20502 return desired_align;
20505 /* Return the smallest power of 2 greater than VAL. */
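/* E.g. smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (7) == 8; it is used below to round
   EPILOGUE_SIZE_NEEDED up to a power of 2.  */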
20507 smallest_pow2_greater_than (int val)
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  ix86_expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: a conditional that jumps forward to the epilogues
      for small blocks that can be handled by the epilogue alone.  This
      is faster but also needed for correctness, since the prologue
      assumes the block is larger than the desired alignment.
20525 Optional dynamic check for size and libcall for large
20526 blocks is emitted here too, with -minline-stringops-dynamically.
   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power-of-two sized blocks,
      or a byte loop.
20533 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20534 with specified algorithm.
   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to the size guarded by the
      prologue guard).  */
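/* As a rough sketch, for a loop-based algorithm the emitted code
   corresponds to:

     if (count < epilogue_size_needed) goto epilogue;        (1)
     copy 1/2/4-byte pieces until dst % desired_align == 0;  (2)
     while (count >= size_needed) copy size_needed bytes;    (3)
   epilogue:
     copy count & (epilogue_size_needed - 1) bytes;          (4)

   This is only an illustration; the expansion below additionally
   handles constant counts and the dynamic libcall check specially.  */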
20540 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20541 rtx expected_align_exp, rtx expected_size_exp)
20547 rtx jump_around_label = NULL;
20548 HOST_WIDE_INT align = 1;
20549 unsigned HOST_WIDE_INT count = 0;
20550 HOST_WIDE_INT expected_size = -1;
20551 int size_needed = 0, epilogue_size_needed;
20552 int desired_align = 0, align_bytes = 0;
20553 enum stringop_alg alg;
20555 bool need_zero_guard = false;
20557 if (CONST_INT_P (align_exp))
20558 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
20560 if (CONST_INT_P (expected_align_exp)
20561 && INTVAL (expected_align_exp) > align)
20562 align = INTVAL (expected_align_exp);
20563 /* ALIGN is the minimum of destination and source alignment, but we care here
20564 just about destination alignment. */
20565 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20566 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20568 if (CONST_INT_P (count_exp))
20569 count = expected_size = INTVAL (count_exp);
20570 if (CONST_INT_P (expected_size_exp) && count == 0)
20571 expected_size = INTVAL (expected_size_exp);
20573 /* Make sure we don't need to care about overflow later on. */
20574 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20577 /* Step 0: Decide on preferred algorithm, desired alignment and
20578 size of chunks to be copied by main loop. */
20580 alg = decide_alg (count, expected_size, false, &dynamic_check);
20581 desired_align = decide_alignment (align, alg, expected_size);
20583 if (!TARGET_ALIGN_STRINGOPS)
20584 align = desired_align;
20586 if (alg == libcall)
20588 gcc_assert (alg != no_stringop);
20590 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20591 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20592 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20597 gcc_unreachable ();
20599 need_zero_guard = true;
20600 size_needed = GET_MODE_SIZE (Pmode);
20602 case unrolled_loop:
20603 need_zero_guard = true;
20604 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20606 case rep_prefix_8_byte:
20609 case rep_prefix_4_byte:
20612 case rep_prefix_1_byte:
20616 need_zero_guard = true;
20621 epilogue_size_needed = size_needed;
20623 /* Step 1: Prologue guard. */
20625 /* Alignment code needs count to be in register. */
20626 if (CONST_INT_P (count_exp) && desired_align > align)
20628 if (INTVAL (count_exp) > desired_align
20629 && INTVAL (count_exp) > size_needed)
align_bytes
  = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
if (align_bytes <= 0)
  align_bytes = 0;
else
  align_bytes = desired_align - align_bytes;
20638 if (align_bytes == 0)
20639 count_exp = force_reg (counter_mode (count_exp), count_exp);
20641 gcc_assert (desired_align >= 1 && align >= 1);
20643 /* Ensure that alignment prologue won't copy past end of block. */
20644 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20646 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
20649 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20653 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20655 /* If main algorithm works on QImode, no epilogue is needed.
20656 For small sizes just don't align anything. */
20657 if (size_needed == 1)
20658 desired_align = align;
20665 label = gen_label_rtx ();
20666 emit_cmp_and_jump_insns (count_exp,
20667 GEN_INT (epilogue_size_needed),
20668 LTU, 0, counter_mode (count_exp), 1, label);
20669 if (expected_size == -1 || expected_size < epilogue_size_needed)
20670 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20672 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
20678 if (dynamic_check != -1)
20680 if (CONST_INT_P (count_exp))
20682 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20684 emit_block_move_via_libcall (dst, src, count_exp, false);
20685 count_exp = const0_rtx;
20691 rtx hot_label = gen_label_rtx ();
20692 jump_around_label = gen_label_rtx ();
20693 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20694 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20695 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20696 emit_block_move_via_libcall (dst, src, count_exp, false);
20697 emit_jump (jump_around_label);
20698 emit_label (hot_label);
20702 /* Step 2: Alignment prologue. */
20704 if (desired_align > align)
20706 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
20712 src = change_address (src, BLKmode, srcreg);
20713 dst = change_address (dst, BLKmode, destreg);
20714 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20719 /* If we know how many bytes need to be stored before dst is
20720 sufficiently aligned, maintain aliasing info accurately. */
20721 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20722 desired_align, align_bytes);
20723 count_exp = plus_constant (count_exp, -align_bytes);
20724 count -= align_bytes;
20726 if (need_zero_guard
20727 && (count < (unsigned HOST_WIDE_INT) size_needed
20728 || (align_bytes == 0
20729 && count < ((unsigned HOST_WIDE_INT) size_needed
20730 + desired_align - align))))
/* It is possible that we copied enough that the main loop will not
   execute.  */
20734 gcc_assert (size_needed > 1);
20735 if (label == NULL_RTX)
20736 label = gen_label_rtx ();
20737 emit_cmp_and_jump_insns (count_exp,
20738 GEN_INT (size_needed),
20739 LTU, 0, counter_mode (count_exp), 1, label);
20740 if (expected_size == -1
20741 || expected_size < (desired_align - align) / 2 + size_needed)
20742 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20744 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20747 if (label && size_needed == 1)
20749 emit_label (label);
20750 LABEL_NUSES (label) = 1;
20752 epilogue_size_needed = 1;
20754 else if (label == NULL_RTX)
20755 epilogue_size_needed = size_needed;
20757 /* Step 3: Main loop. */
20763 gcc_unreachable ();
20765 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20766 count_exp, QImode, 1, expected_size);
20769 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20770 count_exp, Pmode, 1, expected_size);
20772 case unrolled_loop:
/* Unroll only by a factor of 2 in 32-bit mode, since we don't have
   enough registers for 4 temporaries anyway.  */
20775 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20776 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20779 case rep_prefix_8_byte:
20780 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20783 case rep_prefix_4_byte:
20784 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20787 case rep_prefix_1_byte:
20788 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of the src and dest memory for aliasing.  */
20793 if (CONST_INT_P (count_exp))
20795 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20796 (count / size_needed) * size_needed);
20797 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20798 (count / size_needed) * size_needed);
20802 src = change_address (src, BLKmode, srcreg);
20803 dst = change_address (dst, BLKmode, destreg);
20806 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
20815 if (size_needed < epilogue_size_needed)
20818 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20819 GEN_INT (size_needed - 1), count_exp, 1,
20821 if (tmp != count_exp)
20822 emit_move_insn (count_exp, tmp);
20824 emit_label (label);
20825 LABEL_NUSES (label) = 1;
20828 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20829 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20830 epilogue_size_needed);
20831 if (jump_around_label)
20832 emit_label (jump_around_label);
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than synth_mult
   by unwinding the sequence by hand on CPUs with slow multiply.  */
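/* E.g. for VAL = 0xAB and MODE = SImode the unwound sequence is
   roughly:

     reg  = 0x000000AB
     reg |= reg << 8;     -> 0x0000ABAB
     reg |= reg << 16;    -> 0xABABABAB

   which equals 0xAB * 0x01010101.  */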
20842 promote_duplicated_reg (enum machine_mode mode, rtx val)
20844 enum machine_mode valmode = GET_MODE (val);
20846 int nops = mode == DImode ? 3 : 2;
20848 gcc_assert (mode == SImode || mode == DImode);
20849 if (val == const0_rtx)
20850 return copy_to_mode_reg (mode, const0_rtx);
20851 if (CONST_INT_P (val))
HOST_WIDE_INT v = INTVAL (val) & 255;

v |= v << 8;
v |= v << 16;
20857 if (mode == DImode)
20858 v |= (v << 16) << 16;
20859 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
if (valmode == VOIDmode)
  valmode = QImode;
20864 if (valmode != QImode)
20865 val = gen_lowpart (QImode, val);
if (mode == QImode)
  return val;
if (!TARGET_PARTIAL_REG_STALL)
  nops--;
20870 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20871 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20872 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20873 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20875 rtx reg = convert_modes (mode, QImode, val, true);
20876 tmp = promote_duplicated_reg (mode, const1_rtx);
20877 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20882 rtx reg = convert_modes (mode, QImode, val, true);
20884 if (!TARGET_PARTIAL_REG_STALL)
if (mode == SImode)
  emit_insn (gen_movsi_insv_1 (reg, reg));
else
  emit_insn (gen_movdi_insv_1 (reg, reg));
else
  {
    tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                               NULL, 1, OPTAB_DIRECT);
    reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
  }
20896 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20897 NULL, 1, OPTAB_DIRECT);
20898 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
if (mode == SImode)
  return reg;
20901 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20902 NULL, 1, OPTAB_DIRECT);
reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
return reg;
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
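/* E.g. with SIZE_NEEDED = 8 on a 64-bit target, VAL is promoted to
   DImode once, and both the 8-byte main loop and the narrower prologue
   and epilogue stores then reuse gen_lowpart of the same register.  */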
20912 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20917 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20918 promoted_val = promote_duplicated_reg (DImode, val);
20919 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20920 promoted_val = promote_duplicated_reg (SImode, val);
20921 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20922 promoted_val = promote_duplicated_reg (HImode, val);
20924 promoted_val = val;
20926 return promoted_val;
/* Expand string set (memset) operation.  Use i386 string operations
   when profitable.  See the ix86_expand_movmem comment for an
   explanation of the individual steps performed.  */
20933 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20934 rtx expected_align_exp, rtx expected_size_exp)
20939 rtx jump_around_label = NULL;
20940 HOST_WIDE_INT align = 1;
20941 unsigned HOST_WIDE_INT count = 0;
20942 HOST_WIDE_INT expected_size = -1;
20943 int size_needed = 0, epilogue_size_needed;
20944 int desired_align = 0, align_bytes = 0;
20945 enum stringop_alg alg;
20946 rtx promoted_val = NULL;
20947 bool force_loopy_epilogue = false;
20949 bool need_zero_guard = false;
20951 if (CONST_INT_P (align_exp))
20952 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
20954 if (CONST_INT_P (expected_align_exp)
20955 && INTVAL (expected_align_exp) > align)
20956 align = INTVAL (expected_align_exp);
20957 if (CONST_INT_P (count_exp))
20958 count = expected_size = INTVAL (count_exp);
20959 if (CONST_INT_P (expected_size_exp) && count == 0)
20960 expected_size = INTVAL (expected_size_exp);
20962 /* Make sure we don't need to care about overflow later on. */
20963 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20966 /* Step 0: Decide on preferred algorithm, desired alignment and
20967 size of chunks to be copied by main loop. */
20969 alg = decide_alg (count, expected_size, true, &dynamic_check);
20970 desired_align = decide_alignment (align, alg, expected_size);
20972 if (!TARGET_ALIGN_STRINGOPS)
20973 align = desired_align;
20975 if (alg == libcall)
20977 gcc_assert (alg != no_stringop);
20979 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20980 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20985 gcc_unreachable ();
20987 need_zero_guard = true;
20988 size_needed = GET_MODE_SIZE (Pmode);
20990 case unrolled_loop:
20991 need_zero_guard = true;
20992 size_needed = GET_MODE_SIZE (Pmode) * 4;
20994 case rep_prefix_8_byte:
20997 case rep_prefix_4_byte:
21000 case rep_prefix_1_byte:
21004 need_zero_guard = true;
21008 epilogue_size_needed = size_needed;
21010 /* Step 1: Prologue guard. */
21012 /* Alignment code needs count to be in register. */
21013 if (CONST_INT_P (count_exp) && desired_align > align)
21015 if (INTVAL (count_exp) > desired_align
21016 && INTVAL (count_exp) > size_needed)
align_bytes
  = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
if (align_bytes <= 0)
  align_bytes = 0;
else
  align_bytes = desired_align - align_bytes;
21025 if (align_bytes == 0)
21027 enum machine_mode mode = SImode;
if (TARGET_64BIT && (count & ~0xffffffff))
  mode = DImode;
21030 count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion to allow better CSE across the
   main loop and epilogue (i.e., one load of the big constant in
   front of all code).  */
21036 if (CONST_INT_P (val_exp))
21037 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21038 desired_align, align);
21039 /* Ensure that alignment prologue won't copy past end of block. */
21040 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
21042 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
21045 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* To improve performance of small blocks, we jump around the VAL
   promoting code.  This means that if the promoted VAL is not constant,
   we might not use it in the epilogue and have to use a byte loop
   instead.  */
21051 if (epilogue_size_needed > 2 && !promoted_val)
21052 force_loopy_epilogue = true;
21055 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21057 /* If main algorithm works on QImode, no epilogue is needed.
21058 For small sizes just don't align anything. */
21059 if (size_needed == 1)
21060 desired_align = align;
21067 label = gen_label_rtx ();
21068 emit_cmp_and_jump_insns (count_exp,
21069 GEN_INT (epilogue_size_needed),
21070 LTU, 0, counter_mode (count_exp), 1, label);
21071 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21072 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21074 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21077 if (dynamic_check != -1)
21079 rtx hot_label = gen_label_rtx ();
21080 jump_around_label = gen_label_rtx ();
21081 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21082 LEU, 0, counter_mode (count_exp), 1, hot_label);
21083 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21084 set_storage_via_libcall (dst, count_exp, val_exp, false);
21085 emit_jump (jump_around_label);
21086 emit_label (hot_label);
21089 /* Step 2: Alignment prologue. */
21091 /* Do the expensive promotion once we branched off the small blocks. */
21093 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21094 desired_align, align);
21095 gcc_assert (desired_align >= 1 && align >= 1);
21097 if (desired_align > align)
21099 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
21105 dst = change_address (dst, BLKmode, destreg);
21106 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21111 /* If we know how many bytes need to be stored before dst is
21112 sufficiently aligned, maintain aliasing info accurately. */
21113 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21114 desired_align, align_bytes);
21115 count_exp = plus_constant (count_exp, -align_bytes);
21116 count -= align_bytes;
21118 if (need_zero_guard
21119 && (count < (unsigned HOST_WIDE_INT) size_needed
21120 || (align_bytes == 0
21121 && count < ((unsigned HOST_WIDE_INT) size_needed
21122 + desired_align - align))))
/* It is possible that we copied enough that the main loop will not
   execute.  */
21126 gcc_assert (size_needed > 1);
21127 if (label == NULL_RTX)
21128 label = gen_label_rtx ();
21129 emit_cmp_and_jump_insns (count_exp,
21130 GEN_INT (size_needed),
21131 LTU, 0, counter_mode (count_exp), 1, label);
21132 if (expected_size == -1
21133 || expected_size < (desired_align - align) / 2 + size_needed)
21134 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21136 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21139 if (label && size_needed == 1)
21141 emit_label (label);
21142 LABEL_NUSES (label) = 1;
21144 promoted_val = val_exp;
21145 epilogue_size_needed = 1;
21147 else if (label == NULL_RTX)
21148 epilogue_size_needed = size_needed;
21150 /* Step 3: Main loop. */
21156 gcc_unreachable ();
21158 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21159 count_exp, QImode, 1, expected_size);
21162 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21163 count_exp, Pmode, 1, expected_size);
21165 case unrolled_loop:
21166 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21167 count_exp, Pmode, 4, expected_size);
21169 case rep_prefix_8_byte:
21170 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21173 case rep_prefix_4_byte:
21174 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21177 case rep_prefix_1_byte:
21178 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
/* Properly adjust the offset of the dest memory for aliasing.  */
21183 if (CONST_INT_P (count_exp))
21184 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21185 (count / size_needed) * size_needed);
21187 dst = change_address (dst, BLKmode, destreg);
21189 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
21198 if (size_needed < epilogue_size_needed)
21201 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21202 GEN_INT (size_needed - 1), count_exp, 1,
21204 if (tmp != count_exp)
21205 emit_move_insn (count_exp, tmp);
21207 emit_label (label);
21208 LABEL_NUSES (label) = 1;
21211 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21213 if (force_loopy_epilogue)
21214 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21215 epilogue_size_needed);
21217 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21218 epilogue_size_needed);
21220 if (jump_around_label)
21221 emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */
21237 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21241 rtx align_2_label = NULL_RTX;
21242 rtx align_3_label = NULL_RTX;
21243 rtx align_4_label = gen_label_rtx ();
21244 rtx end_0_label = gen_label_rtx ();
21246 rtx tmpreg = gen_reg_rtx (SImode);
21247 rtx scratch = gen_reg_rtx (SImode);
21251 if (CONST_INT_P (align_rtx))
21252 align = INTVAL (align_rtx);
21254 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21256 /* Is there a known alignment and is it less than 4? */
21259 rtx scratch1 = gen_reg_rtx (Pmode);
21260 emit_move_insn (scratch1, out);
21261 /* Is there a known alignment and is it not 2? */
21264 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21265 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21267 /* Leave just the 3 lower bits. */
21268 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21269 NULL_RTX, 0, OPTAB_WIDEN);
21271 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21272 Pmode, 1, align_4_label);
21273 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21274 Pmode, 1, align_2_label);
21275 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21276 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check if it is aligned to 4 bytes.  */
21283 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21284 NULL_RTX, 0, OPTAB_WIDEN);
21286 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21287 Pmode, 1, align_4_label);
21290 mem = change_address (src, QImode, out);
21292 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-per-byte basis.  */
21295 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21296 QImode, 1, end_0_label);
21298 /* Increment the address. */
21299 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21301 /* Not needed with an alignment of 2 */
21304 emit_label (align_2_label);
21306 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21309 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21311 emit_label (align_3_label);
21314 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21317 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
/* Generate a loop to check 4 bytes at a time.  It is not a good idea
   to align this loop; it only enlarges the program without making it
   faster.  */
21323 emit_label (align_4_label);
21325 mem = change_address (src, SImode, out);
21326 emit_move_insn (scratch, mem);
21327 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
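/* The value computed below is (x - 0x01010101) & ~x & 0x80808080.
   For illustration, take x = 0x41410041, which has a zero in byte 1:

     x - 0x01010101  = 0x403FFF40
     ~x              = 0xBEBEFFBE
     AND of the two  = 0x003EFF00
     & 0x80808080    = 0x00008000  (nonzero: a zero byte exists)

   A word with no zero byte leaves all four 0x80 bits clear.  */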
21332 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21333 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21334 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21335 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21336 gen_int_mode (0x80808080, SImode)));
21337 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21342 rtx reg = gen_reg_rtx (SImode);
21343 rtx reg2 = gen_reg_rtx (Pmode);
21344 emit_move_insn (reg, tmpreg);
21345 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21347 /* If zero is not in the first two bytes, move two bytes forward. */
21348 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21349 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21350 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21351 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21352 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21355 /* Emit lea manually to avoid clobbering of flags. */
21356 emit_insn (gen_rtx_SET (SImode, reg2,
21357 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21359 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21360 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21361 emit_insn (gen_rtx_SET (VOIDmode, out,
21362 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21368 rtx end_2_label = gen_label_rtx ();
21369 /* Is zero in the first two bytes? */
21371 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21372 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21373 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21374 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21375 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21377 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21378 JUMP_LABEL (tmp) = end_2_label;
21380 /* Not in the first two. Move two bytes forward. */
21381 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21382 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21384 emit_label (end_2_label);
21388 /* Avoid branch in fixing the byte. */
21389 tmpreg = gen_lowpart (QImode, tmpreg);
21390 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21391 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21392 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21393 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21395 emit_label (end_0_label);
21398 /* Expand strlen. */
21401 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21403 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding it
   unless TARGET_INLINE_ALL_STRINGOPS.  */
21408 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21409 && !TARGET_INLINE_ALL_STRINGOPS
21410 && !optimize_insn_for_size_p ()
&& (!CONST_INT_P (align) || INTVAL (align) < 4))
  return 0;
21414 addr = force_reg (Pmode, XEXP (src, 0));
21415 scratch1 = gen_reg_rtx (Pmode);
21417 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21418 && !optimize_insn_for_size_p ())
/* Well, it seems that some optimizers do not combine a call like
   foo (strlen (bar), strlen (bar));
   when the move and the subtraction are done here.  They do calculate
   the length just once when these instructions are done inside
   output_strlen_unroll ().  But since &bar[strlen (bar)] is often used
   and this uses one fewer register for the lifetime of
   output_strlen_unroll (), it is better this way.  */
21428 emit_move_insn (out, addr);
21430 ix86_expand_strlensi_unroll_1 (out, src, align);
21432 /* strlensi_unroll_1 returns the address of the zero at the end of
21433 the string, like memchr(), so compute the length by subtracting
21434 the start address. */
21435 emit_insn (ix86_gen_sub3 (out, out, addr));
21441 /* Can't use this if the user has appropriated eax, ecx, or edi. */
if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
  return 0;
21445 scratch2 = gen_reg_rtx (Pmode);
21446 scratch3 = gen_reg_rtx (Pmode);
21447 scratch4 = force_reg (Pmode, constm1_rtx);
21449 emit_move_insn (scratch3, addr);
21450 eoschar = force_reg (QImode, eoschar);
21452 src = replace_equiv_address_nv (src, scratch3);
21454 /* If .md starts supporting :P, this can be done in .md. */
21455 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21456 scratch4), UNSPEC_SCAS);
21457 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21458 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));

return 1;
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
21467 construct_plt_address (rtx symbol)
21469 rtx tmp = gen_reg_rtx (Pmode);
21470 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21472 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21473 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21475 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21476 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21481 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21483 rtx pop, int sibcall)
21485 rtx use = NULL, call;
if (pop == const0_rtx)
  pop = NULL;
21489 gcc_assert (!TARGET_64BIT || !pop);
21491 if (TARGET_MACHO && !TARGET_64BIT)
21494 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21495 fnaddr = machopic_indirect_call_target (fnaddr);
21500 /* Static functions and indirect calls don't need the pic register. */
21501 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21502 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21503 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21504 use_reg (&use, pic_offset_table_rtx);
21507 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21509 rtx al = gen_rtx_REG (QImode, AX_REG);
21510 emit_move_insn (al, callarg2);
21511 use_reg (&use, al);
21514 if (ix86_cmodel == CM_LARGE_PIC
21516 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21517 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21518 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21520 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21521 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21523 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21524 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21527 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
if (retval)
  call = gen_rtx_SET (VOIDmode, retval, call);
if (pop)
  {
    pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
    pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
  }
21537 && ix86_cfun_abi () == MS_ABI
21538 && (!callarg2 || INTVAL (callarg2) != -2))
/* We need to represent that the SI and DI registers are clobbered
   by the call.  */
21542 static int clobbered_registers[] = {
21543 XMM6_REG, XMM7_REG, XMM8_REG,
21544 XMM9_REG, XMM10_REG, XMM11_REG,
21545 XMM12_REG, XMM13_REG, XMM14_REG,
21546 XMM15_REG, SI_REG, DI_REG
21549 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21550 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21551 UNSPEC_MS_TO_SYSV_CALL);
21555 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21556 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21559 (SSE_REGNO_P (clobbered_registers[i])
21561 clobbered_registers[i]));
21563 call = gen_rtx_PARALLEL (VOIDmode,
21564 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21568 /* Emit vzeroupper if needed. */
21569 if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
21572 cfun->machine->use_vzeroupper_p = 1;
21573 if (cfun->machine->callee_pass_avx256_p)
21575 if (cfun->machine->callee_return_avx256_p)
21576 avx256 = callee_return_pass_avx256;
21578 avx256 = callee_pass_avx256;
21580 else if (cfun->machine->callee_return_avx256_p)
21581 avx256 = callee_return_avx256;
21583 avx256 = call_no_avx256;
21584 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
21587 call = emit_call_insn (call);
21589 CALL_INSN_FUNCTION_USAGE (call) = use;
21594 /* Output the assembly for a call instruction. */
21597 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21599 bool direct_p = constant_call_address_operand (call_op, Pmode);
21600 bool seh_nop_p = false;
21602 gcc_assert (addr_op == 0 || addr_op == 1);
21604 if (SIBLING_CALL_P (insn))
21607 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21608 /* SEH epilogue detection requires the indirect branch case
21609 to include REX.W. */
21610 else if (TARGET_SEH)
21611 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21613 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21616 /* SEH unwinding can require an extra nop to be emitted in several
21617 circumstances. Determine if we have one of those. */
21622 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21624 /* If we get to another real insn, we don't need the nop. */
/* If we get to the epilogue note, prevent a catch region from
   being adjacent to the standard epilogue sequence.  With non-call
   exceptions we'll have done this during epilogue emission.  */
21631 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21632 && !flag_non_call_exceptions
21633 && !can_throw_internal (insn))
21640 /* If we didn't find a real insn following the call, prevent the
21641 unwinder from looking into the next function. */
if (direct_p)
  {
    if (seh_nop_p)
      return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
    else
      return addr_op ? "call\t%P1" : "call\t%P0";
  }
else
  {
    if (seh_nop_p)
      return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
    else
      return addr_op ? "call\t%A1" : "call\t%A0";
  }
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
21666 static struct machine_function *
21667 ix86_init_machine_status (void)
21669 struct machine_function *f;
21671 f = ggc_alloc_cleared_machine_function ();
21672 f->use_fast_prologue_epilogue_nregs = -1;
21673 f->tls_descriptor_call_expanded_p = 0;
f->call_abi = ix86_abi;

return f;
21679 /* Return a MEM corresponding to a stack slot with mode MODE.
21680 Allocate a new slot if necessary.
21682 The RTL for a function can have several slots available: N is
21683 which slot to use. */
21686 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21688 struct stack_local_entry *s;
21690 gcc_assert (n < MAX_386_STACK_LOCALS);
21692 /* Virtual slot is valid only before vregs are instantiated. */
21693 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21695 for (s = ix86_stack_locals; s; s = s->next)
21696 if (s->mode == mode && s->n == n)
21697 return copy_rtx (s->rtl);
s = ggc_alloc_stack_local_entry ();
s->n = n;
s->mode = mode;
21702 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21704 s->next = ix86_stack_locals;
ix86_stack_locals = s;

return s->rtl;
21709 /* Construct the SYMBOL_REF for the tls_get_addr function. */
21711 static GTY(()) rtx ix86_tls_symbol;
21713 ix86_tls_get_addr (void)
21716 if (!ix86_tls_symbol)
21718 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
(TARGET_ANY_GNU_TLS
 && !TARGET_64BIT)
21721 ? "___tls_get_addr"
21722 : "__tls_get_addr");
21725 return ix86_tls_symbol;
21728 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
21730 static GTY(()) rtx ix86_tls_module_base_symbol;
21732 ix86_tls_module_base (void)
21735 if (!ix86_tls_module_base_symbol)
21737 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
21738 "_TLS_MODULE_BASE_");
21739 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
21740 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
21743 return ix86_tls_module_base_symbol;
21746 /* Calculate the length of the memory address in the instruction
21747 encoding. Does not include the one-byte modrm, opcode, or prefix. */
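/* Some illustrative encodings (the returned length excludes the modrm
   byte itself):

     (%eax)         -> 0  (mod 00)
     (%esp)         -> 1  (SIB byte required)
     4(%ebp)        -> 1  (disp8)
     foo(%eax)      -> 4  (disp32)
     4(%eax,%ebx)   -> 2  (SIB + disp8)  */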
21750 memory_address_length (rtx addr)
21752 struct ix86_address parts;
rtx base, index, disp;
int len;
int ok;
21757 if (GET_CODE (addr) == PRE_DEC
21758 || GET_CODE (addr) == POST_INC
21759 || GET_CODE (addr) == PRE_MODIFY
|| GET_CODE (addr) == POST_MODIFY)
  return 0;
ok = ix86_decompose_address (addr, &parts);
gcc_assert (ok);
21766 if (parts.base && GET_CODE (parts.base) == SUBREG)
21767 parts.base = SUBREG_REG (parts.base);
21768 if (parts.index && GET_CODE (parts.index) == SUBREG)
21769 parts.index = SUBREG_REG (parts.index);
index = parts.index;
disp = parts.disp;
len = 0;

/* Rule of thumb:
     - esp as the base always wants an index,
     - ebp as the base always wants a displacement,
     - r12 as the base always wants an index,
     - r13 as the base always wants a displacement.  */
21782 /* Register Indirect. */
21783 if (base && !index && !disp)
/* esp (for its index) and ebp (for its displacement) need
   the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
   mode.  */
if (REG_P (addr)
21789 && (addr == arg_pointer_rtx
21790 || addr == frame_pointer_rtx
21791 || REGNO (addr) == SP_REG
21792 || REGNO (addr) == BP_REG
21793 || REGNO (addr) == R12_REG
|| REGNO (addr) == R13_REG))
  len = 1;
/* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   is not disp32, but disp32(%rip), so for disp32 a
   SIB byte is needed, unless print_operand_address
   optimizes it into disp32(%rip) or (%rip) is implied
   by UNSPEC.  */
21803 else if (disp && !base && !index)
21810 if (GET_CODE (disp) == CONST)
21811 symbol = XEXP (disp, 0);
21812 if (GET_CODE (symbol) == PLUS
21813 && CONST_INT_P (XEXP (symbol, 1)))
21814 symbol = XEXP (symbol, 0);
21816 if (GET_CODE (symbol) != LABEL_REF
21817 && (GET_CODE (symbol) != SYMBOL_REF
21818 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21819 && (GET_CODE (symbol) != UNSPEC
21820 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21821 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21828 /* Find the length of the displacement constant. */
21831 if (base && satisfies_constraint_K (disp))
21836 /* ebp always wants a displacement. Similarly r13. */
21837 else if (base && REG_P (base)
21838 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21841 /* An index requires the two-byte modrm form.... */
21843 /* ...like esp (or r12), which always wants an index. */
21844 || base == arg_pointer_rtx
21845 || base == frame_pointer_rtx
21846 || (base && REG_P (base)
21847 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
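/* E.g. "add $3, %eax" has an imm8 alternative, so with SHORTFORM set
   its immediate contributes 1 byte, while "add $300, %eax" needs the
   full SImode immediate and contributes 4 bytes.  */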
21867 ix86_attr_length_immediate_default (rtx insn, int shortform)
21871 extract_insn_cached (insn);
21872 for (i = recog_data.n_operands - 1; i >= 0; --i)
21873 if (CONSTANT_P (recog_data.operand[i]))
21875 enum attr_mode mode = get_attr_mode (insn);
21878 if (shortform && CONST_INT_P (recog_data.operand[i]))
21880 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21887 ival = trunc_int_for_mode (ival, HImode);
21890 ival = trunc_int_for_mode (ival, SImode);
21895 if (IN_RANGE (ival, -128, 127))
/* Immediates for DImode instructions are encoded as 32-bit sign-extended
   values.  */
21917 fatal_insn ("unknown insn mode", insn);
21922 /* Compute default value for "length_address" attribute. */
21924 ix86_attr_length_address_default (rtx insn)
21928 if (get_attr_type (insn) == TYPE_LEA)
21930 rtx set = PATTERN (insn), addr;
21932 if (GET_CODE (set) == PARALLEL)
21933 set = XVECEXP (set, 0, 0);
21935 gcc_assert (GET_CODE (set) == SET);
21937 addr = SET_SRC (set);
21938 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21940 if (GET_CODE (addr) == ZERO_EXTEND)
21941 addr = XEXP (addr, 0);
21942 if (GET_CODE (addr) == SUBREG)
21943 addr = SUBREG_REG (addr);
21946 return memory_address_length (addr);
21949 extract_insn_cached (insn);
21950 for (i = recog_data.n_operands - 1; i >= 0; --i)
21951 if (MEM_P (recog_data.operand[i]))
21953 constrain_operands_cached (reload_completed);
21954 if (which_alternative != -1)
21956 const char *constraints = recog_data.constraints[i];
21957 int alt = which_alternative;
21959 while (*constraints == '=' || *constraints == '+')
21962 while (*constraints++ != ',')
21964 /* Skip ignored operands. */
21965 if (*constraints == 'X')
21968 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */
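/* E.g. "vaddps %xmm0, %xmm1, %xmm2" fits the 2-byte VEX form, giving a
   length of 3, while a DImode general-register operand (REX.W) or an
   extended register in a memory address (REX.X/REX.B) forces the 3-byte
   form and a length of 4.  */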
21977 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
/* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
   requires the 3-byte VEX prefix.  */
21984 if (!has_0f_opcode || has_vex_w)
/* We can always use the 2-byte VEX prefix in 32-bit mode.  */
21991 extract_insn_cached (insn);
21993 for (i = recog_data.n_operands - 1; i >= 0; --i)
21994 if (REG_P (recog_data.operand[i]))
21996 /* REX.W bit uses 3 byte VEX prefix. */
21997 if (GET_MODE (recog_data.operand[i]) == DImode
21998 && GENERAL_REG_P (recog_data.operand[i]))
22003 /* REX.X or REX.B bits use 3 byte VEX prefix. */
22004 if (MEM_P (recog_data.operand[i])
22005 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
22012 /* Return the maximum number of instructions a cpu can issue. */
22015 ix86_issue_rate (void)
22019 case PROCESSOR_PENTIUM:
22020 case PROCESSOR_ATOM:
22024 case PROCESSOR_PENTIUMPRO:
22025 case PROCESSOR_PENTIUM4:
22026 case PROCESSOR_ATHLON:
22028 case PROCESSOR_AMDFAM10:
22029 case PROCESSOR_NOCONA:
22030 case PROCESSOR_GENERIC32:
22031 case PROCESSOR_GENERIC64:
22032 case PROCESSOR_BDVER1:
22035 case PROCESSOR_CORE2:
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags
   set by DEP_INSN and nothing else set by DEP_INSN.  */
22047 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22051 /* Simplify the test for uninteresting insns. */
22052 if (insn_type != TYPE_SETCC
22053 && insn_type != TYPE_ICMOV
22054 && insn_type != TYPE_FCMOV
22055 && insn_type != TYPE_IBR)
22058 if ((set = single_set (dep_insn)) != 0)
22060 set = SET_DEST (set);
22063 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22064 && XVECLEN (PATTERN (dep_insn), 0) == 2
22065 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22066 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22068 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22074 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22077 /* This test is true if the dependent insn reads the flags but
22078 not any other potentially set register. */
22079 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22082 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
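/* Illustrative example of the dependence recognized above:
       cmpl %eax, %ebx	; sets only the flags
       je   .L1		; reads only the flags
   The branch depends on the compare through FLAGS_REG alone, so the
   Pentium case of ix86_adjust_cost below treats the pair as pairable. */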
22088 /* Return true iff USE_INSN has a memory address with operands set by
22089 SET_INSN. */
22092 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22095 extract_insn_cached (use_insn);
22096 for (i = recog_data.n_operands - 1; i >= 0; --i)
22097 if (MEM_P (recog_data.operand[i]))
22099 rtx addr = XEXP (recog_data.operand[i], 0);
22100 return modified_in_p (addr, set_insn) != 0;
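/* Illustrative example of such a dependence:
       addl $4, %ebx
       movl (%ebx), %eax
   The load's address uses a register modified by the previous insn;
   on Pentium this is the classic Address Generation Interlock and
   costs an extra cycle (see ix86_adjust_cost below). */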
22106 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22108 enum attr_type insn_type, dep_insn_type;
22109 enum attr_memory memory;
22111 int dep_insn_code_number;
22113 /* Anti and output dependencies have zero cost on all CPUs. */
22114 if (REG_NOTE_KIND (link) != 0)
22117 dep_insn_code_number = recog_memoized (dep_insn);
22119 /* If we can't recognize the insns, we can't really do anything. */
22120 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22123 insn_type = get_attr_type (insn);
22124 dep_insn_type = get_attr_type (dep_insn);
22128 case PROCESSOR_PENTIUM:
22129 /* Address Generation Interlock adds a cycle of latency. */
22130 if (insn_type == TYPE_LEA)
22132 rtx addr = PATTERN (insn);
22134 if (GET_CODE (addr) == PARALLEL)
22135 addr = XVECEXP (addr, 0, 0);
22137 gcc_assert (GET_CODE (addr) == SET);
22139 addr = SET_SRC (addr);
22140 if (modified_in_p (addr, dep_insn))
22143 else if (ix86_agi_dependent (dep_insn, insn))
22146 /* ??? Compares pair with jump/setcc. */
22147 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22150 /* Floating point stores require value to be ready one cycle earlier. */
22151 if (insn_type == TYPE_FMOV
22152 && get_attr_memory (insn) == MEMORY_STORE
22153 && !ix86_agi_dependent (dep_insn, insn))
22157 case PROCESSOR_PENTIUMPRO:
22158 memory = get_attr_memory (insn);
22160 /* INT->FP conversion is expensive. */
22161 if (get_attr_fp_int_src (dep_insn))
22164 /* There is one cycle extra latency between an FP op and a store. */
22165 if (insn_type == TYPE_FMOV
22166 && (set = single_set (dep_insn)) != NULL_RTX
22167 && (set2 = single_set (insn)) != NULL_RTX
22168 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22169 && MEM_P (SET_DEST (set2)))
22172 /* Account for the reorder buffer's ability to hide the latency of a load
22173 by executing it in parallel with the previous instruction when the
22174 previous instruction is not needed to compute the address. */
22175 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22176 && !ix86_agi_dependent (dep_insn, insn))
22178 /* Claim moves take one cycle, as the core can issue one load
22179 at a time and the next load can start a cycle later. */
22180 if (dep_insn_type == TYPE_IMOV
22181 || dep_insn_type == TYPE_FMOV)
22189 memory = get_attr_memory (insn);
22191 /* The esp dependency is resolved before the instruction is really
22192 finished. */
22193 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22194 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22197 /* INT->FP conversion is expensive. */
22198 if (get_attr_fp_int_src (dep_insn))
22201 /* Account for the reorder buffer's ability to hide the latency of a load
22202 by executing it in parallel with the previous instruction when the
22203 previous instruction is not needed to compute the address. */
22204 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22205 && !ix86_agi_dependent (dep_insn, insn))
22207 /* Claim moves take one cycle, as the core can issue one load
22208 at a time and the next load can start a cycle later. */
22209 if (dep_insn_type == TYPE_IMOV
22210 || dep_insn_type == TYPE_FMOV)
22219 case PROCESSOR_ATHLON:
22221 case PROCESSOR_AMDFAM10:
22222 case PROCESSOR_BDVER1:
22223 case PROCESSOR_ATOM:
22224 case PROCESSOR_GENERIC32:
22225 case PROCESSOR_GENERIC64:
22226 memory = get_attr_memory (insn);
22228 /* Account for the reorder buffer's ability to hide the latency of a load
22229 by executing it in parallel with the previous instruction when the
22230 previous instruction is not needed to compute the address. */
22231 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22232 && !ix86_agi_dependent (dep_insn, insn))
22234 enum attr_unit unit = get_attr_unit (insn);
22237 /* Because of the difference between the lengths of the integer and
22238 floating-point unit pipeline preparation stages, the memory operands
22239 for floating point are cheaper.
22241 ??? For Athlon the difference is most probably 2. */
22242 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22245 loadcost = TARGET_ATHLON ? 2 : 0;
22247 if (cost >= loadcost)
22260 /* How many alternative schedules to try. This should be as wide as the
22261 scheduling freedom in the DFA, but no wider. Making this value too
22262 large results in extra work for the scheduler. */
22265 ia32_multipass_dfa_lookahead (void)
22269 case PROCESSOR_PENTIUM:
22272 case PROCESSOR_PENTIUMPRO:
22276 case PROCESSOR_CORE2:
22277 case PROCESSOR_COREI7_32:
22278 case PROCESSOR_COREI7_64:
22279 /* Generally, we want haifa-sched:max_issue() to look ahead as far as the
22280 number of instructions that can be executed in one cycle, i.e.,
22281 issue_rate. It is unclear why the tunings for many CPUs do not do this. */
22282 return ix86_issue_rate ();
22291 /* Model the decoder of Core 2/i7.
22292 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22293 track the instruction fetch block boundaries and make sure that long
22294 (9+ byte) instructions are assigned to decoder D0. */
22296 /* Maximum length of an insn that can be handled by
22297 a secondary decoder unit. '8' for Core 2/i7. */
22298 static int core2i7_secondary_decoder_max_insn_size;
22300 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22301 '16' for Core 2/i7. */
22302 static int core2i7_ifetch_block_size;
22304 /* Maximum number of instructions decoder can handle per cycle.
22305 '6' for Core 2/i7. */
22306 static int core2i7_ifetch_block_max_insns;
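/* Worked example (illustrative): with a 16-byte ifetch block and a
   6-insn limit, ready insns of 7, 7 and 3 bytes pack as 7 + 7 = 14
   <= 16, but 14 + 3 = 17 > 16, so only the first two can be issued
   this cycle.  Likewise a 9-byte insn exceeds the secondary-decoder
   limit of 8 and must be the first insn issued in a cycle. */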
22308 typedef struct ix86_first_cycle_multipass_data_ *
22309 ix86_first_cycle_multipass_data_t;
22310 typedef const struct ix86_first_cycle_multipass_data_ *
22311 const_ix86_first_cycle_multipass_data_t;
22313 /* A variable to store target state across calls to max_issue within
22314 one cycle. */
22315 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22316 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22318 /* Initialize DATA. */
22320 core2i7_first_cycle_multipass_init (void *_data)
22322 ix86_first_cycle_multipass_data_t data
22323 = (ix86_first_cycle_multipass_data_t) _data;
22325 data->ifetch_block_len = 0;
22326 data->ifetch_block_n_insns = 0;
22327 data->ready_try_change = NULL;
22328 data->ready_try_change_size = 0;
22331 /* Advancing the cycle; reset ifetch block counts. */
22333 core2i7_dfa_post_advance_cycle (void)
22335 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22337 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22339 data->ifetch_block_len = 0;
22340 data->ifetch_block_n_insns = 0;
22343 static int min_insn_size (rtx);
22345 /* Filter out insns from ready_try that the core will not be able to issue
22346 on the current cycle due to decoder restrictions. */
22348 core2i7_first_cycle_multipass_filter_ready_try
22349 (const_ix86_first_cycle_multipass_data_t data,
22350 char *ready_try, int n_ready, bool first_cycle_insn_p)
22357 if (ready_try[n_ready])
22360 insn = get_ready_element (n_ready);
22361 insn_size = min_insn_size (insn);
22363 if (/* If this is too long an insn for a secondary decoder ... */
22364 (!first_cycle_insn_p
22365 && insn_size > core2i7_secondary_decoder_max_insn_size)
22366 /* ... or it would not fit into the ifetch block ... */
22367 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22368 /* ... or the decoder is full already ... */
22369 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22370 /* ... mask the insn out. */
22372 ready_try[n_ready] = 1;
22374 if (data->ready_try_change)
22375 SET_BIT (data->ready_try_change, n_ready);
22380 /* Prepare for a new round of multipass lookahead scheduling. */
22382 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22383 bool first_cycle_insn_p)
22385 ix86_first_cycle_multipass_data_t data
22386 = (ix86_first_cycle_multipass_data_t) _data;
22387 const_ix86_first_cycle_multipass_data_t prev_data
22388 = ix86_first_cycle_multipass_data;
22390 /* Restore the state from the end of the previous round. */
22391 data->ifetch_block_len = prev_data->ifetch_block_len;
22392 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22394 /* Filter instructions that cannot be issued on current cycle due to
22395 decoder restrictions. */
22396 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22397 first_cycle_insn_p);
22400 /* INSN is being issued in the current solution. Account for its impact on
22401 the decoder model. */
22403 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22404 rtx insn, const void *_prev_data)
22406 ix86_first_cycle_multipass_data_t data
22407 = (ix86_first_cycle_multipass_data_t) _data;
22408 const_ix86_first_cycle_multipass_data_t prev_data
22409 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22411 int insn_size = min_insn_size (insn);
22413 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22414 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22415 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22416 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22418 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22419 if (!data->ready_try_change)
22421 data->ready_try_change = sbitmap_alloc (n_ready);
22422 data->ready_try_change_size = n_ready;
22424 else if (data->ready_try_change_size < n_ready)
22426 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22428 data->ready_try_change_size = n_ready;
22430 sbitmap_zero (data->ready_try_change);
22432 /* Filter out insns from ready_try that the core will not be able to issue
22433 on the current cycle due to decoder restrictions. */
22434 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22438 /* Revert the effect on ready_try. */
22440 core2i7_first_cycle_multipass_backtrack (const void *_data,
22442 int n_ready ATTRIBUTE_UNUSED)
22444 const_ix86_first_cycle_multipass_data_t data
22445 = (const_ix86_first_cycle_multipass_data_t) _data;
22446 unsigned int i = 0;
22447 sbitmap_iterator sbi;
22449 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22450 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22456 /* Save the result of multipass lookahead scheduling for the next round. */
22458 core2i7_first_cycle_multipass_end (const void *_data)
22460 const_ix86_first_cycle_multipass_data_t data
22461 = (const_ix86_first_cycle_multipass_data_t) _data;
22462 ix86_first_cycle_multipass_data_t next_data
22463 = ix86_first_cycle_multipass_data;
22467 next_data->ifetch_block_len = data->ifetch_block_len;
22468 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22472 /* Deallocate target data. */
22474 core2i7_first_cycle_multipass_fini (void *_data)
22476 ix86_first_cycle_multipass_data_t data
22477 = (ix86_first_cycle_multipass_data_t) _data;
22479 if (data->ready_try_change)
22481 sbitmap_free (data->ready_try_change);
22482 data->ready_try_change = NULL;
22483 data->ready_try_change_size = 0;
22487 /* Prepare for scheduling pass. */
22489 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22490 int verbose ATTRIBUTE_UNUSED,
22491 int max_uid ATTRIBUTE_UNUSED)
22493 /* Install scheduling hooks for the current CPU. Some of these hooks are
22494 used in time-critical parts of the scheduler, so we only set them up
22495 when they are actually used. */
22498 case PROCESSOR_CORE2:
22499 case PROCESSOR_COREI7_32:
22500 case PROCESSOR_COREI7_64:
22501 targetm.sched.dfa_post_advance_cycle
22502 = core2i7_dfa_post_advance_cycle;
22503 targetm.sched.first_cycle_multipass_init
22504 = core2i7_first_cycle_multipass_init;
22505 targetm.sched.first_cycle_multipass_begin
22506 = core2i7_first_cycle_multipass_begin;
22507 targetm.sched.first_cycle_multipass_issue
22508 = core2i7_first_cycle_multipass_issue;
22509 targetm.sched.first_cycle_multipass_backtrack
22510 = core2i7_first_cycle_multipass_backtrack;
22511 targetm.sched.first_cycle_multipass_end
22512 = core2i7_first_cycle_multipass_end;
22513 targetm.sched.first_cycle_multipass_fini
22514 = core2i7_first_cycle_multipass_fini;
22516 /* Set decoder parameters. */
22517 core2i7_secondary_decoder_max_insn_size = 8;
22518 core2i7_ifetch_block_size = 16;
22519 core2i7_ifetch_block_max_insns = 6;
22523 targetm.sched.dfa_post_advance_cycle = NULL;
22524 targetm.sched.first_cycle_multipass_init = NULL;
22525 targetm.sched.first_cycle_multipass_begin = NULL;
22526 targetm.sched.first_cycle_multipass_issue = NULL;
22527 targetm.sched.first_cycle_multipass_backtrack = NULL;
22528 targetm.sched.first_cycle_multipass_end = NULL;
22529 targetm.sched.first_cycle_multipass_fini = NULL;
22535 /* Compute the alignment given to a constant that is being placed in memory.
22536 EXP is the constant and ALIGN is the alignment that the object would
22537 ordinarily have.
22538 The value of this function is used instead of that alignment to align
22539 the object. */
22542 ix86_constant_alignment (tree exp, int align)
22544 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22545 || TREE_CODE (exp) == INTEGER_CST)
22547 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22549 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22552 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22553 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22554 return BITS_PER_WORD;
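/* Illustrative effect of the rules above (assuming 32-bit defaults):
       static const double d = 1.0;	  -> alignment raised from 32 to 64
       static const char s[40] = "...";  -> BITS_PER_WORD when not -Os
   Raising the alignment of long string constants helps the inline
   block-move code that copies them. */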
22559 /* Compute the alignment for a static variable.
22560 TYPE is the data type, and ALIGN is the alignment that
22561 the object would ordinarily have. The value of this function is used
22562 instead of that alignment to align the object. */
22565 ix86_data_alignment (tree type, int align)
22567 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22569 if (AGGREGATE_TYPE_P (type)
22570 && TYPE_SIZE (type)
22571 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22572 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22573 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22574 && align < max_align)
22577 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22578 to a 16-byte boundary. */
22581 if (AGGREGATE_TYPE_P (type)
22582 && TYPE_SIZE (type)
22583 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22584 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22585 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22589 if (TREE_CODE (type) == ARRAY_TYPE)
22591 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22593 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22596 else if (TREE_CODE (type) == COMPLEX_TYPE)
22599 if (TYPE_MODE (type) == DCmode && align < 64)
22601 if ((TYPE_MODE (type) == XCmode
22602 || TYPE_MODE (type) == TCmode) && align < 128)
22605 else if ((TREE_CODE (type) == RECORD_TYPE
22606 || TREE_CODE (type) == UNION_TYPE
22607 || TREE_CODE (type) == QUAL_UNION_TYPE)
22608 && TYPE_FIELDS (type))
22610 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22612 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22615 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22616 || TREE_CODE (type) == INTEGER_TYPE)
22618 if (TYPE_MODE (type) == DFmode && align < 64)
22620 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22627 /* Compute the alignment for a local variable or a stack slot. EXP is
22628 the data type or decl itself, MODE is the widest mode available and
22629 ALIGN is the alignment that the object would ordinarily have. The
22630 value of this function is used instead of that alignment to align the
22631 object. */
22634 ix86_local_alignment (tree exp, enum machine_mode mode,
22635 unsigned int align)
22639 if (exp && DECL_P (exp))
22641 type = TREE_TYPE (exp);
22650 if (use_avx256_p (mode, type))
22651 cfun->machine->use_avx256_p = true;
22653 /* Don't do dynamic stack realignment for long long objects with
22654 -mpreferred-stack-boundary=2. */
22657 && ix86_preferred_stack_boundary < 64
22658 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22659 && (!type || !TYPE_USER_ALIGN (type))
22660 && (!decl || !DECL_USER_ALIGN (decl)))
22663 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
22664 register in MODE. We will return the largest alignment of XF
22665 and DF. */
22668 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22669 align = GET_MODE_ALIGNMENT (DFmode);
22673 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22674 to a 16-byte boundary. The exact wording is:
22676 An array uses the same alignment as its elements, except that a local or
22677 global array variable of length at least 16 bytes or
22678 a C99 variable-length array variable always has alignment of at least 16 bytes.
22680 This was added to allow use of aligned SSE instructions on arrays. The
22681 rule is meant for static storage (where the compiler cannot do the
22682 analysis by itself). We follow it for automatic variables only when
22683 convenient: we fully control everything in the function being compiled, and
22684 functions from other units cannot rely on the alignment.
22686 Exclude the va_list type. It is the common case of a local array where
22687 we cannot benefit from the alignment. */
22688 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22691 if (AGGREGATE_TYPE_P (type)
22692 && (TYPE_MAIN_VARIANT (type)
22693 != TYPE_MAIN_VARIANT (va_list_type_node))
22694 && TYPE_SIZE (type)
22695 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22696 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22697 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22700 if (TREE_CODE (type) == ARRAY_TYPE)
22702 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22704 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22707 else if (TREE_CODE (type) == COMPLEX_TYPE)
22709 if (TYPE_MODE (type) == DCmode && align < 64)
22711 if ((TYPE_MODE (type) == XCmode
22712 || TYPE_MODE (type) == TCmode) && align < 128)
22715 else if ((TREE_CODE (type) == RECORD_TYPE
22716 || TREE_CODE (type) == UNION_TYPE
22717 || TREE_CODE (type) == QUAL_UNION_TYPE)
22718 && TYPE_FIELDS (type))
22720 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22722 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22725 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22726 || TREE_CODE (type) == INTEGER_TYPE)
22729 if (TYPE_MODE (type) == DFmode && align < 64)
22731 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22737 /* Compute the minimum required alignment for dynamic stack realignment
22738 purposes for a local variable, parameter or a stack slot. EXP is
22739 the data type or decl itself, MODE is its mode and ALIGN is the
22740 alignment that the object would ordinarily have. */
22743 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22744 unsigned int align)
22748 if (exp && DECL_P (exp))
22750 type = TREE_TYPE (exp);
22759 if (use_avx256_p (mode, type))
22760 cfun->machine->use_avx256_p = true;
22762 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22765 /* Don't do dynamic stack realignment for long long objects with
22766 -mpreferred-stack-boundary=2. */
22767 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22768 && (!type || !TYPE_USER_ALIGN (type))
22769 && (!decl || !DECL_USER_ALIGN (decl)))
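/* E.g. (illustrative) with -m32 -mpreferred-stack-boundary=2, a local
   "long long" is left at 32-bit alignment by the test above, so it
   alone does not force dynamic realignment of the whole frame. */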
22775 /* Find a location for the static chain incoming to a nested function.
22776 This is a register, unless all free registers are used by arguments. */
22779 ix86_static_chain (const_tree fndecl, bool incoming_p)
22783 if (!DECL_STATIC_CHAIN (fndecl))
22788 /* We always use R10 in 64-bit mode. */
22794 /* By default in 32-bit mode we use ECX to pass the static chain. */
22797 fntype = TREE_TYPE (fndecl);
22798 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
22800 /* Fastcall functions use ecx/edx for arguments, which leaves
22801 us with EAX for the static chain. */
22804 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
22806 /* Thiscall functions use ecx for arguments, which leaves
22807 us with EAX for the static chain. */
22810 else if (ix86_function_regparm (fntype, fndecl) == 3)
22812 /* For regparm 3, we have no free call-clobbered registers in
22813 which to store the static chain. In order to implement this,
22814 we have the trampoline push the static chain to the stack.
22815 However, we can't push a value below the return address when
22816 we call the nested function directly, so we have to use an
22817 alternate entry point. For this we use ESI, and have the
22818 alternate entry point push ESI, so that things appear the
22819 same once we're executing the nested function. */
22822 if (fndecl == current_function_decl)
22823 ix86_static_chain_on_stack = true;
22824 return gen_frame_mem (SImode,
22825 plus_constant (arg_pointer_rtx, -8));
22831 return gen_rtx_REG (Pmode, regno);
22834 /* Emit RTL insns to initialize the variable parts of a trampoline.
22835 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22836 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22837 to be passed to the target function. */
22840 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22844 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22851 /* Depending on the static chain location, either load a register
22852 with a constant, or push the constant to the stack. All of the
22853 instructions are the same size. */
22854 chain = ix86_static_chain (fndecl, true);
22857 if (REGNO (chain) == CX_REG)
22859 else if (REGNO (chain) == AX_REG)
22862 gcc_unreachable ();
22867 mem = adjust_address (m_tramp, QImode, 0);
22868 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22870 mem = adjust_address (m_tramp, SImode, 1);
22871 emit_move_insn (mem, chain_value);
22873 /* Compute offset from the end of the jmp to the target function.
22874 In the case in which the trampoline stores the static chain on
22875 the stack, we need to skip the first insn which pushes the
22876 (call-saved) register static chain; this push is 1 byte. */
22877 disp = expand_binop (SImode, sub_optab, fnaddr,
22878 plus_constant (XEXP (m_tramp, 0),
22879 MEM_P (chain) ? 9 : 10),
22880 NULL_RTX, 1, OPTAB_DIRECT);
22882 mem = adjust_address (m_tramp, QImode, 5);
22883 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22885 mem = adjust_address (m_tramp, SImode, 6);
22886 emit_move_insn (mem, disp);
22892 /* Load the function address to r11. Try to load address using
22893 the shorter movl instead of movabs. We may want to support
22894 movq for kernel mode, but the kernel does not use trampolines at
22895 the moment. */
22896 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22898 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22900 mem = adjust_address (m_tramp, HImode, offset);
22901 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22903 mem = adjust_address (m_tramp, SImode, offset + 2);
22904 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22909 mem = adjust_address (m_tramp, HImode, offset);
22910 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22912 mem = adjust_address (m_tramp, DImode, offset + 2);
22913 emit_move_insn (mem, fnaddr);
22917 /* Load static chain using movabs to r10. */
22918 mem = adjust_address (m_tramp, HImode, offset);
22919 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
22921 mem = adjust_address (m_tramp, DImode, offset + 2);
22922 emit_move_insn (mem, chain_value);
22925 /* Jump to r11; the last (unused) byte is a nop, only there to
22926 pad the write out to a single 32-bit store. */
22927 mem = adjust_address (m_tramp, SImode, offset);
22928 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
22931 gcc_assert (offset <= TRAMPOLINE_SIZE);
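/* Schematic layout of the 64-bit trampoline written above
   (illustrative; the shorter movl form is used when fnaddr fits in
   32 bits):
     offset  0: 49 bb <imm64>	movabs $fnaddr, %r11
     offset 10: 49 ba <imm64>	movabs $chain,  %r10
     offset 20: 49 ff e3 90	jmp *%r11; nop
   The 32-bit register-chain form is:
     offset  0: b9 <imm32>	movl $chain, %ecx   (b8 for %eax)
     offset  5: e9 <rel32>	jmp <fnaddr> */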
22934 #ifdef ENABLE_EXECUTE_STACK
22935 #ifdef CHECK_EXECUTE_STACK_ENABLED
22936 if (CHECK_EXECUTE_STACK_ENABLED)
22938 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22939 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
22943 /* The following file contains several enumerations and data structures
22944 built from the definitions in i386-builtin-types.def. */
22946 #include "i386-builtin-types.inc"
22948 /* Table for the ix86 builtin non-function types. */
22949 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22951 /* Retrieve an element from the above table, building some of
22952 the types lazily. */
22955 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22957 unsigned int index;
22960 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
22962 type = ix86_builtin_type_tab[(int) tcode];
22966 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22967 if (tcode <= IX86_BT_LAST_VECT)
22969 enum machine_mode mode;
22971 index = tcode - IX86_BT_LAST_PRIM - 1;
22972 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22973 mode = ix86_builtin_type_vect_mode[index];
22975 type = build_vector_type_for_mode (itype, mode);
22981 index = tcode - IX86_BT_LAST_VECT - 1;
22982 if (tcode <= IX86_BT_LAST_PTR)
22983 quals = TYPE_UNQUALIFIED;
22985 quals = TYPE_QUAL_CONST;
22987 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22988 if (quals != TYPE_UNQUALIFIED)
22989 itype = build_qualified_type (itype, quals);
22991 type = build_pointer_type (itype);
22994 ix86_builtin_type_tab[(int) tcode] = type;
22998 /* Table for the ix86 builtin function types. */
22999 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
23001 /* Retrieve an element from the above table, building some of
23002 the types lazily. */
23005 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
23009 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
23011 type = ix86_builtin_func_type_tab[(int) tcode];
23015 if (tcode <= IX86_BT_LAST_FUNC)
23017 unsigned start = ix86_builtin_func_start[(int) tcode];
23018 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
23019 tree rtype, atype, args = void_list_node;
23022 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
23023 for (i = after - 1; i > start; --i)
23025 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
23026 args = tree_cons (NULL, atype, args);
23029 type = build_function_type (rtype, args);
23033 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23034 enum ix86_builtin_func_type icode;
23036 icode = ix86_builtin_func_alias_base[index];
23037 type = ix86_get_builtin_func_type (icode);
23040 ix86_builtin_func_type_tab[(int) tcode] = type;
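/* Illustrative use of the two lazy tables above; the enum values are
   assumed to come from the generated i386-builtin-types.inc. */
#if 0
  tree v4sf_type = ix86_get_builtin_type (IX86_BT_V4SF);
  tree addps_type = ix86_get_builtin_func_type (V4SF_FTYPE_V4SF_V4SF);
#endif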
23045 /* Codes for all the SSE/MMX builtins. */
23048 IX86_BUILTIN_ADDPS,
23049 IX86_BUILTIN_ADDSS,
23050 IX86_BUILTIN_DIVPS,
23051 IX86_BUILTIN_DIVSS,
23052 IX86_BUILTIN_MULPS,
23053 IX86_BUILTIN_MULSS,
23054 IX86_BUILTIN_SUBPS,
23055 IX86_BUILTIN_SUBSS,
23057 IX86_BUILTIN_CMPEQPS,
23058 IX86_BUILTIN_CMPLTPS,
23059 IX86_BUILTIN_CMPLEPS,
23060 IX86_BUILTIN_CMPGTPS,
23061 IX86_BUILTIN_CMPGEPS,
23062 IX86_BUILTIN_CMPNEQPS,
23063 IX86_BUILTIN_CMPNLTPS,
23064 IX86_BUILTIN_CMPNLEPS,
23065 IX86_BUILTIN_CMPNGTPS,
23066 IX86_BUILTIN_CMPNGEPS,
23067 IX86_BUILTIN_CMPORDPS,
23068 IX86_BUILTIN_CMPUNORDPS,
23069 IX86_BUILTIN_CMPEQSS,
23070 IX86_BUILTIN_CMPLTSS,
23071 IX86_BUILTIN_CMPLESS,
23072 IX86_BUILTIN_CMPNEQSS,
23073 IX86_BUILTIN_CMPNLTSS,
23074 IX86_BUILTIN_CMPNLESS,
23075 IX86_BUILTIN_CMPNGTSS,
23076 IX86_BUILTIN_CMPNGESS,
23077 IX86_BUILTIN_CMPORDSS,
23078 IX86_BUILTIN_CMPUNORDSS,
23080 IX86_BUILTIN_COMIEQSS,
23081 IX86_BUILTIN_COMILTSS,
23082 IX86_BUILTIN_COMILESS,
23083 IX86_BUILTIN_COMIGTSS,
23084 IX86_BUILTIN_COMIGESS,
23085 IX86_BUILTIN_COMINEQSS,
23086 IX86_BUILTIN_UCOMIEQSS,
23087 IX86_BUILTIN_UCOMILTSS,
23088 IX86_BUILTIN_UCOMILESS,
23089 IX86_BUILTIN_UCOMIGTSS,
23090 IX86_BUILTIN_UCOMIGESS,
23091 IX86_BUILTIN_UCOMINEQSS,
23093 IX86_BUILTIN_CVTPI2PS,
23094 IX86_BUILTIN_CVTPS2PI,
23095 IX86_BUILTIN_CVTSI2SS,
23096 IX86_BUILTIN_CVTSI642SS,
23097 IX86_BUILTIN_CVTSS2SI,
23098 IX86_BUILTIN_CVTSS2SI64,
23099 IX86_BUILTIN_CVTTPS2PI,
23100 IX86_BUILTIN_CVTTSS2SI,
23101 IX86_BUILTIN_CVTTSS2SI64,
23103 IX86_BUILTIN_MAXPS,
23104 IX86_BUILTIN_MAXSS,
23105 IX86_BUILTIN_MINPS,
23106 IX86_BUILTIN_MINSS,
23108 IX86_BUILTIN_LOADUPS,
23109 IX86_BUILTIN_STOREUPS,
23110 IX86_BUILTIN_MOVSS,
23112 IX86_BUILTIN_MOVHLPS,
23113 IX86_BUILTIN_MOVLHPS,
23114 IX86_BUILTIN_LOADHPS,
23115 IX86_BUILTIN_LOADLPS,
23116 IX86_BUILTIN_STOREHPS,
23117 IX86_BUILTIN_STORELPS,
23119 IX86_BUILTIN_MASKMOVQ,
23120 IX86_BUILTIN_MOVMSKPS,
23121 IX86_BUILTIN_PMOVMSKB,
23123 IX86_BUILTIN_MOVNTPS,
23124 IX86_BUILTIN_MOVNTQ,
23126 IX86_BUILTIN_LOADDQU,
23127 IX86_BUILTIN_STOREDQU,
23129 IX86_BUILTIN_PACKSSWB,
23130 IX86_BUILTIN_PACKSSDW,
23131 IX86_BUILTIN_PACKUSWB,
23133 IX86_BUILTIN_PADDB,
23134 IX86_BUILTIN_PADDW,
23135 IX86_BUILTIN_PADDD,
23136 IX86_BUILTIN_PADDQ,
23137 IX86_BUILTIN_PADDSB,
23138 IX86_BUILTIN_PADDSW,
23139 IX86_BUILTIN_PADDUSB,
23140 IX86_BUILTIN_PADDUSW,
23141 IX86_BUILTIN_PSUBB,
23142 IX86_BUILTIN_PSUBW,
23143 IX86_BUILTIN_PSUBD,
23144 IX86_BUILTIN_PSUBQ,
23145 IX86_BUILTIN_PSUBSB,
23146 IX86_BUILTIN_PSUBSW,
23147 IX86_BUILTIN_PSUBUSB,
23148 IX86_BUILTIN_PSUBUSW,
23151 IX86_BUILTIN_PANDN,
23155 IX86_BUILTIN_PAVGB,
23156 IX86_BUILTIN_PAVGW,
23158 IX86_BUILTIN_PCMPEQB,
23159 IX86_BUILTIN_PCMPEQW,
23160 IX86_BUILTIN_PCMPEQD,
23161 IX86_BUILTIN_PCMPGTB,
23162 IX86_BUILTIN_PCMPGTW,
23163 IX86_BUILTIN_PCMPGTD,
23165 IX86_BUILTIN_PMADDWD,
23167 IX86_BUILTIN_PMAXSW,
23168 IX86_BUILTIN_PMAXUB,
23169 IX86_BUILTIN_PMINSW,
23170 IX86_BUILTIN_PMINUB,
23172 IX86_BUILTIN_PMULHUW,
23173 IX86_BUILTIN_PMULHW,
23174 IX86_BUILTIN_PMULLW,
23176 IX86_BUILTIN_PSADBW,
23177 IX86_BUILTIN_PSHUFW,
23179 IX86_BUILTIN_PSLLW,
23180 IX86_BUILTIN_PSLLD,
23181 IX86_BUILTIN_PSLLQ,
23182 IX86_BUILTIN_PSRAW,
23183 IX86_BUILTIN_PSRAD,
23184 IX86_BUILTIN_PSRLW,
23185 IX86_BUILTIN_PSRLD,
23186 IX86_BUILTIN_PSRLQ,
23187 IX86_BUILTIN_PSLLWI,
23188 IX86_BUILTIN_PSLLDI,
23189 IX86_BUILTIN_PSLLQI,
23190 IX86_BUILTIN_PSRAWI,
23191 IX86_BUILTIN_PSRADI,
23192 IX86_BUILTIN_PSRLWI,
23193 IX86_BUILTIN_PSRLDI,
23194 IX86_BUILTIN_PSRLQI,
23196 IX86_BUILTIN_PUNPCKHBW,
23197 IX86_BUILTIN_PUNPCKHWD,
23198 IX86_BUILTIN_PUNPCKHDQ,
23199 IX86_BUILTIN_PUNPCKLBW,
23200 IX86_BUILTIN_PUNPCKLWD,
23201 IX86_BUILTIN_PUNPCKLDQ,
23203 IX86_BUILTIN_SHUFPS,
23205 IX86_BUILTIN_RCPPS,
23206 IX86_BUILTIN_RCPSS,
23207 IX86_BUILTIN_RSQRTPS,
23208 IX86_BUILTIN_RSQRTPS_NR,
23209 IX86_BUILTIN_RSQRTSS,
23210 IX86_BUILTIN_RSQRTF,
23211 IX86_BUILTIN_SQRTPS,
23212 IX86_BUILTIN_SQRTPS_NR,
23213 IX86_BUILTIN_SQRTSS,
23215 IX86_BUILTIN_UNPCKHPS,
23216 IX86_BUILTIN_UNPCKLPS,
23218 IX86_BUILTIN_ANDPS,
23219 IX86_BUILTIN_ANDNPS,
23221 IX86_BUILTIN_XORPS,
23224 IX86_BUILTIN_LDMXCSR,
23225 IX86_BUILTIN_STMXCSR,
23226 IX86_BUILTIN_SFENCE,
23228 /* 3DNow! Original */
23229 IX86_BUILTIN_FEMMS,
23230 IX86_BUILTIN_PAVGUSB,
23231 IX86_BUILTIN_PF2ID,
23232 IX86_BUILTIN_PFACC,
23233 IX86_BUILTIN_PFADD,
23234 IX86_BUILTIN_PFCMPEQ,
23235 IX86_BUILTIN_PFCMPGE,
23236 IX86_BUILTIN_PFCMPGT,
23237 IX86_BUILTIN_PFMAX,
23238 IX86_BUILTIN_PFMIN,
23239 IX86_BUILTIN_PFMUL,
23240 IX86_BUILTIN_PFRCP,
23241 IX86_BUILTIN_PFRCPIT1,
23242 IX86_BUILTIN_PFRCPIT2,
23243 IX86_BUILTIN_PFRSQIT1,
23244 IX86_BUILTIN_PFRSQRT,
23245 IX86_BUILTIN_PFSUB,
23246 IX86_BUILTIN_PFSUBR,
23247 IX86_BUILTIN_PI2FD,
23248 IX86_BUILTIN_PMULHRW,
23250 /* 3DNow! Athlon Extensions */
23251 IX86_BUILTIN_PF2IW,
23252 IX86_BUILTIN_PFNACC,
23253 IX86_BUILTIN_PFPNACC,
23254 IX86_BUILTIN_PI2FW,
23255 IX86_BUILTIN_PSWAPDSI,
23256 IX86_BUILTIN_PSWAPDSF,
23259 IX86_BUILTIN_ADDPD,
23260 IX86_BUILTIN_ADDSD,
23261 IX86_BUILTIN_DIVPD,
23262 IX86_BUILTIN_DIVSD,
23263 IX86_BUILTIN_MULPD,
23264 IX86_BUILTIN_MULSD,
23265 IX86_BUILTIN_SUBPD,
23266 IX86_BUILTIN_SUBSD,
23268 IX86_BUILTIN_CMPEQPD,
23269 IX86_BUILTIN_CMPLTPD,
23270 IX86_BUILTIN_CMPLEPD,
23271 IX86_BUILTIN_CMPGTPD,
23272 IX86_BUILTIN_CMPGEPD,
23273 IX86_BUILTIN_CMPNEQPD,
23274 IX86_BUILTIN_CMPNLTPD,
23275 IX86_BUILTIN_CMPNLEPD,
23276 IX86_BUILTIN_CMPNGTPD,
23277 IX86_BUILTIN_CMPNGEPD,
23278 IX86_BUILTIN_CMPORDPD,
23279 IX86_BUILTIN_CMPUNORDPD,
23280 IX86_BUILTIN_CMPEQSD,
23281 IX86_BUILTIN_CMPLTSD,
23282 IX86_BUILTIN_CMPLESD,
23283 IX86_BUILTIN_CMPNEQSD,
23284 IX86_BUILTIN_CMPNLTSD,
23285 IX86_BUILTIN_CMPNLESD,
23286 IX86_BUILTIN_CMPORDSD,
23287 IX86_BUILTIN_CMPUNORDSD,
23289 IX86_BUILTIN_COMIEQSD,
23290 IX86_BUILTIN_COMILTSD,
23291 IX86_BUILTIN_COMILESD,
23292 IX86_BUILTIN_COMIGTSD,
23293 IX86_BUILTIN_COMIGESD,
23294 IX86_BUILTIN_COMINEQSD,
23295 IX86_BUILTIN_UCOMIEQSD,
23296 IX86_BUILTIN_UCOMILTSD,
23297 IX86_BUILTIN_UCOMILESD,
23298 IX86_BUILTIN_UCOMIGTSD,
23299 IX86_BUILTIN_UCOMIGESD,
23300 IX86_BUILTIN_UCOMINEQSD,
23302 IX86_BUILTIN_MAXPD,
23303 IX86_BUILTIN_MAXSD,
23304 IX86_BUILTIN_MINPD,
23305 IX86_BUILTIN_MINSD,
23307 IX86_BUILTIN_ANDPD,
23308 IX86_BUILTIN_ANDNPD,
23310 IX86_BUILTIN_XORPD,
23312 IX86_BUILTIN_SQRTPD,
23313 IX86_BUILTIN_SQRTSD,
23315 IX86_BUILTIN_UNPCKHPD,
23316 IX86_BUILTIN_UNPCKLPD,
23318 IX86_BUILTIN_SHUFPD,
23320 IX86_BUILTIN_LOADUPD,
23321 IX86_BUILTIN_STOREUPD,
23322 IX86_BUILTIN_MOVSD,
23324 IX86_BUILTIN_LOADHPD,
23325 IX86_BUILTIN_LOADLPD,
23327 IX86_BUILTIN_CVTDQ2PD,
23328 IX86_BUILTIN_CVTDQ2PS,
23330 IX86_BUILTIN_CVTPD2DQ,
23331 IX86_BUILTIN_CVTPD2PI,
23332 IX86_BUILTIN_CVTPD2PS,
23333 IX86_BUILTIN_CVTTPD2DQ,
23334 IX86_BUILTIN_CVTTPD2PI,
23336 IX86_BUILTIN_CVTPI2PD,
23337 IX86_BUILTIN_CVTSI2SD,
23338 IX86_BUILTIN_CVTSI642SD,
23340 IX86_BUILTIN_CVTSD2SI,
23341 IX86_BUILTIN_CVTSD2SI64,
23342 IX86_BUILTIN_CVTSD2SS,
23343 IX86_BUILTIN_CVTSS2SD,
23344 IX86_BUILTIN_CVTTSD2SI,
23345 IX86_BUILTIN_CVTTSD2SI64,
23347 IX86_BUILTIN_CVTPS2DQ,
23348 IX86_BUILTIN_CVTPS2PD,
23349 IX86_BUILTIN_CVTTPS2DQ,
23351 IX86_BUILTIN_MOVNTI,
23352 IX86_BUILTIN_MOVNTPD,
23353 IX86_BUILTIN_MOVNTDQ,
23355 IX86_BUILTIN_MOVQ128,
23358 IX86_BUILTIN_MASKMOVDQU,
23359 IX86_BUILTIN_MOVMSKPD,
23360 IX86_BUILTIN_PMOVMSKB128,
23362 IX86_BUILTIN_PACKSSWB128,
23363 IX86_BUILTIN_PACKSSDW128,
23364 IX86_BUILTIN_PACKUSWB128,
23366 IX86_BUILTIN_PADDB128,
23367 IX86_BUILTIN_PADDW128,
23368 IX86_BUILTIN_PADDD128,
23369 IX86_BUILTIN_PADDQ128,
23370 IX86_BUILTIN_PADDSB128,
23371 IX86_BUILTIN_PADDSW128,
23372 IX86_BUILTIN_PADDUSB128,
23373 IX86_BUILTIN_PADDUSW128,
23374 IX86_BUILTIN_PSUBB128,
23375 IX86_BUILTIN_PSUBW128,
23376 IX86_BUILTIN_PSUBD128,
23377 IX86_BUILTIN_PSUBQ128,
23378 IX86_BUILTIN_PSUBSB128,
23379 IX86_BUILTIN_PSUBSW128,
23380 IX86_BUILTIN_PSUBUSB128,
23381 IX86_BUILTIN_PSUBUSW128,
23383 IX86_BUILTIN_PAND128,
23384 IX86_BUILTIN_PANDN128,
23385 IX86_BUILTIN_POR128,
23386 IX86_BUILTIN_PXOR128,
23388 IX86_BUILTIN_PAVGB128,
23389 IX86_BUILTIN_PAVGW128,
23391 IX86_BUILTIN_PCMPEQB128,
23392 IX86_BUILTIN_PCMPEQW128,
23393 IX86_BUILTIN_PCMPEQD128,
23394 IX86_BUILTIN_PCMPGTB128,
23395 IX86_BUILTIN_PCMPGTW128,
23396 IX86_BUILTIN_PCMPGTD128,
23398 IX86_BUILTIN_PMADDWD128,
23400 IX86_BUILTIN_PMAXSW128,
23401 IX86_BUILTIN_PMAXUB128,
23402 IX86_BUILTIN_PMINSW128,
23403 IX86_BUILTIN_PMINUB128,
23405 IX86_BUILTIN_PMULUDQ,
23406 IX86_BUILTIN_PMULUDQ128,
23407 IX86_BUILTIN_PMULHUW128,
23408 IX86_BUILTIN_PMULHW128,
23409 IX86_BUILTIN_PMULLW128,
23411 IX86_BUILTIN_PSADBW128,
23412 IX86_BUILTIN_PSHUFHW,
23413 IX86_BUILTIN_PSHUFLW,
23414 IX86_BUILTIN_PSHUFD,
23416 IX86_BUILTIN_PSLLDQI128,
23417 IX86_BUILTIN_PSLLWI128,
23418 IX86_BUILTIN_PSLLDI128,
23419 IX86_BUILTIN_PSLLQI128,
23420 IX86_BUILTIN_PSRAWI128,
23421 IX86_BUILTIN_PSRADI128,
23422 IX86_BUILTIN_PSRLDQI128,
23423 IX86_BUILTIN_PSRLWI128,
23424 IX86_BUILTIN_PSRLDI128,
23425 IX86_BUILTIN_PSRLQI128,
23427 IX86_BUILTIN_PSLLDQ128,
23428 IX86_BUILTIN_PSLLW128,
23429 IX86_BUILTIN_PSLLD128,
23430 IX86_BUILTIN_PSLLQ128,
23431 IX86_BUILTIN_PSRAW128,
23432 IX86_BUILTIN_PSRAD128,
23433 IX86_BUILTIN_PSRLW128,
23434 IX86_BUILTIN_PSRLD128,
23435 IX86_BUILTIN_PSRLQ128,
23437 IX86_BUILTIN_PUNPCKHBW128,
23438 IX86_BUILTIN_PUNPCKHWD128,
23439 IX86_BUILTIN_PUNPCKHDQ128,
23440 IX86_BUILTIN_PUNPCKHQDQ128,
23441 IX86_BUILTIN_PUNPCKLBW128,
23442 IX86_BUILTIN_PUNPCKLWD128,
23443 IX86_BUILTIN_PUNPCKLDQ128,
23444 IX86_BUILTIN_PUNPCKLQDQ128,
23446 IX86_BUILTIN_CLFLUSH,
23447 IX86_BUILTIN_MFENCE,
23448 IX86_BUILTIN_LFENCE,
23450 IX86_BUILTIN_BSRSI,
23451 IX86_BUILTIN_BSRDI,
23452 IX86_BUILTIN_RDPMC,
23453 IX86_BUILTIN_RDTSC,
23454 IX86_BUILTIN_RDTSCP,
23455 IX86_BUILTIN_ROLQI,
23456 IX86_BUILTIN_ROLHI,
23457 IX86_BUILTIN_RORQI,
23458 IX86_BUILTIN_RORHI,
23461 IX86_BUILTIN_ADDSUBPS,
23462 IX86_BUILTIN_HADDPS,
23463 IX86_BUILTIN_HSUBPS,
23464 IX86_BUILTIN_MOVSHDUP,
23465 IX86_BUILTIN_MOVSLDUP,
23466 IX86_BUILTIN_ADDSUBPD,
23467 IX86_BUILTIN_HADDPD,
23468 IX86_BUILTIN_HSUBPD,
23469 IX86_BUILTIN_LDDQU,
23471 IX86_BUILTIN_MONITOR,
23472 IX86_BUILTIN_MWAIT,
23475 IX86_BUILTIN_PHADDW,
23476 IX86_BUILTIN_PHADDD,
23477 IX86_BUILTIN_PHADDSW,
23478 IX86_BUILTIN_PHSUBW,
23479 IX86_BUILTIN_PHSUBD,
23480 IX86_BUILTIN_PHSUBSW,
23481 IX86_BUILTIN_PMADDUBSW,
23482 IX86_BUILTIN_PMULHRSW,
23483 IX86_BUILTIN_PSHUFB,
23484 IX86_BUILTIN_PSIGNB,
23485 IX86_BUILTIN_PSIGNW,
23486 IX86_BUILTIN_PSIGND,
23487 IX86_BUILTIN_PALIGNR,
23488 IX86_BUILTIN_PABSB,
23489 IX86_BUILTIN_PABSW,
23490 IX86_BUILTIN_PABSD,
23492 IX86_BUILTIN_PHADDW128,
23493 IX86_BUILTIN_PHADDD128,
23494 IX86_BUILTIN_PHADDSW128,
23495 IX86_BUILTIN_PHSUBW128,
23496 IX86_BUILTIN_PHSUBD128,
23497 IX86_BUILTIN_PHSUBSW128,
23498 IX86_BUILTIN_PMADDUBSW128,
23499 IX86_BUILTIN_PMULHRSW128,
23500 IX86_BUILTIN_PSHUFB128,
23501 IX86_BUILTIN_PSIGNB128,
23502 IX86_BUILTIN_PSIGNW128,
23503 IX86_BUILTIN_PSIGND128,
23504 IX86_BUILTIN_PALIGNR128,
23505 IX86_BUILTIN_PABSB128,
23506 IX86_BUILTIN_PABSW128,
23507 IX86_BUILTIN_PABSD128,
23509 /* AMDFAM10 - SSE4A New Instructions. */
23510 IX86_BUILTIN_MOVNTSD,
23511 IX86_BUILTIN_MOVNTSS,
23512 IX86_BUILTIN_EXTRQI,
23513 IX86_BUILTIN_EXTRQ,
23514 IX86_BUILTIN_INSERTQI,
23515 IX86_BUILTIN_INSERTQ,
23518 IX86_BUILTIN_BLENDPD,
23519 IX86_BUILTIN_BLENDPS,
23520 IX86_BUILTIN_BLENDVPD,
23521 IX86_BUILTIN_BLENDVPS,
23522 IX86_BUILTIN_PBLENDVB128,
23523 IX86_BUILTIN_PBLENDW128,
23528 IX86_BUILTIN_INSERTPS128,
23530 IX86_BUILTIN_MOVNTDQA,
23531 IX86_BUILTIN_MPSADBW128,
23532 IX86_BUILTIN_PACKUSDW128,
23533 IX86_BUILTIN_PCMPEQQ,
23534 IX86_BUILTIN_PHMINPOSUW128,
23536 IX86_BUILTIN_PMAXSB128,
23537 IX86_BUILTIN_PMAXSD128,
23538 IX86_BUILTIN_PMAXUD128,
23539 IX86_BUILTIN_PMAXUW128,
23541 IX86_BUILTIN_PMINSB128,
23542 IX86_BUILTIN_PMINSD128,
23543 IX86_BUILTIN_PMINUD128,
23544 IX86_BUILTIN_PMINUW128,
23546 IX86_BUILTIN_PMOVSXBW128,
23547 IX86_BUILTIN_PMOVSXBD128,
23548 IX86_BUILTIN_PMOVSXBQ128,
23549 IX86_BUILTIN_PMOVSXWD128,
23550 IX86_BUILTIN_PMOVSXWQ128,
23551 IX86_BUILTIN_PMOVSXDQ128,
23553 IX86_BUILTIN_PMOVZXBW128,
23554 IX86_BUILTIN_PMOVZXBD128,
23555 IX86_BUILTIN_PMOVZXBQ128,
23556 IX86_BUILTIN_PMOVZXWD128,
23557 IX86_BUILTIN_PMOVZXWQ128,
23558 IX86_BUILTIN_PMOVZXDQ128,
23560 IX86_BUILTIN_PMULDQ128,
23561 IX86_BUILTIN_PMULLD128,
23563 IX86_BUILTIN_ROUNDPD,
23564 IX86_BUILTIN_ROUNDPS,
23565 IX86_BUILTIN_ROUNDSD,
23566 IX86_BUILTIN_ROUNDSS,
23568 IX86_BUILTIN_PTESTZ,
23569 IX86_BUILTIN_PTESTC,
23570 IX86_BUILTIN_PTESTNZC,
23572 IX86_BUILTIN_VEC_INIT_V2SI,
23573 IX86_BUILTIN_VEC_INIT_V4HI,
23574 IX86_BUILTIN_VEC_INIT_V8QI,
23575 IX86_BUILTIN_VEC_EXT_V2DF,
23576 IX86_BUILTIN_VEC_EXT_V2DI,
23577 IX86_BUILTIN_VEC_EXT_V4SF,
23578 IX86_BUILTIN_VEC_EXT_V4SI,
23579 IX86_BUILTIN_VEC_EXT_V8HI,
23580 IX86_BUILTIN_VEC_EXT_V2SI,
23581 IX86_BUILTIN_VEC_EXT_V4HI,
23582 IX86_BUILTIN_VEC_EXT_V16QI,
23583 IX86_BUILTIN_VEC_SET_V2DI,
23584 IX86_BUILTIN_VEC_SET_V4SF,
23585 IX86_BUILTIN_VEC_SET_V4SI,
23586 IX86_BUILTIN_VEC_SET_V8HI,
23587 IX86_BUILTIN_VEC_SET_V4HI,
23588 IX86_BUILTIN_VEC_SET_V16QI,
23590 IX86_BUILTIN_VEC_PACK_SFIX,
23593 IX86_BUILTIN_CRC32QI,
23594 IX86_BUILTIN_CRC32HI,
23595 IX86_BUILTIN_CRC32SI,
23596 IX86_BUILTIN_CRC32DI,
23598 IX86_BUILTIN_PCMPESTRI128,
23599 IX86_BUILTIN_PCMPESTRM128,
23600 IX86_BUILTIN_PCMPESTRA128,
23601 IX86_BUILTIN_PCMPESTRC128,
23602 IX86_BUILTIN_PCMPESTRO128,
23603 IX86_BUILTIN_PCMPESTRS128,
23604 IX86_BUILTIN_PCMPESTRZ128,
23605 IX86_BUILTIN_PCMPISTRI128,
23606 IX86_BUILTIN_PCMPISTRM128,
23607 IX86_BUILTIN_PCMPISTRA128,
23608 IX86_BUILTIN_PCMPISTRC128,
23609 IX86_BUILTIN_PCMPISTRO128,
23610 IX86_BUILTIN_PCMPISTRS128,
23611 IX86_BUILTIN_PCMPISTRZ128,
23613 IX86_BUILTIN_PCMPGTQ,
23615 /* AES instructions */
23616 IX86_BUILTIN_AESENC128,
23617 IX86_BUILTIN_AESENCLAST128,
23618 IX86_BUILTIN_AESDEC128,
23619 IX86_BUILTIN_AESDECLAST128,
23620 IX86_BUILTIN_AESIMC128,
23621 IX86_BUILTIN_AESKEYGENASSIST128,
23623 /* PCLMUL instruction */
23624 IX86_BUILTIN_PCLMULQDQ128,
23627 IX86_BUILTIN_ADDPD256,
23628 IX86_BUILTIN_ADDPS256,
23629 IX86_BUILTIN_ADDSUBPD256,
23630 IX86_BUILTIN_ADDSUBPS256,
23631 IX86_BUILTIN_ANDPD256,
23632 IX86_BUILTIN_ANDPS256,
23633 IX86_BUILTIN_ANDNPD256,
23634 IX86_BUILTIN_ANDNPS256,
23635 IX86_BUILTIN_BLENDPD256,
23636 IX86_BUILTIN_BLENDPS256,
23637 IX86_BUILTIN_BLENDVPD256,
23638 IX86_BUILTIN_BLENDVPS256,
23639 IX86_BUILTIN_DIVPD256,
23640 IX86_BUILTIN_DIVPS256,
23641 IX86_BUILTIN_DPPS256,
23642 IX86_BUILTIN_HADDPD256,
23643 IX86_BUILTIN_HADDPS256,
23644 IX86_BUILTIN_HSUBPD256,
23645 IX86_BUILTIN_HSUBPS256,
23646 IX86_BUILTIN_MAXPD256,
23647 IX86_BUILTIN_MAXPS256,
23648 IX86_BUILTIN_MINPD256,
23649 IX86_BUILTIN_MINPS256,
23650 IX86_BUILTIN_MULPD256,
23651 IX86_BUILTIN_MULPS256,
23652 IX86_BUILTIN_ORPD256,
23653 IX86_BUILTIN_ORPS256,
23654 IX86_BUILTIN_SHUFPD256,
23655 IX86_BUILTIN_SHUFPS256,
23656 IX86_BUILTIN_SUBPD256,
23657 IX86_BUILTIN_SUBPS256,
23658 IX86_BUILTIN_XORPD256,
23659 IX86_BUILTIN_XORPS256,
23660 IX86_BUILTIN_CMPSD,
23661 IX86_BUILTIN_CMPSS,
23662 IX86_BUILTIN_CMPPD,
23663 IX86_BUILTIN_CMPPS,
23664 IX86_BUILTIN_CMPPD256,
23665 IX86_BUILTIN_CMPPS256,
23666 IX86_BUILTIN_CVTDQ2PD256,
23667 IX86_BUILTIN_CVTDQ2PS256,
23668 IX86_BUILTIN_CVTPD2PS256,
23669 IX86_BUILTIN_CVTPS2DQ256,
23670 IX86_BUILTIN_CVTPS2PD256,
23671 IX86_BUILTIN_CVTTPD2DQ256,
23672 IX86_BUILTIN_CVTPD2DQ256,
23673 IX86_BUILTIN_CVTTPS2DQ256,
23674 IX86_BUILTIN_EXTRACTF128PD256,
23675 IX86_BUILTIN_EXTRACTF128PS256,
23676 IX86_BUILTIN_EXTRACTF128SI256,
23677 IX86_BUILTIN_VZEROALL,
23678 IX86_BUILTIN_VZEROUPPER,
23679 IX86_BUILTIN_VPERMILVARPD,
23680 IX86_BUILTIN_VPERMILVARPS,
23681 IX86_BUILTIN_VPERMILVARPD256,
23682 IX86_BUILTIN_VPERMILVARPS256,
23683 IX86_BUILTIN_VPERMILPD,
23684 IX86_BUILTIN_VPERMILPS,
23685 IX86_BUILTIN_VPERMILPD256,
23686 IX86_BUILTIN_VPERMILPS256,
23687 IX86_BUILTIN_VPERMIL2PD,
23688 IX86_BUILTIN_VPERMIL2PS,
23689 IX86_BUILTIN_VPERMIL2PD256,
23690 IX86_BUILTIN_VPERMIL2PS256,
23691 IX86_BUILTIN_VPERM2F128PD256,
23692 IX86_BUILTIN_VPERM2F128PS256,
23693 IX86_BUILTIN_VPERM2F128SI256,
23694 IX86_BUILTIN_VBROADCASTSS,
23695 IX86_BUILTIN_VBROADCASTSD256,
23696 IX86_BUILTIN_VBROADCASTSS256,
23697 IX86_BUILTIN_VBROADCASTPD256,
23698 IX86_BUILTIN_VBROADCASTPS256,
23699 IX86_BUILTIN_VINSERTF128PD256,
23700 IX86_BUILTIN_VINSERTF128PS256,
23701 IX86_BUILTIN_VINSERTF128SI256,
23702 IX86_BUILTIN_LOADUPD256,
23703 IX86_BUILTIN_LOADUPS256,
23704 IX86_BUILTIN_STOREUPD256,
23705 IX86_BUILTIN_STOREUPS256,
23706 IX86_BUILTIN_LDDQU256,
23707 IX86_BUILTIN_MOVNTDQ256,
23708 IX86_BUILTIN_MOVNTPD256,
23709 IX86_BUILTIN_MOVNTPS256,
23710 IX86_BUILTIN_LOADDQU256,
23711 IX86_BUILTIN_STOREDQU256,
23712 IX86_BUILTIN_MASKLOADPD,
23713 IX86_BUILTIN_MASKLOADPS,
23714 IX86_BUILTIN_MASKSTOREPD,
23715 IX86_BUILTIN_MASKSTOREPS,
23716 IX86_BUILTIN_MASKLOADPD256,
23717 IX86_BUILTIN_MASKLOADPS256,
23718 IX86_BUILTIN_MASKSTOREPD256,
23719 IX86_BUILTIN_MASKSTOREPS256,
23720 IX86_BUILTIN_MOVSHDUP256,
23721 IX86_BUILTIN_MOVSLDUP256,
23722 IX86_BUILTIN_MOVDDUP256,
23724 IX86_BUILTIN_SQRTPD256,
23725 IX86_BUILTIN_SQRTPS256,
23726 IX86_BUILTIN_SQRTPS_NR256,
23727 IX86_BUILTIN_RSQRTPS256,
23728 IX86_BUILTIN_RSQRTPS_NR256,
23730 IX86_BUILTIN_RCPPS256,
23732 IX86_BUILTIN_ROUNDPD256,
23733 IX86_BUILTIN_ROUNDPS256,
23735 IX86_BUILTIN_UNPCKHPD256,
23736 IX86_BUILTIN_UNPCKLPD256,
23737 IX86_BUILTIN_UNPCKHPS256,
23738 IX86_BUILTIN_UNPCKLPS256,
23740 IX86_BUILTIN_SI256_SI,
23741 IX86_BUILTIN_PS256_PS,
23742 IX86_BUILTIN_PD256_PD,
23743 IX86_BUILTIN_SI_SI256,
23744 IX86_BUILTIN_PS_PS256,
23745 IX86_BUILTIN_PD_PD256,
23747 IX86_BUILTIN_VTESTZPD,
23748 IX86_BUILTIN_VTESTCPD,
23749 IX86_BUILTIN_VTESTNZCPD,
23750 IX86_BUILTIN_VTESTZPS,
23751 IX86_BUILTIN_VTESTCPS,
23752 IX86_BUILTIN_VTESTNZCPS,
23753 IX86_BUILTIN_VTESTZPD256,
23754 IX86_BUILTIN_VTESTCPD256,
23755 IX86_BUILTIN_VTESTNZCPD256,
23756 IX86_BUILTIN_VTESTZPS256,
23757 IX86_BUILTIN_VTESTCPS256,
23758 IX86_BUILTIN_VTESTNZCPS256,
23759 IX86_BUILTIN_PTESTZ256,
23760 IX86_BUILTIN_PTESTC256,
23761 IX86_BUILTIN_PTESTNZC256,
23763 IX86_BUILTIN_MOVMSKPD256,
23764 IX86_BUILTIN_MOVMSKPS256,
23766 /* TFmode support builtins. */
23768 IX86_BUILTIN_HUGE_VALQ,
23769 IX86_BUILTIN_FABSQ,
23770 IX86_BUILTIN_COPYSIGNQ,
23772 /* Vectorizer support builtins. */
23773 IX86_BUILTIN_CPYSGNPS,
23774 IX86_BUILTIN_CPYSGNPD,
23775 IX86_BUILTIN_CPYSGNPS256,
23776 IX86_BUILTIN_CPYSGNPD256,
23778 IX86_BUILTIN_CVTUDQ2PS,
23780 IX86_BUILTIN_VEC_PERM_V2DF,
23781 IX86_BUILTIN_VEC_PERM_V4SF,
23782 IX86_BUILTIN_VEC_PERM_V2DI,
23783 IX86_BUILTIN_VEC_PERM_V4SI,
23784 IX86_BUILTIN_VEC_PERM_V8HI,
23785 IX86_BUILTIN_VEC_PERM_V16QI,
23786 IX86_BUILTIN_VEC_PERM_V2DI_U,
23787 IX86_BUILTIN_VEC_PERM_V4SI_U,
23788 IX86_BUILTIN_VEC_PERM_V8HI_U,
23789 IX86_BUILTIN_VEC_PERM_V16QI_U,
23790 IX86_BUILTIN_VEC_PERM_V4DF,
23791 IX86_BUILTIN_VEC_PERM_V8SF,
23793 /* FMA4 and XOP instructions. */
23794 IX86_BUILTIN_VFMADDSS,
23795 IX86_BUILTIN_VFMADDSD,
23796 IX86_BUILTIN_VFMADDPS,
23797 IX86_BUILTIN_VFMADDPD,
23798 IX86_BUILTIN_VFMADDPS256,
23799 IX86_BUILTIN_VFMADDPD256,
23800 IX86_BUILTIN_VFMADDSUBPS,
23801 IX86_BUILTIN_VFMADDSUBPD,
23802 IX86_BUILTIN_VFMADDSUBPS256,
23803 IX86_BUILTIN_VFMADDSUBPD256,
23805 IX86_BUILTIN_VPCMOV,
23806 IX86_BUILTIN_VPCMOV_V2DI,
23807 IX86_BUILTIN_VPCMOV_V4SI,
23808 IX86_BUILTIN_VPCMOV_V8HI,
23809 IX86_BUILTIN_VPCMOV_V16QI,
23810 IX86_BUILTIN_VPCMOV_V4SF,
23811 IX86_BUILTIN_VPCMOV_V2DF,
23812 IX86_BUILTIN_VPCMOV256,
23813 IX86_BUILTIN_VPCMOV_V4DI256,
23814 IX86_BUILTIN_VPCMOV_V8SI256,
23815 IX86_BUILTIN_VPCMOV_V16HI256,
23816 IX86_BUILTIN_VPCMOV_V32QI256,
23817 IX86_BUILTIN_VPCMOV_V8SF256,
23818 IX86_BUILTIN_VPCMOV_V4DF256,
23820 IX86_BUILTIN_VPPERM,
23822 IX86_BUILTIN_VPMACSSWW,
23823 IX86_BUILTIN_VPMACSWW,
23824 IX86_BUILTIN_VPMACSSWD,
23825 IX86_BUILTIN_VPMACSWD,
23826 IX86_BUILTIN_VPMACSSDD,
23827 IX86_BUILTIN_VPMACSDD,
23828 IX86_BUILTIN_VPMACSSDQL,
23829 IX86_BUILTIN_VPMACSSDQH,
23830 IX86_BUILTIN_VPMACSDQL,
23831 IX86_BUILTIN_VPMACSDQH,
23832 IX86_BUILTIN_VPMADCSSWD,
23833 IX86_BUILTIN_VPMADCSWD,
23835 IX86_BUILTIN_VPHADDBW,
23836 IX86_BUILTIN_VPHADDBD,
23837 IX86_BUILTIN_VPHADDBQ,
23838 IX86_BUILTIN_VPHADDWD,
23839 IX86_BUILTIN_VPHADDWQ,
23840 IX86_BUILTIN_VPHADDDQ,
23841 IX86_BUILTIN_VPHADDUBW,
23842 IX86_BUILTIN_VPHADDUBD,
23843 IX86_BUILTIN_VPHADDUBQ,
23844 IX86_BUILTIN_VPHADDUWD,
23845 IX86_BUILTIN_VPHADDUWQ,
23846 IX86_BUILTIN_VPHADDUDQ,
23847 IX86_BUILTIN_VPHSUBBW,
23848 IX86_BUILTIN_VPHSUBWD,
23849 IX86_BUILTIN_VPHSUBDQ,
23851 IX86_BUILTIN_VPROTB,
23852 IX86_BUILTIN_VPROTW,
23853 IX86_BUILTIN_VPROTD,
23854 IX86_BUILTIN_VPROTQ,
23855 IX86_BUILTIN_VPROTB_IMM,
23856 IX86_BUILTIN_VPROTW_IMM,
23857 IX86_BUILTIN_VPROTD_IMM,
23858 IX86_BUILTIN_VPROTQ_IMM,
23860 IX86_BUILTIN_VPSHLB,
23861 IX86_BUILTIN_VPSHLW,
23862 IX86_BUILTIN_VPSHLD,
23863 IX86_BUILTIN_VPSHLQ,
23864 IX86_BUILTIN_VPSHAB,
23865 IX86_BUILTIN_VPSHAW,
23866 IX86_BUILTIN_VPSHAD,
23867 IX86_BUILTIN_VPSHAQ,
23869 IX86_BUILTIN_VFRCZSS,
23870 IX86_BUILTIN_VFRCZSD,
23871 IX86_BUILTIN_VFRCZPS,
23872 IX86_BUILTIN_VFRCZPD,
23873 IX86_BUILTIN_VFRCZPS256,
23874 IX86_BUILTIN_VFRCZPD256,
23876 IX86_BUILTIN_VPCOMEQUB,
23877 IX86_BUILTIN_VPCOMNEUB,
23878 IX86_BUILTIN_VPCOMLTUB,
23879 IX86_BUILTIN_VPCOMLEUB,
23880 IX86_BUILTIN_VPCOMGTUB,
23881 IX86_BUILTIN_VPCOMGEUB,
23882 IX86_BUILTIN_VPCOMFALSEUB,
23883 IX86_BUILTIN_VPCOMTRUEUB,
23885 IX86_BUILTIN_VPCOMEQUW,
23886 IX86_BUILTIN_VPCOMNEUW,
23887 IX86_BUILTIN_VPCOMLTUW,
23888 IX86_BUILTIN_VPCOMLEUW,
23889 IX86_BUILTIN_VPCOMGTUW,
23890 IX86_BUILTIN_VPCOMGEUW,
23891 IX86_BUILTIN_VPCOMFALSEUW,
23892 IX86_BUILTIN_VPCOMTRUEUW,
23894 IX86_BUILTIN_VPCOMEQUD,
23895 IX86_BUILTIN_VPCOMNEUD,
23896 IX86_BUILTIN_VPCOMLTUD,
23897 IX86_BUILTIN_VPCOMLEUD,
23898 IX86_BUILTIN_VPCOMGTUD,
23899 IX86_BUILTIN_VPCOMGEUD,
23900 IX86_BUILTIN_VPCOMFALSEUD,
23901 IX86_BUILTIN_VPCOMTRUEUD,
23903 IX86_BUILTIN_VPCOMEQUQ,
23904 IX86_BUILTIN_VPCOMNEUQ,
23905 IX86_BUILTIN_VPCOMLTUQ,
23906 IX86_BUILTIN_VPCOMLEUQ,
23907 IX86_BUILTIN_VPCOMGTUQ,
23908 IX86_BUILTIN_VPCOMGEUQ,
23909 IX86_BUILTIN_VPCOMFALSEUQ,
23910 IX86_BUILTIN_VPCOMTRUEUQ,
23912 IX86_BUILTIN_VPCOMEQB,
23913 IX86_BUILTIN_VPCOMNEB,
23914 IX86_BUILTIN_VPCOMLTB,
23915 IX86_BUILTIN_VPCOMLEB,
23916 IX86_BUILTIN_VPCOMGTB,
23917 IX86_BUILTIN_VPCOMGEB,
23918 IX86_BUILTIN_VPCOMFALSEB,
23919 IX86_BUILTIN_VPCOMTRUEB,
23921 IX86_BUILTIN_VPCOMEQW,
23922 IX86_BUILTIN_VPCOMNEW,
23923 IX86_BUILTIN_VPCOMLTW,
23924 IX86_BUILTIN_VPCOMLEW,
23925 IX86_BUILTIN_VPCOMGTW,
23926 IX86_BUILTIN_VPCOMGEW,
23927 IX86_BUILTIN_VPCOMFALSEW,
23928 IX86_BUILTIN_VPCOMTRUEW,
23930 IX86_BUILTIN_VPCOMEQD,
23931 IX86_BUILTIN_VPCOMNED,
23932 IX86_BUILTIN_VPCOMLTD,
23933 IX86_BUILTIN_VPCOMLED,
23934 IX86_BUILTIN_VPCOMGTD,
23935 IX86_BUILTIN_VPCOMGED,
23936 IX86_BUILTIN_VPCOMFALSED,
23937 IX86_BUILTIN_VPCOMTRUED,
23939 IX86_BUILTIN_VPCOMEQQ,
23940 IX86_BUILTIN_VPCOMNEQ,
23941 IX86_BUILTIN_VPCOMLTQ,
23942 IX86_BUILTIN_VPCOMLEQ,
23943 IX86_BUILTIN_VPCOMGTQ,
23944 IX86_BUILTIN_VPCOMGEQ,
23945 IX86_BUILTIN_VPCOMFALSEQ,
23946 IX86_BUILTIN_VPCOMTRUEQ,
23948 /* LWP instructions. */
23949 IX86_BUILTIN_LLWPCB,
23950 IX86_BUILTIN_SLWPCB,
23951 IX86_BUILTIN_LWPVAL32,
23952 IX86_BUILTIN_LWPVAL64,
23953 IX86_BUILTIN_LWPINS32,
23954 IX86_BUILTIN_LWPINS64,
23958 /* FSGSBASE instructions. */
23959 IX86_BUILTIN_RDFSBASE32,
23960 IX86_BUILTIN_RDFSBASE64,
23961 IX86_BUILTIN_RDGSBASE32,
23962 IX86_BUILTIN_RDGSBASE64,
23963 IX86_BUILTIN_WRFSBASE32,
23964 IX86_BUILTIN_WRFSBASE64,
23965 IX86_BUILTIN_WRGSBASE32,
23966 IX86_BUILTIN_WRGSBASE64,
23968 /* RDRND instructions. */
23969 IX86_BUILTIN_RDRAND16,
23970 IX86_BUILTIN_RDRAND32,
23971 IX86_BUILTIN_RDRAND64,
23973 /* F16C instructions. */
23974 IX86_BUILTIN_CVTPH2PS,
23975 IX86_BUILTIN_CVTPH2PS256,
23976 IX86_BUILTIN_CVTPS2PH,
23977 IX86_BUILTIN_CVTPS2PH256,
23982 /* Table for the ix86 builtin decls. */
23983 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
23985 /* Table of all of the builtin functions that are possible with different ISAs
23986 but are waiting to be built until a function is declared to use that
23987 ISA. */
23988 struct builtin_isa {
23989 const char *name; /* function name */
23990 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
23991 int isa; /* isa_flags this builtin is defined for */
23992 bool const_p; /* true if the declaration is constant */
23993 bool set_and_not_built_p; /* true if recorded but the decl is not yet built */
23996 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
23999 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
24000 of which isa_flags to use in the ix86_builtins_isa array. Store the
24001 function decl in the ix86_builtins array. Return the function decl, or
24002 NULL_TREE if the builtin was not added.
24004 If the front end has a special hook for builtin functions, delay adding
24005 builtin functions that aren't in the current ISA until the ISA is changed
24006 with function specific optimization. Doing so can save about 300K for the
24007 default compiler. When the builtin is expanded, check at that time whether
24008 it is valid.
24010 If the front end doesn't have a special hook, record all builtins, even if
24011 they aren't in the current ISA's instruction sets, in case the user uses
24012 function specific options for a different ISA, so that we don't get scope
24013 errors if a builtin is added in the middle of a function scope. */
24016 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
24017 enum ix86_builtins code)
24019 tree decl = NULL_TREE;
24021 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
24023 ix86_builtins_isa[(int) code].isa = mask;
24025 mask &= ~OPTION_MASK_ISA_64BIT;
24027 || (mask & ix86_isa_flags) != 0
24028 || (lang_hooks.builtin_function
24029 == lang_hooks.builtin_function_ext_scope))
24032 tree type = ix86_get_builtin_func_type (tcode);
24033 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24035 ix86_builtins[(int) code] = decl;
24036 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24040 ix86_builtins[(int) code] = NULL_TREE;
24041 ix86_builtins_isa[(int) code].tcode = tcode;
24042 ix86_builtins_isa[(int) code].name = name;
24043 ix86_builtins_isa[(int) code].const_p = false;
24044 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24051 /* Like def_builtin, but also marks the function decl "const". */
24054 def_builtin_const (int mask, const char *name,
24055 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24057 tree decl = def_builtin (mask, name, tcode, code);
24059 TREE_READONLY (decl) = 1;
24061 ix86_builtins_isa[(int) code].const_p = true;
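/* Illustrative use of the helpers above, mirroring how the tables
   later in this file register builtins (the SSE name and type shown
   are assumed): */
#if 0
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
		     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
#endif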
24066 /* Add any new builtin functions for a given ISA that may not have been
24067 declared. This saves a bit of space compared to adding all of the
24068 declarations to the tree, even if we didn't use them. */
24071 ix86_add_new_builtins (int isa)
24075 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24077 if ((ix86_builtins_isa[i].isa & isa) != 0
24078 && ix86_builtins_isa[i].set_and_not_built_p)
24082 /* Don't define the builtin again. */
24083 ix86_builtins_isa[i].set_and_not_built_p = false;
24085 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24086 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24087 type, i, BUILT_IN_MD, NULL,
24090 ix86_builtins[i] = decl;
24091 if (ix86_builtins_isa[i].const_p)
24092 TREE_READONLY (decl) = 1;
24097 /* Bits for builtin_description.flag. */
24099 /* Set when we don't support the comparison natively, and should
24100 swap_comparison in order to support it. */
24101 #define BUILTIN_DESC_SWAP_OPERANDS 1
24103 struct builtin_description
24105 const unsigned int mask;
24106 const enum insn_code icode;
24107 const char *const name;
24108 const enum ix86_builtins code;
24109 const enum rtx_code comparison;
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
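/* In the pcmpestr/pcmpistr tables the last field is not a function
   type but a CC mode cast to int: the trailing letter of the builtin
   name (a, c, o, s, z) names the EFLAGS condition the builtin reads
   from the string-compare insn, e.g. __builtin_ia32_pcmpestric128
   tests the carry flag via CCCmode.  */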
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
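/* A note on reading the flag field above and below: the (int)-cast
   FTYPE enumerator spells out the builtin's C signature, e.g.
   VOID_FTYPE_PFLOAT_V4SF is "void f (float *, __v4sf)" as used by
   __builtin_ia32_storeups, and V4SF_FTYPE_PCFLOAT is
   "__v4sf f (const float *)" as used by __builtin_ia32_loadups.  */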
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
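  /* Note on the *_COUNT signatures above: the *_SI_COUNT forms (e.g.
     __builtin_ia32_psllwi) take the shift count as a scalar integer,
     while the V4HI_V4HI_COUNT/V1DI_V1DI_COUNT forms (e.g.
     __builtin_ia32_psllw) take it in the low quadword of an MMX
     register, mirroring the two hardware encodings of the shifts.  */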
  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
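  /* The vec_perm entries above deliberately use CODE_FOR_nothing:
     there is no single insn pattern behind them, so they are not
     expanded through the icode field but by dedicated code that
     selects a suitable instruction sequence for each permutation.  */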
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
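
  /* SSE2 MMX */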
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
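
  /* SSE3 */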
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
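
  /* SSSE3 */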
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
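
  /* SSE4.1 */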
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
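
  /* SSE4.1 round and ptest (OPTION_MASK_ISA_ROUND tracks SSE4.1).  */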
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
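
  /* SSE4.2 */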
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
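
  /* SSE4A */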
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
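
  /* AES */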
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
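
  /* PCLMUL */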
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
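
  /* AVX */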
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
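
  /* ABM */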
  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
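
  /* F16C */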
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
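
/* Illustrative note (not part of the original sources): each MULTI_ARG_*
   name above is shorthand for an ix86_builtin_func_type.  For example,
   MULTI_ARG_3_SF is V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a builtin with the
   C prototype

     __v4sf f (__v4sf, __v4sf, __v4sf);

   as used below by "__builtin_ia32_vfmaddps".  */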
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
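  /* The "neq" spellings below are aliases: they share the
     IX86_BUILTIN_VPCOMNE* codes and the NE comparison with the
     corresponding "ne" spellings.  */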
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */
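
/* For instance (an illustrative example, not from the original sources):
   even if the command line only enables -msse2, a single function can opt
   into a newer ISA and use its builtins:

     __attribute__ ((target ("sse4.1")))
     __m128i f (__m128i x, __m128i y)
     {
       return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) x, (__v4si) y);
     }

   which is only possible if the SSE4.1 builtins were registered here
   regardless of the command-line ISA.  */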
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;
  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }
  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
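
  /* As an illustration (not in the original sources): the bdesc_args row
     for "__builtin_ia32_paddq" carries flag V1DI_FTYPE_V1DI_V1DI, so the
     loop above registers a const builtin with the C prototype

       __v1di __builtin_ia32_paddq (__v1di, __v1di);

     gated on OPTION_MASK_ISA_SSE2.  */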
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
25200 /* pcmpistr[im] insns. */
25201 for (i = 0, d = bdesc_pcmpistr;
25202 i < ARRAY_SIZE (bdesc_pcmpistr);
25205 if (d->code == IX86_BUILTIN_PCMPISTRM128)
25206 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
25208 ftype = INT_FTYPE_V16QI_V16QI_INT;
25209 def_builtin_const (d->mask, d->name, ftype, d->code);
25212 /* comi/ucomi insns. */
25213 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25215 if (d->mask == OPTION_MASK_ISA_SSE2)
25216 ftype = INT_FTYPE_V2DF_V2DF;
25218 ftype = INT_FTYPE_V4SF_V4SF;
25219 def_builtin_const (d->mask, d->name, ftype, d->code);
25223 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25224 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25225 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25226 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
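/* Illustrative note: xmmintrin.h wraps these two as _mm_setcsr and
   _mm_getcsr respectively.  */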
25228 /* SSE or 3DNow!A */
25229 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25230 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25231 IX86_BUILTIN_MASKMOVQ);
25234 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25235 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25237 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25238 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25239 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25240 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
25243 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25244 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25245 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25246 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
25249 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25250 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25251 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25252 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25253 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25254 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25255 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25256 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25257 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25258 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25259 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25260 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
25263 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25264 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
25266 /* MMX access to the vec_init patterns. */
25267 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25268 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25270 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25271 V4HI_FTYPE_HI_HI_HI_HI,
25272 IX86_BUILTIN_VEC_INIT_V4HI);
25274 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25275 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25276 IX86_BUILTIN_VEC_INIT_V8QI);
25278 /* Access to the vec_extract patterns. */
25279 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25280 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25281 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25282 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25283 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25284 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25285 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25286 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25287 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25288 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25290 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25291 "__builtin_ia32_vec_ext_v4hi",
25292 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25294 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25295 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25297 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25298 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
25300 /* Access to the vec_set patterns. */
25301 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25302 "__builtin_ia32_vec_set_v2di",
25303 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25305 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25306 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25308 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25309 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25311 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25312 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25314 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25315 "__builtin_ia32_vec_set_v4hi",
25316 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25318 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25319 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
25321 /* Add FMA4 and XOP multi-arg builtins.  */
25322 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25327 ftype = (enum ix86_builtin_func_type) d->flag;
25328 def_builtin_const (d->mask, d->name, ftype, d->code);
25332 /* Internal method for ix86_init_builtins. */
25335 ix86_init_builtins_va_builtins_abi (void)
25337 tree ms_va_ref, sysv_va_ref;
25338 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25339 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25340 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25341 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25345 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25346 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25347 ms_va_ref = build_reference_type (ms_va_list_type_node);
25349 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25352 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25353 fnvoid_va_start_ms =
25354 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25355 fnvoid_va_end_sysv =
25356 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25357 fnvoid_va_start_sysv =
25358 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25360 fnvoid_va_copy_ms =
25361 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25363 fnvoid_va_copy_sysv =
25364 build_function_type_list (void_type_node, sysv_va_ref,
25365 sysv_va_ref, NULL_TREE);
25367 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25368 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25369 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25370 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25371 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25372 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25373 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25374 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25375 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25376 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25377 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25378 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
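/* Illustrative note: with these registered, a varargs function declared
   __attribute__((ms_abi)) on a SysV target reaches va_start/va_end/
   va_copy through the __builtin_ms_va_* entry points above, and the
   sysv_abi attribute works the other way around.  */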
25382 ix86_init_builtin_types (void)
25384 tree float128_type_node, float80_type_node;
25386 /* The __float80 type. */
25387 float80_type_node = long_double_type_node;
25388 if (TYPE_MODE (float80_type_node) != XFmode)
25390 /* The __float80 type. */
25391 float80_type_node = make_node (REAL_TYPE);
25393 TYPE_PRECISION (float80_type_node) = 80;
25394 layout_type (float80_type_node);
25396 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25398 /* The __float128 type. */
25399 float128_type_node = make_node (REAL_TYPE);
25400 TYPE_PRECISION (float128_type_node) = 128;
25401 layout_type (float128_type_node);
25402 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25404 /* This macro is built by i386-builtin-types.awk. */
25405 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25409 ix86_init_builtins (void)
25413 ix86_init_builtin_types ();
25415 /* TFmode support builtins. */
25416 def_builtin_const (0, "__builtin_infq",
25417 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25418 def_builtin_const (0, "__builtin_huge_valq",
25419 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25421 /* We will expand them to normal calls if SSE2 isn't available, since
25422 they are used by libgcc. */
25423 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25424 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25425 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25426 TREE_READONLY (t) = 1;
25427 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25429 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25430 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25431 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25432 TREE_READONLY (t) = 1;
25433 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
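/* "__fabstf2" and "__copysigntf3" above are the libgcc entry points
   these builtins fall back to when they cannot be expanded inline; see
   the IX86_BUILTIN_FABSQ / IX86_BUILTIN_COPYSIGNQ handling in
   ix86_expand_builtin.  */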
25435 ix86_init_mmx_sse_builtins ();
25438 ix86_init_builtins_va_builtins_abi ();
25440 #ifdef SUBTARGET_INIT_BUILTINS
25441 SUBTARGET_INIT_BUILTINS;
25445 /* Return the ix86 builtin for CODE. */
25448 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25450 if (code >= IX86_BUILTIN_MAX)
25451 return error_mark_node;
25453 return ix86_builtins[code];
25456 /* Errors in the source file can cause expand_expr to return const0_rtx
25457 where we expect a vector. To avoid crashing, use one of the vector
25458 clear instructions. */
25460 safe_vector_operand (rtx x, enum machine_mode mode)
25462 if (x == const0_rtx)
25463 x = CONST0_RTX (mode);
25467 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25470 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25473 tree arg0 = CALL_EXPR_ARG (exp, 0);
25474 tree arg1 = CALL_EXPR_ARG (exp, 1);
25475 rtx op0 = expand_normal (arg0);
25476 rtx op1 = expand_normal (arg1);
25477 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25478 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25479 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25481 if (VECTOR_MODE_P (mode0))
25482 op0 = safe_vector_operand (op0, mode0);
25483 if (VECTOR_MODE_P (mode1))
25484 op1 = safe_vector_operand (op1, mode1);
25486 if (optimize || !target
25487 || GET_MODE (target) != tmode
25488 || !insn_data[icode].operand[0].predicate (target, tmode))
25489 target = gen_reg_rtx (tmode);
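/* The insn wants a TImode operand here, but the builtin argument is a
   plain int: load the SImode value into a V4SImode register and
   reinterpret its low part as TImode.  */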
25491 if (GET_MODE (op1) == SImode && mode1 == TImode)
25493 rtx x = gen_reg_rtx (V4SImode);
25494 emit_insn (gen_sse2_loadd (x, op1));
25495 op1 = gen_lowpart (TImode, x);
25498 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25499 op0 = copy_to_mode_reg (mode0, op0);
25500 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25501 op1 = copy_to_mode_reg (mode1, op1);
25503 pat = GEN_FCN (icode) (target, op0, op1);
25512 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25515 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25516 enum ix86_builtin_func_type m_type,
25517 enum rtx_code sub_code)
25522 bool comparison_p = false;
25524 bool last_arg_constant = false;
25525 int num_memory = 0;
25528 enum machine_mode mode;
25531 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25535 case MULTI_ARG_4_DF2_DI_I:
25536 case MULTI_ARG_4_DF2_DI_I1:
25537 case MULTI_ARG_4_SF2_SI_I:
25538 case MULTI_ARG_4_SF2_SI_I1:
25540 last_arg_constant = true;
25543 case MULTI_ARG_3_SF:
25544 case MULTI_ARG_3_DF:
25545 case MULTI_ARG_3_SF2:
25546 case MULTI_ARG_3_DF2:
25547 case MULTI_ARG_3_DI:
25548 case MULTI_ARG_3_SI:
25549 case MULTI_ARG_3_SI_DI:
25550 case MULTI_ARG_3_HI:
25551 case MULTI_ARG_3_HI_SI:
25552 case MULTI_ARG_3_QI:
25553 case MULTI_ARG_3_DI2:
25554 case MULTI_ARG_3_SI2:
25555 case MULTI_ARG_3_HI2:
25556 case MULTI_ARG_3_QI2:
25560 case MULTI_ARG_2_SF:
25561 case MULTI_ARG_2_DF:
25562 case MULTI_ARG_2_DI:
25563 case MULTI_ARG_2_SI:
25564 case MULTI_ARG_2_HI:
25565 case MULTI_ARG_2_QI:
25569 case MULTI_ARG_2_DI_IMM:
25570 case MULTI_ARG_2_SI_IMM:
25571 case MULTI_ARG_2_HI_IMM:
25572 case MULTI_ARG_2_QI_IMM:
25574 last_arg_constant = true;
25577 case MULTI_ARG_1_SF:
25578 case MULTI_ARG_1_DF:
25579 case MULTI_ARG_1_SF2:
25580 case MULTI_ARG_1_DF2:
25581 case MULTI_ARG_1_DI:
25582 case MULTI_ARG_1_SI:
25583 case MULTI_ARG_1_HI:
25584 case MULTI_ARG_1_QI:
25585 case MULTI_ARG_1_SI_DI:
25586 case MULTI_ARG_1_HI_DI:
25587 case MULTI_ARG_1_HI_SI:
25588 case MULTI_ARG_1_QI_DI:
25589 case MULTI_ARG_1_QI_SI:
25590 case MULTI_ARG_1_QI_HI:
25594 case MULTI_ARG_2_DI_CMP:
25595 case MULTI_ARG_2_SI_CMP:
25596 case MULTI_ARG_2_HI_CMP:
25597 case MULTI_ARG_2_QI_CMP:
25599 comparison_p = true;
25602 case MULTI_ARG_2_SF_TF:
25603 case MULTI_ARG_2_DF_TF:
25604 case MULTI_ARG_2_DI_TF:
25605 case MULTI_ARG_2_SI_TF:
25606 case MULTI_ARG_2_HI_TF:
25607 case MULTI_ARG_2_QI_TF:
25613 gcc_unreachable ();
25616 if (optimize || !target
25617 || GET_MODE (target) != tmode
25618 || !insn_data[icode].operand[0].predicate (target, tmode))
25619 target = gen_reg_rtx (tmode);
25621 gcc_assert (nargs <= 4);
25623 for (i = 0; i < nargs; i++)
25625 tree arg = CALL_EXPR_ARG (exp, i);
25626 rtx op = expand_normal (arg);
25627 int adjust = (comparison_p) ? 1 : 0;
25628 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25630 if (last_arg_constant && i == nargs-1)
25632 if (!CONST_INT_P (op))
25634 error ("last argument must be an immediate");
25635 return gen_reg_rtx (tmode);
25640 if (VECTOR_MODE_P (mode))
25641 op = safe_vector_operand (op, mode);
25643 /* If we aren't optimizing, only allow one memory operand to be
25644 generated.  */
25645 if (memory_operand (op, mode))
25646 num_memory++;
25648 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25650 if (optimize
25651 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25652 || num_memory > 1)
25653 op = force_reg (mode, op);
25657 args[i].mode = mode;
25663 pat = GEN_FCN (icode) (target, args[0].op);
25668 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25669 GEN_INT ((int)sub_code));
25670 else if (! comparison_p)
25671 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
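/* Otherwise this is a comparison insn: the comparison rtx built from
   SUB_CODE below is passed to the insn as an explicit operand.  */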
25674 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25678 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25683 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25687 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25691 gcc_unreachable ();
25701 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25702 insns with vec_merge. */
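/* (The scalar operation writes only the low element; the remaining
   elements are merged back from the input operand, which is why the
   input value also serves as the merge source below.)  */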
25705 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25709 tree arg0 = CALL_EXPR_ARG (exp, 0);
25710 rtx op1, op0 = expand_normal (arg0);
25711 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25712 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25714 if (optimize || !target
25715 || GET_MODE (target) != tmode
25716 || !insn_data[icode].operand[0].predicate (target, tmode))
25717 target = gen_reg_rtx (tmode);
25719 if (VECTOR_MODE_P (mode0))
25720 op0 = safe_vector_operand (op0, mode0);
25722 if ((optimize && !register_operand (op0, mode0))
25723 || !insn_data[icode].operand[1].predicate (op0, mode0))
25724 op0 = copy_to_mode_reg (mode0, op0);
25727 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25728 op1 = copy_to_mode_reg (mode0, op1);
25730 pat = GEN_FCN (icode) (target, op0, op1);
25737 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25740 ix86_expand_sse_compare (const struct builtin_description *d,
25741 tree exp, rtx target, bool swap)
25744 tree arg0 = CALL_EXPR_ARG (exp, 0);
25745 tree arg1 = CALL_EXPR_ARG (exp, 1);
25746 rtx op0 = expand_normal (arg0);
25747 rtx op1 = expand_normal (arg1);
25749 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25750 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25751 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25752 enum rtx_code comparison = d->comparison;
25754 if (VECTOR_MODE_P (mode0))
25755 op0 = safe_vector_operand (op0, mode0);
25756 if (VECTOR_MODE_P (mode1))
25757 op1 = safe_vector_operand (op1, mode1);
25759 /* Swap operands if we have a comparison that isn't available in
25760 hardware.  */
25761 if (swap)
25762 {
25763 rtx tmp = gen_reg_rtx (mode1);
25764 emit_move_insn (tmp, op1);
25765 op1 = op0;
25766 op0 = tmp;
25767 }
25769 if (optimize || !target
25770 || GET_MODE (target) != tmode
25771 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25772 target = gen_reg_rtx (tmode);
25774 if ((optimize && !register_operand (op0, mode0))
25775 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25776 op0 = copy_to_mode_reg (mode0, op0);
25777 if ((optimize && !register_operand (op1, mode1))
25778 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25779 op1 = copy_to_mode_reg (mode1, op1);
25781 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25782 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25789 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25792 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25796 tree arg0 = CALL_EXPR_ARG (exp, 0);
25797 tree arg1 = CALL_EXPR_ARG (exp, 1);
25798 rtx op0 = expand_normal (arg0);
25799 rtx op1 = expand_normal (arg1);
25800 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25801 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25802 enum rtx_code comparison = d->comparison;
25804 if (VECTOR_MODE_P (mode0))
25805 op0 = safe_vector_operand (op0, mode0);
25806 if (VECTOR_MODE_P (mode1))
25807 op1 = safe_vector_operand (op1, mode1);
25809 /* Swap operands if we have a comparison that isn't available in
25810 hardware.  */
25811 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
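/* The flag result is materialized by zeroing an SImode pseudo and
   setting only its low QImode part from the comparison via a
   STRICT_LOW_PART store; the enclosing SImode register is what is
   finally returned.  */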
25818 target = gen_reg_rtx (SImode);
25819 emit_move_insn (target, const0_rtx);
25820 target = gen_rtx_SUBREG (QImode, target, 0);
25822 if ((optimize && !register_operand (op0, mode0))
25823 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25824 op0 = copy_to_mode_reg (mode0, op0);
25825 if ((optimize && !register_operand (op1, mode1))
25826 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25827 op1 = copy_to_mode_reg (mode1, op1);
25829 pat = GEN_FCN (d->icode) (op0, op1);
25833 emit_insn (gen_rtx_SET (VOIDmode,
25834 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25835 gen_rtx_fmt_ee (comparison, QImode,
25839 return SUBREG_REG (target);
25842 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
25845 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
25849 tree arg0 = CALL_EXPR_ARG (exp, 0);
25850 tree arg1 = CALL_EXPR_ARG (exp, 1);
25851 rtx op0 = expand_normal (arg0);
25852 rtx op1 = expand_normal (arg1);
25853 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25854 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25855 enum rtx_code comparison = d->comparison;
25857 if (VECTOR_MODE_P (mode0))
25858 op0 = safe_vector_operand (op0, mode0);
25859 if (VECTOR_MODE_P (mode1))
25860 op1 = safe_vector_operand (op1, mode1);
25862 target = gen_reg_rtx (SImode);
25863 emit_move_insn (target, const0_rtx);
25864 target = gen_rtx_SUBREG (QImode, target, 0);
25866 if ((optimize && !register_operand (op0, mode0))
25867 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25868 op0 = copy_to_mode_reg (mode0, op0);
25869 if ((optimize && !register_operand (op1, mode1))
25870 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25871 op1 = copy_to_mode_reg (mode1, op1);
25873 pat = GEN_FCN (d->icode) (op0, op1);
25877 emit_insn (gen_rtx_SET (VOIDmode,
25878 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25879 gen_rtx_fmt_ee (comparison, QImode,
25883 return SUBREG_REG (target);
25886 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
25889 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
25890 tree exp, rtx target)
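/* There are three result flavors: PCMPESTRI128 returns the index
   (operand 0), PCMPESTRM128 returns the mask (operand 1), and the
   flag variants read a condition-code bit -- d->flag holds the CC
   mode -- materialized below with the QImode STRICT_LOW_PART idiom.  */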
25893 tree arg0 = CALL_EXPR_ARG (exp, 0);
25894 tree arg1 = CALL_EXPR_ARG (exp, 1);
25895 tree arg2 = CALL_EXPR_ARG (exp, 2);
25896 tree arg3 = CALL_EXPR_ARG (exp, 3);
25897 tree arg4 = CALL_EXPR_ARG (exp, 4);
25898 rtx scratch0, scratch1;
25899 rtx op0 = expand_normal (arg0);
25900 rtx op1 = expand_normal (arg1);
25901 rtx op2 = expand_normal (arg2);
25902 rtx op3 = expand_normal (arg3);
25903 rtx op4 = expand_normal (arg4);
25904 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
25906 tmode0 = insn_data[d->icode].operand[0].mode;
25907 tmode1 = insn_data[d->icode].operand[1].mode;
25908 modev2 = insn_data[d->icode].operand[2].mode;
25909 modei3 = insn_data[d->icode].operand[3].mode;
25910 modev4 = insn_data[d->icode].operand[4].mode;
25911 modei5 = insn_data[d->icode].operand[5].mode;
25912 modeimm = insn_data[d->icode].operand[6].mode;
25914 if (VECTOR_MODE_P (modev2))
25915 op0 = safe_vector_operand (op0, modev2);
25916 if (VECTOR_MODE_P (modev4))
25917 op2 = safe_vector_operand (op2, modev4);
25919 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25920 op0 = copy_to_mode_reg (modev2, op0);
25921 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
25922 op1 = copy_to_mode_reg (modei3, op1);
25923 if ((optimize && !register_operand (op2, modev4))
25924 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
25925 op2 = copy_to_mode_reg (modev4, op2);
25926 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
25927 op3 = copy_to_mode_reg (modei5, op3);
25929 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
25931 error ("the fifth argument must be a 8-bit immediate");
25935 if (d->code == IX86_BUILTIN_PCMPESTRI128)
25937 if (optimize || !target
25938 || GET_MODE (target) != tmode0
25939 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
25940 target = gen_reg_rtx (tmode0);
25942 scratch1 = gen_reg_rtx (tmode1);
25944 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
25946 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
25948 if (optimize || !target
25949 || GET_MODE (target) != tmode1
25950 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25951 target = gen_reg_rtx (tmode1);
25953 scratch0 = gen_reg_rtx (tmode0);
25955 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
25959 gcc_assert (d->flag);
25961 scratch0 = gen_reg_rtx (tmode0);
25962 scratch1 = gen_reg_rtx (tmode1);
25964 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
25974 target = gen_reg_rtx (SImode);
25975 emit_move_insn (target, const0_rtx);
25976 target = gen_rtx_SUBREG (QImode, target, 0);
25979 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25980 gen_rtx_fmt_ee (EQ, QImode,
25981 gen_rtx_REG ((enum machine_mode) d->flag,
25984 return SUBREG_REG (target);
25991 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
25994 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
25995 tree exp, rtx target)
25998 tree arg0 = CALL_EXPR_ARG (exp, 0);
25999 tree arg1 = CALL_EXPR_ARG (exp, 1);
26000 tree arg2 = CALL_EXPR_ARG (exp, 2);
26001 rtx scratch0, scratch1;
26002 rtx op0 = expand_normal (arg0);
26003 rtx op1 = expand_normal (arg1);
26004 rtx op2 = expand_normal (arg2);
26005 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
26007 tmode0 = insn_data[d->icode].operand[0].mode;
26008 tmode1 = insn_data[d->icode].operand[1].mode;
26009 modev2 = insn_data[d->icode].operand[2].mode;
26010 modev3 = insn_data[d->icode].operand[3].mode;
26011 modeimm = insn_data[d->icode].operand[4].mode;
26013 if (VECTOR_MODE_P (modev2))
26014 op0 = safe_vector_operand (op0, modev2);
26015 if (VECTOR_MODE_P (modev3))
26016 op1 = safe_vector_operand (op1, modev3);
26018 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
26019 op0 = copy_to_mode_reg (modev2, op0);
26020 if ((optimize && !register_operand (op1, modev3))
26021 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
26022 op1 = copy_to_mode_reg (modev3, op1);
26024 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
26026 error ("the third argument must be a 8-bit immediate");
26030 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26032 if (optimize || !target
26033 || GET_MODE (target) != tmode0
26034 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26035 target = gen_reg_rtx (tmode0);
26037 scratch1 = gen_reg_rtx (tmode1);
26039 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26041 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26043 if (optimize || !target
26044 || GET_MODE (target) != tmode1
26045 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26046 target = gen_reg_rtx (tmode1);
26048 scratch0 = gen_reg_rtx (tmode0);
26050 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26054 gcc_assert (d->flag);
26056 scratch0 = gen_reg_rtx (tmode0);
26057 scratch1 = gen_reg_rtx (tmode1);
26059 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26069 target = gen_reg_rtx (SImode);
26070 emit_move_insn (target, const0_rtx);
26071 target = gen_rtx_SUBREG (QImode, target, 0);
26074 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26075 gen_rtx_fmt_ee (EQ, QImode,
26076 gen_rtx_REG ((enum machine_mode) d->flag,
26079 return SUBREG_REG (target);
26085 /* Subroutine of ix86_expand_builtin to take care of insns with
26086 variable number of operands. */
26089 ix86_expand_args_builtin (const struct builtin_description *d,
26090 tree exp, rtx target)
26092 rtx pat, real_target;
26093 unsigned int i, nargs;
26094 unsigned int nargs_constant = 0;
26095 int num_memory = 0;
26099 enum machine_mode mode;
26101 bool last_arg_count = false;
26102 enum insn_code icode = d->icode;
26103 const struct insn_data_d *insn_p = &insn_data[icode];
26104 enum machine_mode tmode = insn_p->operand[0].mode;
26105 enum machine_mode rmode = VOIDmode;
26107 enum rtx_code comparison = d->comparison;
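/* The builtin's function type encodes everything needed for expansion:
   the argument count, how many trailing arguments must be immediates
   (nargs_constant), special shift-count handling (the _COUNT types),
   operand swapping (_SWAP), and a result-mode override (rmode).  */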
26109 switch ((enum ix86_builtin_func_type) d->flag)
26111 case INT_FTYPE_V8SF_V8SF_PTEST:
26112 case INT_FTYPE_V4DI_V4DI_PTEST:
26113 case INT_FTYPE_V4DF_V4DF_PTEST:
26114 case INT_FTYPE_V4SF_V4SF_PTEST:
26115 case INT_FTYPE_V2DI_V2DI_PTEST:
26116 case INT_FTYPE_V2DF_V2DF_PTEST:
26117 return ix86_expand_sse_ptest (d, exp, target);
26118 case FLOAT128_FTYPE_FLOAT128:
26119 case FLOAT_FTYPE_FLOAT:
26120 case INT_FTYPE_INT:
26121 case UINT64_FTYPE_INT:
26122 case UINT16_FTYPE_UINT16:
26123 case INT64_FTYPE_INT64:
26124 case INT64_FTYPE_V4SF:
26125 case INT64_FTYPE_V2DF:
26126 case INT_FTYPE_V16QI:
26127 case INT_FTYPE_V8QI:
26128 case INT_FTYPE_V8SF:
26129 case INT_FTYPE_V4DF:
26130 case INT_FTYPE_V4SF:
26131 case INT_FTYPE_V2DF:
26132 case V16QI_FTYPE_V16QI:
26133 case V8SI_FTYPE_V8SF:
26134 case V8SI_FTYPE_V4SI:
26135 case V8HI_FTYPE_V8HI:
26136 case V8HI_FTYPE_V16QI:
26137 case V8QI_FTYPE_V8QI:
26138 case V8SF_FTYPE_V8SF:
26139 case V8SF_FTYPE_V8SI:
26140 case V8SF_FTYPE_V4SF:
26141 case V8SF_FTYPE_V8HI:
26142 case V4SI_FTYPE_V4SI:
26143 case V4SI_FTYPE_V16QI:
26144 case V4SI_FTYPE_V4SF:
26145 case V4SI_FTYPE_V8SI:
26146 case V4SI_FTYPE_V8HI:
26147 case V4SI_FTYPE_V4DF:
26148 case V4SI_FTYPE_V2DF:
26149 case V4HI_FTYPE_V4HI:
26150 case V4DF_FTYPE_V4DF:
26151 case V4DF_FTYPE_V4SI:
26152 case V4DF_FTYPE_V4SF:
26153 case V4DF_FTYPE_V2DF:
26154 case V4SF_FTYPE_V4SF:
26155 case V4SF_FTYPE_V4SI:
26156 case V4SF_FTYPE_V8SF:
26157 case V4SF_FTYPE_V4DF:
26158 case V4SF_FTYPE_V8HI:
26159 case V4SF_FTYPE_V2DF:
26160 case V2DI_FTYPE_V2DI:
26161 case V2DI_FTYPE_V16QI:
26162 case V2DI_FTYPE_V8HI:
26163 case V2DI_FTYPE_V4SI:
26164 case V2DF_FTYPE_V2DF:
26165 case V2DF_FTYPE_V4SI:
26166 case V2DF_FTYPE_V4DF:
26167 case V2DF_FTYPE_V4SF:
26168 case V2DF_FTYPE_V2SI:
26169 case V2SI_FTYPE_V2SI:
26170 case V2SI_FTYPE_V4SF:
26171 case V2SI_FTYPE_V2SF:
26172 case V2SI_FTYPE_V2DF:
26173 case V2SF_FTYPE_V2SF:
26174 case V2SF_FTYPE_V2SI:
26177 case V4SF_FTYPE_V4SF_VEC_MERGE:
26178 case V2DF_FTYPE_V2DF_VEC_MERGE:
26179 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26180 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26181 case V16QI_FTYPE_V16QI_V16QI:
26182 case V16QI_FTYPE_V8HI_V8HI:
26183 case V8QI_FTYPE_V8QI_V8QI:
26184 case V8QI_FTYPE_V4HI_V4HI:
26185 case V8HI_FTYPE_V8HI_V8HI:
26186 case V8HI_FTYPE_V16QI_V16QI:
26187 case V8HI_FTYPE_V4SI_V4SI:
26188 case V8SF_FTYPE_V8SF_V8SF:
26189 case V8SF_FTYPE_V8SF_V8SI:
26190 case V4SI_FTYPE_V4SI_V4SI:
26191 case V4SI_FTYPE_V8HI_V8HI:
26192 case V4SI_FTYPE_V4SF_V4SF:
26193 case V4SI_FTYPE_V2DF_V2DF:
26194 case V4HI_FTYPE_V4HI_V4HI:
26195 case V4HI_FTYPE_V8QI_V8QI:
26196 case V4HI_FTYPE_V2SI_V2SI:
26197 case V4DF_FTYPE_V4DF_V4DF:
26198 case V4DF_FTYPE_V4DF_V4DI:
26199 case V4SF_FTYPE_V4SF_V4SF:
26200 case V4SF_FTYPE_V4SF_V4SI:
26201 case V4SF_FTYPE_V4SF_V2SI:
26202 case V4SF_FTYPE_V4SF_V2DF:
26203 case V4SF_FTYPE_V4SF_DI:
26204 case V4SF_FTYPE_V4SF_SI:
26205 case V2DI_FTYPE_V2DI_V2DI:
26206 case V2DI_FTYPE_V16QI_V16QI:
26207 case V2DI_FTYPE_V4SI_V4SI:
26208 case V2DI_FTYPE_V2DI_V16QI:
26209 case V2DI_FTYPE_V2DF_V2DF:
26210 case V2SI_FTYPE_V2SI_V2SI:
26211 case V2SI_FTYPE_V4HI_V4HI:
26212 case V2SI_FTYPE_V2SF_V2SF:
26213 case V2DF_FTYPE_V2DF_V2DF:
26214 case V2DF_FTYPE_V2DF_V4SF:
26215 case V2DF_FTYPE_V2DF_V2DI:
26216 case V2DF_FTYPE_V2DF_DI:
26217 case V2DF_FTYPE_V2DF_SI:
26218 case V2SF_FTYPE_V2SF_V2SF:
26219 case V1DI_FTYPE_V1DI_V1DI:
26220 case V1DI_FTYPE_V8QI_V8QI:
26221 case V1DI_FTYPE_V2SI_V2SI:
26222 if (comparison == UNKNOWN)
26223 return ix86_expand_binop_builtin (icode, exp, target);
26226 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26227 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26228 gcc_assert (comparison != UNKNOWN);
26232 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26233 case V8HI_FTYPE_V8HI_SI_COUNT:
26234 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26235 case V4SI_FTYPE_V4SI_SI_COUNT:
26236 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26237 case V4HI_FTYPE_V4HI_SI_COUNT:
26238 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26239 case V2DI_FTYPE_V2DI_SI_COUNT:
26240 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26241 case V2SI_FTYPE_V2SI_SI_COUNT:
26242 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26243 case V1DI_FTYPE_V1DI_SI_COUNT:
26245 last_arg_count = true;
26247 case UINT64_FTYPE_UINT64_UINT64:
26248 case UINT_FTYPE_UINT_UINT:
26249 case UINT_FTYPE_UINT_USHORT:
26250 case UINT_FTYPE_UINT_UCHAR:
26251 case UINT16_FTYPE_UINT16_INT:
26252 case UINT8_FTYPE_UINT8_INT:
26255 case V2DI_FTYPE_V2DI_INT_CONVERT:
26258 nargs_constant = 1;
26260 case V8HI_FTYPE_V8HI_INT:
26261 case V8HI_FTYPE_V8SF_INT:
26262 case V8HI_FTYPE_V4SF_INT:
26263 case V8SF_FTYPE_V8SF_INT:
26264 case V4SI_FTYPE_V4SI_INT:
26265 case V4SI_FTYPE_V8SI_INT:
26266 case V4HI_FTYPE_V4HI_INT:
26267 case V4DF_FTYPE_V4DF_INT:
26268 case V4SF_FTYPE_V4SF_INT:
26269 case V4SF_FTYPE_V8SF_INT:
26270 case V2DI_FTYPE_V2DI_INT:
26271 case V2DF_FTYPE_V2DF_INT:
26272 case V2DF_FTYPE_V4DF_INT:
26274 nargs_constant = 1;
26276 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26277 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26278 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26279 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26280 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26283 case V16QI_FTYPE_V16QI_V16QI_INT:
26284 case V8HI_FTYPE_V8HI_V8HI_INT:
26285 case V8SI_FTYPE_V8SI_V8SI_INT:
26286 case V8SI_FTYPE_V8SI_V4SI_INT:
26287 case V8SF_FTYPE_V8SF_V8SF_INT:
26288 case V8SF_FTYPE_V8SF_V4SF_INT:
26289 case V4SI_FTYPE_V4SI_V4SI_INT:
26290 case V4DF_FTYPE_V4DF_V4DF_INT:
26291 case V4DF_FTYPE_V4DF_V2DF_INT:
26292 case V4SF_FTYPE_V4SF_V4SF_INT:
26293 case V2DI_FTYPE_V2DI_V2DI_INT:
26294 case V2DF_FTYPE_V2DF_V2DF_INT:
26296 nargs_constant = 1;
26298 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26301 nargs_constant = 1;
26303 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26306 nargs_constant = 1;
26308 case V2DI_FTYPE_V2DI_UINT_UINT:
26310 nargs_constant = 2;
26312 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26313 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26314 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26315 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26317 nargs_constant = 1;
26319 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26321 nargs_constant = 2;
26324 gcc_unreachable ();
26327 gcc_assert (nargs <= ARRAY_SIZE (args));
26329 if (comparison != UNKNOWN)
26331 gcc_assert (nargs == 2);
26332 return ix86_expand_sse_compare (d, exp, target, swap);
26335 if (rmode == VOIDmode || rmode == tmode)
26339 || GET_MODE (target) != tmode
26340 || !insn_p->operand[0].predicate (target, tmode))
26341 target = gen_reg_rtx (tmode);
26342 real_target = target;
26346 target = gen_reg_rtx (rmode);
26347 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26350 for (i = 0; i < nargs; i++)
26352 tree arg = CALL_EXPR_ARG (exp, i);
26353 rtx op = expand_normal (arg);
26354 enum machine_mode mode = insn_p->operand[i + 1].mode;
26355 bool match = insn_p->operand[i + 1].predicate (op, mode);
26357 if (last_arg_count && (i + 1) == nargs)
26359 /* SIMD shift insns take either an 8-bit immediate or a
26360 register as the count.  But builtin functions take an int as
26361 the count.  If the count doesn't match, we put it in a register. */
26364 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26365 if (!insn_p->operand[i + 1].predicate (op, mode))
26366 op = copy_to_reg (op);
26369 else if ((nargs - i) <= nargs_constant)
26374 case CODE_FOR_sse4_1_roundpd:
26375 case CODE_FOR_sse4_1_roundps:
26376 case CODE_FOR_sse4_1_roundsd:
26377 case CODE_FOR_sse4_1_roundss:
26378 case CODE_FOR_sse4_1_blendps:
26379 case CODE_FOR_avx_blendpd256:
26380 case CODE_FOR_avx_vpermilv4df:
26381 case CODE_FOR_avx_roundpd256:
26382 case CODE_FOR_avx_roundps256:
26383 error ("the last argument must be a 4-bit immediate");
26386 case CODE_FOR_sse4_1_blendpd:
26387 case CODE_FOR_avx_vpermilv2df:
26388 case CODE_FOR_xop_vpermil2v2df3:
26389 case CODE_FOR_xop_vpermil2v4sf3:
26390 case CODE_FOR_xop_vpermil2v4df3:
26391 case CODE_FOR_xop_vpermil2v8sf3:
26392 error ("the last argument must be a 2-bit immediate");
26395 case CODE_FOR_avx_vextractf128v4df:
26396 case CODE_FOR_avx_vextractf128v8sf:
26397 case CODE_FOR_avx_vextractf128v8si:
26398 case CODE_FOR_avx_vinsertf128v4df:
26399 case CODE_FOR_avx_vinsertf128v8sf:
26400 case CODE_FOR_avx_vinsertf128v8si:
26401 error ("the last argument must be a 1-bit immediate");
26404 case CODE_FOR_avx_cmpsdv2df3:
26405 case CODE_FOR_avx_cmpssv4sf3:
26406 case CODE_FOR_avx_cmppdv2df3:
26407 case CODE_FOR_avx_cmppsv4sf3:
26408 case CODE_FOR_avx_cmppdv4df3:
26409 case CODE_FOR_avx_cmppsv8sf3:
26410 error ("the last argument must be a 5-bit immediate");
26414 switch (nargs_constant)
26417 if ((nargs - i) == nargs_constant)
26419 error ("the next to last argument must be an 8-bit immediate");
26423 error ("the last argument must be an 8-bit immediate");
26426 gcc_unreachable ();
26433 if (VECTOR_MODE_P (mode))
26434 op = safe_vector_operand (op, mode);
26436 /* If we aren't optimizing, only allow one memory operand to
26437 be generated.  */
26438 if (memory_operand (op, mode))
26439 num_memory++;
26441 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26443 if (optimize || !match || num_memory > 1)
26444 op = copy_to_mode_reg (mode, op);
26448 op = copy_to_reg (op);
26449 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26454 args[i].mode = mode;
26460 pat = GEN_FCN (icode) (real_target, args[0].op);
26463 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26466 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26470 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26471 args[2].op, args[3].op);
26474 gcc_unreachable ();
26484 /* Subroutine of ix86_expand_builtin to take care of special insns
26485 with variable number of operands. */
26488 ix86_expand_special_args_builtin (const struct builtin_description *d,
26489 tree exp, rtx target)
26493 unsigned int i, nargs, arg_adjust, memory;
26497 enum machine_mode mode;
26499 enum insn_code icode = d->icode;
26500 bool last_arg_constant = false;
26501 const struct insn_data_d *insn_p = &insn_data[icode];
26502 enum machine_mode tmode = insn_p->operand[0].mode;
26503 enum { load, store } klass;
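/* Builtins that read memory expand as KLASS == load, with the pointer
   argument turned into a MEM source; builtins that write memory expand
   as KLASS == store, where the pointer argument becomes the MEM target
   and the insn's operand 0 is reserved for it.  */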
26505 switch ((enum ix86_builtin_func_type) d->flag)
26507 case VOID_FTYPE_VOID:
26508 if (icode == CODE_FOR_avx_vzeroupper)
26509 target = GEN_INT (vzeroupper_intrinsic);
26510 emit_insn (GEN_FCN (icode) (target));
26512 case VOID_FTYPE_UINT64:
26513 case VOID_FTYPE_UNSIGNED:
26519 case UINT64_FTYPE_VOID:
26520 case UNSIGNED_FTYPE_VOID:
26521 case UINT16_FTYPE_VOID:
26526 case UINT64_FTYPE_PUNSIGNED:
26527 case V2DI_FTYPE_PV2DI:
26528 case V32QI_FTYPE_PCCHAR:
26529 case V16QI_FTYPE_PCCHAR:
26530 case V8SF_FTYPE_PCV4SF:
26531 case V8SF_FTYPE_PCFLOAT:
26532 case V4SF_FTYPE_PCFLOAT:
26533 case V4DF_FTYPE_PCV2DF:
26534 case V4DF_FTYPE_PCDOUBLE:
26535 case V2DF_FTYPE_PCDOUBLE:
26536 case VOID_FTYPE_PVOID:
26541 case VOID_FTYPE_PV2SF_V4SF:
26542 case VOID_FTYPE_PV4DI_V4DI:
26543 case VOID_FTYPE_PV2DI_V2DI:
26544 case VOID_FTYPE_PCHAR_V32QI:
26545 case VOID_FTYPE_PCHAR_V16QI:
26546 case VOID_FTYPE_PFLOAT_V8SF:
26547 case VOID_FTYPE_PFLOAT_V4SF:
26548 case VOID_FTYPE_PDOUBLE_V4DF:
26549 case VOID_FTYPE_PDOUBLE_V2DF:
26550 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26551 case VOID_FTYPE_PINT_INT:
26554 /* Reserve memory operand for target. */
26555 memory = ARRAY_SIZE (args);
26557 case V4SF_FTYPE_V4SF_PCV2SF:
26558 case V2DF_FTYPE_V2DF_PCDOUBLE:
26563 case V8SF_FTYPE_PCV8SF_V8SF:
26564 case V4DF_FTYPE_PCV4DF_V4DF:
26565 case V4SF_FTYPE_PCV4SF_V4SF:
26566 case V2DF_FTYPE_PCV2DF_V2DF:
26571 case VOID_FTYPE_PV8SF_V8SF_V8SF:
26572 case VOID_FTYPE_PV4DF_V4DF_V4DF:
26573 case VOID_FTYPE_PV4SF_V4SF_V4SF:
26574 case VOID_FTYPE_PV2DF_V2DF_V2DF:
26577 /* Reserve memory operand for target. */
26578 memory = ARRAY_SIZE (args);
26580 case VOID_FTYPE_UINT_UINT_UINT:
26581 case VOID_FTYPE_UINT64_UINT_UINT:
26582 case UCHAR_FTYPE_UINT_UINT_UINT:
26583 case UCHAR_FTYPE_UINT64_UINT_UINT:
26586 memory = ARRAY_SIZE (args);
26587 last_arg_constant = true;
26590 gcc_unreachable ();
26593 gcc_assert (nargs <= ARRAY_SIZE (args));
26595 if (klass == store)
26597 arg = CALL_EXPR_ARG (exp, 0);
26598 op = expand_normal (arg);
26599 gcc_assert (target == 0);
26601 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26603 target = force_reg (tmode, op);
26611 || GET_MODE (target) != tmode
26612 || !insn_p->operand[0].predicate (target, tmode))
26613 target = gen_reg_rtx (tmode);
26616 for (i = 0; i < nargs; i++)
26618 enum machine_mode mode = insn_p->operand[i + 1].mode;
26621 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26622 op = expand_normal (arg);
26623 match = insn_p->operand[i + 1].predicate (op, mode);
26625 if (last_arg_constant && (i + 1) == nargs)
26629 if (icode == CODE_FOR_lwp_lwpvalsi3
26630 || icode == CODE_FOR_lwp_lwpinssi3
26631 || icode == CODE_FOR_lwp_lwpvaldi3
26632 || icode == CODE_FOR_lwp_lwpinsdi3)
26633 error ("the last argument must be a 32-bit immediate");
26635 error ("the last argument must be an 8-bit immediate");
26643 /* This must be the memory operand. */
26644 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
26645 gcc_assert (GET_MODE (op) == mode
26646 || GET_MODE (op) == VOIDmode);
26650 /* This must be a register.  */
26651 if (VECTOR_MODE_P (mode))
26652 op = safe_vector_operand (op, mode);
26654 gcc_assert (GET_MODE (op) == mode
26655 || GET_MODE (op) == VOIDmode);
26656 op = copy_to_mode_reg (mode, op);
26661 args[i].mode = mode;
26667 pat = GEN_FCN (icode) (target);
26670 pat = GEN_FCN (icode) (target, args[0].op);
26673 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26676 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26679 gcc_unreachable ();
26685 return klass == store ? 0 : target;
26688 /* Return the integer constant in ARG. Constrain it to be in the range
26689 of the subparts of VEC_TYPE; issue an error if not. */
26692 get_element_number (tree vec_type, tree arg)
26694 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26696 if (!host_integerp (arg, 1)
26697 || (elt = tree_low_cst (arg, 1), elt > max))
26699 error ("selector must be an integer constant in the range 0..%wi", max);
26706 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26707 ix86_expand_vector_init. We DO have language-level syntax for this, in
26708 the form of (type){ init-list }. Except that since we can't place emms
26709 instructions from inside the compiler, we can't allow the use of MMX
26710 registers unless the user explicitly asks for it. So we do *not* define
26711 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26712 we have builtins invoked by mmintrin.h that give us license to emit
26713 these sorts of instructions. */
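/* Illustrative example: mmintrin.h implements _mm_set_pi32 in terms of
   __builtin_ia32_vec_init_v2si (see mmintrin.h for the exact argument
   order), and that builtin is expanded here.  */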
26716 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26718 enum machine_mode tmode = TYPE_MODE (type);
26719 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26720 int i, n_elt = GET_MODE_NUNITS (tmode);
26721 rtvec v = rtvec_alloc (n_elt);
26723 gcc_assert (VECTOR_MODE_P (tmode));
26724 gcc_assert (call_expr_nargs (exp) == n_elt);
26726 for (i = 0; i < n_elt; ++i)
26728 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26729 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26732 if (!target || !register_operand (target, tmode))
26733 target = gen_reg_rtx (tmode);
26735 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26739 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26740 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26741 had a language-level syntax for referencing vector elements. */
26744 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26746 enum machine_mode tmode, mode0;
26751 arg0 = CALL_EXPR_ARG (exp, 0);
26752 arg1 = CALL_EXPR_ARG (exp, 1);
26754 op0 = expand_normal (arg0);
26755 elt = get_element_number (TREE_TYPE (arg0), arg1);
26757 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26758 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26759 gcc_assert (VECTOR_MODE_P (mode0));
26761 op0 = force_reg (mode0, op0);
26763 if (optimize || !target || !register_operand (target, tmode))
26764 target = gen_reg_rtx (tmode);
26766 ix86_expand_vector_extract (true, target, op0, elt);
26771 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26772 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26773 a language-level syntax for referencing vector elements. */
26776 ix86_expand_vec_set_builtin (tree exp)
26778 enum machine_mode tmode, mode1;
26779 tree arg0, arg1, arg2;
26781 rtx op0, op1, target;
26783 arg0 = CALL_EXPR_ARG (exp, 0);
26784 arg1 = CALL_EXPR_ARG (exp, 1);
26785 arg2 = CALL_EXPR_ARG (exp, 2);
26787 tmode = TYPE_MODE (TREE_TYPE (arg0));
26788 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26789 gcc_assert (VECTOR_MODE_P (tmode));
26791 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26792 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26793 elt = get_element_number (TREE_TYPE (arg0), arg2);
26795 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26796 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26798 op0 = force_reg (tmode, op0);
26799 op1 = force_reg (mode1, op1);
26801 /* OP0 is the source of these builtin functions and shouldn't be
26802 modified. Create a copy, use it and return it as target. */
26803 target = gen_reg_rtx (tmode);
26804 emit_move_insn (target, op0);
26805 ix86_expand_vector_set (true, target, op1, elt);
26810 /* Expand an expression EXP that calls a built-in function,
26811 with result going to TARGET if that's convenient
26812 (and in mode MODE if that's convenient).
26813 SUBTARGET may be used as the target for computing one of EXP's operands.
26814 IGNORE is nonzero if the value is to be ignored. */
26817 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26818 enum machine_mode mode ATTRIBUTE_UNUSED,
26819 int ignore ATTRIBUTE_UNUSED)
26821 const struct builtin_description *d;
26823 enum insn_code icode;
26824 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26825 tree arg0, arg1, arg2;
26826 rtx op0, op1, op2, pat;
26827 enum machine_mode mode0, mode1, mode2;
26828 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26830 /* Determine whether the builtin function is available under the current ISA.
26831 Originally the builtin was not created if it wasn't applicable to the
26832 current ISA based on the command line switches. With function specific
26833 options, we need to check in the context of the function making the call
26834 whether it is supported. */
26835 if (ix86_builtins_isa[fcode].isa
26836 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
26838 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
26839 NULL, NULL, false);
26842 error ("%qE needs unknown isa option", fndecl);
26845 gcc_assert (opts != NULL);
26846 error ("%qE needs isa option %s", fndecl, opts);
26854 case IX86_BUILTIN_MASKMOVQ:
26855 case IX86_BUILTIN_MASKMOVDQU:
26856 icode = (fcode == IX86_BUILTIN_MASKMOVQ
26857 ? CODE_FOR_mmx_maskmovq
26858 : CODE_FOR_sse2_maskmovdqu);
26859 /* Note the arg order is different from the operand order. */
26860 arg1 = CALL_EXPR_ARG (exp, 0);
26861 arg2 = CALL_EXPR_ARG (exp, 1);
26862 arg0 = CALL_EXPR_ARG (exp, 2);
26863 op0 = expand_normal (arg0);
26864 op1 = expand_normal (arg1);
26865 op2 = expand_normal (arg2);
26866 mode0 = insn_data[icode].operand[0].mode;
26867 mode1 = insn_data[icode].operand[1].mode;
26868 mode2 = insn_data[icode].operand[2].mode;
26870 op0 = force_reg (Pmode, op0);
26871 op0 = gen_rtx_MEM (mode1, op0);
26873 if (!insn_data[icode].operand[0].predicate (op0, mode0))
26874 op0 = copy_to_mode_reg (mode0, op0);
26875 if (!insn_data[icode].operand[1].predicate (op1, mode1))
26876 op1 = copy_to_mode_reg (mode1, op1);
26877 if (!insn_data[icode].operand[2].predicate (op2, mode2))
26878 op2 = copy_to_mode_reg (mode2, op2);
26879 pat = GEN_FCN (icode) (op0, op1, op2);
26885 case IX86_BUILTIN_LDMXCSR:
26886 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
26887 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26888 emit_move_insn (target, op0);
26889 emit_insn (gen_sse_ldmxcsr (target));
26892 case IX86_BUILTIN_STMXCSR:
26893 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26894 emit_insn (gen_sse_stmxcsr (target));
26895 return copy_to_mode_reg (SImode, target);
26897 case IX86_BUILTIN_CLFLUSH:
26898 arg0 = CALL_EXPR_ARG (exp, 0);
26899 op0 = expand_normal (arg0);
26900 icode = CODE_FOR_sse2_clflush;
26901 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26902 op0 = copy_to_mode_reg (Pmode, op0);
26904 emit_insn (gen_sse2_clflush (op0));
26907 case IX86_BUILTIN_MONITOR:
26908 arg0 = CALL_EXPR_ARG (exp, 0);
26909 arg1 = CALL_EXPR_ARG (exp, 1);
26910 arg2 = CALL_EXPR_ARG (exp, 2);
26911 op0 = expand_normal (arg0);
26912 op1 = expand_normal (arg1);
26913 op2 = expand_normal (arg2);
26915 op0 = copy_to_mode_reg (Pmode, op0);
26917 op1 = copy_to_mode_reg (SImode, op1);
26919 op2 = copy_to_mode_reg (SImode, op2);
26920 emit_insn (ix86_gen_monitor (op0, op1, op2));
26923 case IX86_BUILTIN_MWAIT:
26924 arg0 = CALL_EXPR_ARG (exp, 0);
26925 arg1 = CALL_EXPR_ARG (exp, 1);
26926 op0 = expand_normal (arg0);
26927 op1 = expand_normal (arg1);
26929 op0 = copy_to_mode_reg (SImode, op0);
26931 op1 = copy_to_mode_reg (SImode, op1);
26932 emit_insn (gen_sse3_mwait (op0, op1));
26935 case IX86_BUILTIN_VEC_INIT_V2SI:
26936 case IX86_BUILTIN_VEC_INIT_V4HI:
26937 case IX86_BUILTIN_VEC_INIT_V8QI:
26938 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
26940 case IX86_BUILTIN_VEC_EXT_V2DF:
26941 case IX86_BUILTIN_VEC_EXT_V2DI:
26942 case IX86_BUILTIN_VEC_EXT_V4SF:
26943 case IX86_BUILTIN_VEC_EXT_V4SI:
26944 case IX86_BUILTIN_VEC_EXT_V8HI:
26945 case IX86_BUILTIN_VEC_EXT_V2SI:
26946 case IX86_BUILTIN_VEC_EXT_V4HI:
26947 case IX86_BUILTIN_VEC_EXT_V16QI:
26948 return ix86_expand_vec_ext_builtin (exp, target);
26950 case IX86_BUILTIN_VEC_SET_V2DI:
26951 case IX86_BUILTIN_VEC_SET_V4SF:
26952 case IX86_BUILTIN_VEC_SET_V4SI:
26953 case IX86_BUILTIN_VEC_SET_V8HI:
26954 case IX86_BUILTIN_VEC_SET_V4HI:
26955 case IX86_BUILTIN_VEC_SET_V16QI:
26956 return ix86_expand_vec_set_builtin (exp);
26958 case IX86_BUILTIN_VEC_PERM_V2DF:
26959 case IX86_BUILTIN_VEC_PERM_V4SF:
26960 case IX86_BUILTIN_VEC_PERM_V2DI:
26961 case IX86_BUILTIN_VEC_PERM_V4SI:
26962 case IX86_BUILTIN_VEC_PERM_V8HI:
26963 case IX86_BUILTIN_VEC_PERM_V16QI:
26964 case IX86_BUILTIN_VEC_PERM_V2DI_U:
26965 case IX86_BUILTIN_VEC_PERM_V4SI_U:
26966 case IX86_BUILTIN_VEC_PERM_V8HI_U:
26967 case IX86_BUILTIN_VEC_PERM_V16QI_U:
26968 case IX86_BUILTIN_VEC_PERM_V4DF:
26969 case IX86_BUILTIN_VEC_PERM_V8SF:
26970 return ix86_expand_vec_perm_builtin (exp);
26972 case IX86_BUILTIN_INFQ:
26973 case IX86_BUILTIN_HUGE_VALQ:
26975 REAL_VALUE_TYPE inf;
26979 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
26981 tmp = validize_mem (force_const_mem (mode, tmp));
26984 target = gen_reg_rtx (mode);
26986 emit_move_insn (target, tmp);
26990 case IX86_BUILTIN_LLWPCB:
26991 arg0 = CALL_EXPR_ARG (exp, 0);
26992 op0 = expand_normal (arg0);
26993 icode = CODE_FOR_lwp_llwpcb;
26994 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26995 op0 = copy_to_mode_reg (Pmode, op0);
26996 emit_insn (gen_lwp_llwpcb (op0));
26999 case IX86_BUILTIN_SLWPCB:
27000 icode = CODE_FOR_lwp_slwpcb;
27001 if (!target
27002 || !insn_data[icode].operand[0].predicate (target, Pmode))
27003 target = gen_reg_rtx (Pmode);
27004 emit_insn (gen_lwp_slwpcb (target));
27011 for (i = 0, d = bdesc_special_args;
27012 i < ARRAY_SIZE (bdesc_special_args);
27014 if (d->code == fcode)
27015 return ix86_expand_special_args_builtin (d, exp, target);
27017 for (i = 0, d = bdesc_args;
27018 i < ARRAY_SIZE (bdesc_args);
27020 if (d->code == fcode)
27023 case IX86_BUILTIN_FABSQ:
27024 case IX86_BUILTIN_COPYSIGNQ:
27026 /* Emit a normal call if SSE2 isn't available. */
27027 return expand_call (exp, target, ignore);
27029 return ix86_expand_args_builtin (d, exp, target);
27032 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27033 if (d->code == fcode)
27034 return ix86_expand_sse_comi (d, exp, target);
27036 for (i = 0, d = bdesc_pcmpestr;
27037 i < ARRAY_SIZE (bdesc_pcmpestr);
27039 if (d->code == fcode)
27040 return ix86_expand_sse_pcmpestr (d, exp, target);
27042 for (i = 0, d = bdesc_pcmpistr;
27043 i < ARRAY_SIZE (bdesc_pcmpistr);
27045 if (d->code == fcode)
27046 return ix86_expand_sse_pcmpistr (d, exp, target);
27048 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27049 if (d->code == fcode)
27050 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27051 (enum ix86_builtin_func_type)
27052 d->flag, d->comparison);
27054 gcc_unreachable ();
27057 /* Returns a function decl for a vectorized version of the builtin function
27058 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27059 if it is not available. */
27062 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27065 enum machine_mode in_mode, out_mode;
27067 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27069 if (TREE_CODE (type_out) != VECTOR_TYPE
27070 || TREE_CODE (type_in) != VECTOR_TYPE
27071 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27074 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27075 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27076 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27077 in_n = TYPE_VECTOR_SUBPARTS (type_in);
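/* Example: vectorizing sqrt () over doubles with a two-element vector
   type arrives here as BUILT_IN_SQRT with V2DF in and out, and maps to
   IX86_BUILTIN_SQRTPD below.  */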
27081 case BUILT_IN_SQRT:
27082 if (out_mode == DFmode && in_mode == DFmode)
27084 if (out_n == 2 && in_n == 2)
27085 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27086 else if (out_n == 4 && in_n == 4)
27087 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27091 case BUILT_IN_SQRTF:
27092 if (out_mode == SFmode && in_mode == SFmode)
27094 if (out_n == 4 && in_n == 4)
27095 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27096 else if (out_n == 8 && in_n == 8)
27097 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27101 case BUILT_IN_LRINT:
27102 if (out_mode == SImode && out_n == 4
27103 && in_mode == DFmode && in_n == 2)
27104 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27107 case BUILT_IN_LRINTF:
27108 if (out_mode == SImode && in_mode == SFmode)
27110 if (out_n == 4 && in_n == 4)
27111 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27112 else if (out_n == 8 && in_n == 8)
27113 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27117 case BUILT_IN_COPYSIGN:
27118 if (out_mode == DFmode && in_mode == DFmode)
27120 if (out_n == 2 && in_n == 2)
27121 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27122 else if (out_n == 4 && in_n == 4)
27123 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27127 case BUILT_IN_COPYSIGNF:
27128 if (out_mode == SFmode && in_mode == SFmode)
27130 if (out_n == 4 && in_n == 4)
27131 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27132 else if (out_n == 8 && in_n == 8)
27133 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27138 if (out_mode == DFmode && in_mode == DFmode)
27140 if (out_n == 2 && in_n == 2)
27141 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27142 if (out_n == 4 && in_n == 4)
27143 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27147 case BUILT_IN_FMAF:
27148 if (out_mode == SFmode && in_mode == SFmode)
27150 if (out_n == 4 && in_n == 4)
27151 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27152 if (out_n == 8 && in_n == 8)
27153 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27161 /* Dispatch to a handler for a vectorization library. */
27162 if (ix86_veclib_handler)
27163 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27169 /* Handler for an SVML-style interface to
27170 a library with vectorized intrinsics. */
27173 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27176 tree fntype, new_fndecl, args;
27179 enum machine_mode el_mode, in_mode;
27182 /* The SVML library is suitable for unsafe math only.  */
27183 if (!flag_unsafe_math_optimizations)
27186 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27187 n = TYPE_VECTOR_SUBPARTS (type_out);
27188 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27189 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27190 if (el_mode != in_mode
27198 case BUILT_IN_LOG10:
27200 case BUILT_IN_TANH:
27202 case BUILT_IN_ATAN:
27203 case BUILT_IN_ATAN2:
27204 case BUILT_IN_ATANH:
27205 case BUILT_IN_CBRT:
27206 case BUILT_IN_SINH:
27208 case BUILT_IN_ASINH:
27209 case BUILT_IN_ASIN:
27210 case BUILT_IN_COSH:
27212 case BUILT_IN_ACOSH:
27213 case BUILT_IN_ACOS:
27214 if (el_mode != DFmode || n != 2)
27218 case BUILT_IN_EXPF:
27219 case BUILT_IN_LOGF:
27220 case BUILT_IN_LOG10F:
27221 case BUILT_IN_POWF:
27222 case BUILT_IN_TANHF:
27223 case BUILT_IN_TANF:
27224 case BUILT_IN_ATANF:
27225 case BUILT_IN_ATAN2F:
27226 case BUILT_IN_ATANHF:
27227 case BUILT_IN_CBRTF:
27228 case BUILT_IN_SINHF:
27229 case BUILT_IN_SINF:
27230 case BUILT_IN_ASINHF:
27231 case BUILT_IN_ASINF:
27232 case BUILT_IN_COSHF:
27233 case BUILT_IN_COSF:
27234 case BUILT_IN_ACOSHF:
27235 case BUILT_IN_ACOSF:
27236 if (el_mode != SFmode || n != 4)
27244 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27246 if (fn == BUILT_IN_LOGF)
27247 strcpy (name, "vmlsLn4");
27248 else if (fn == BUILT_IN_LOG)
27249 strcpy (name, "vmldLn2");
27252 sprintf (name, "vmls%s", bname+10);
27253 name[strlen (name)-1] = '4';
27256 sprintf (name, "vmld%s2", bname+10);
27258 /* Convert to uppercase. */
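/* E.g. BUILT_IN_SINF ("__builtin_sinf") yields "vmlsSin4", and
   BUILT_IN_SIN yields "vmldSin2".  */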
27262 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27263 args = TREE_CHAIN (args))
27267 fntype = build_function_type_list (type_out, type_in, NULL);
27269 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27271 /* Build a function declaration for the vectorized function. */
27272 new_fndecl = build_decl (BUILTINS_LOCATION,
27273 FUNCTION_DECL, get_identifier (name), fntype);
27274 TREE_PUBLIC (new_fndecl) = 1;
27275 DECL_EXTERNAL (new_fndecl) = 1;
27276 DECL_IS_NOVOPS (new_fndecl) = 1;
27277 TREE_READONLY (new_fndecl) = 1;
27282 /* Handler for an ACML-style interface to
27283 a library with vectorized intrinsics. */
27286 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27288 char name[20] = "__vr.._";
27289 tree fntype, new_fndecl, args;
27292 enum machine_mode el_mode, in_mode;
27295 /* The ACML library is 64-bit only and suitable for unsafe math only,
27296 as it does not correctly support parts of IEEE with the required
27297 precision, such as denormals. */
27298 if (!TARGET_64BIT
27299 || !flag_unsafe_math_optimizations)
27300 return NULL_TREE;
27302 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27303 n = TYPE_VECTOR_SUBPARTS (type_out);
27304 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27305 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27306 if (el_mode != in_mode
27316 case BUILT_IN_LOG2:
27317 case BUILT_IN_LOG10:
27320 if (el_mode != DFmode
27325 case BUILT_IN_SINF:
27326 case BUILT_IN_COSF:
27327 case BUILT_IN_EXPF:
27328 case BUILT_IN_POWF:
27329 case BUILT_IN_LOGF:
27330 case BUILT_IN_LOG2F:
27331 case BUILT_IN_LOG10F:
27334 if (el_mode != SFmode
27343 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27344 sprintf (name + 7, "%s", bname+10);
27347 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27348 args = TREE_CHAIN (args))
27352 fntype = build_function_type_list (type_out, type_in, NULL);
27354 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27356 /* Build a function declaration for the vectorized function. */
27357 new_fndecl = build_decl (BUILTINS_LOCATION,
27358 FUNCTION_DECL, get_identifier (name), fntype);
27359 TREE_PUBLIC (new_fndecl) = 1;
27360 DECL_EXTERNAL (new_fndecl) = 1;
27361 DECL_IS_NOVOPS (new_fndecl) = 1;
27362 TREE_READONLY (new_fndecl) = 1;
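/* Worked example (editor's note): the template "__vr.._" has its two dots
   filled in by the (elided) switch arms with a precision letter and a lane
   count, and the scalar builtin's name is appended past its "__builtin_"
   prefix.  For FN = BUILT_IN_SINF this produces "__vrs4_sinf"; for
   FN = BUILT_IN_SIN, "__vrd2_sin".  */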
27368 /* Returns a decl of a function that implements conversion of an integer vector
27369 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27370 are the types involved when converting according to CODE.
27371 Return NULL_TREE if it is not available. */
27374 ix86_vectorize_builtin_conversion (unsigned int code,
27375 tree dest_type, tree src_type)
27383 switch (TYPE_MODE (src_type))
27386 switch (TYPE_MODE (dest_type))
27389 return (TYPE_UNSIGNED (src_type)
27390 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27391 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27393 return (TYPE_UNSIGNED (src_type)
27395 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27401 switch (TYPE_MODE (dest_type))
27404 return (TYPE_UNSIGNED (src_type)
27406 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27415 case FIX_TRUNC_EXPR:
27416 switch (TYPE_MODE (dest_type))
27419 switch (TYPE_MODE (src_type))
27422 return (TYPE_UNSIGNED (dest_type)
27424 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27426 return (TYPE_UNSIGNED (dest_type)
27428 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27435 switch (TYPE_MODE (src_type))
27438 return (TYPE_UNSIGNED (dest_type)
27440 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27457 /* Returns a decl of a target-specific built-in function that implements
27458 the reciprocal of the function, or NULL_TREE if not available. */
27461 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27462 bool sqrt ATTRIBUTE_UNUSED)
27464 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27465 && flag_finite_math_only && !flag_trapping_math
27466 && flag_unsafe_math_optimizations))
27470 /* Machine dependent builtins. */
27473 /* Vectorized version of sqrt to rsqrt conversion. */
27474 case IX86_BUILTIN_SQRTPS_NR:
27475 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27477 case IX86_BUILTIN_SQRTPS_NR256:
27478 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27484 /* Normal builtins. */
27487 /* Sqrt to rsqrt conversion. */
27488 case BUILT_IN_SQRTF:
27489 return ix86_builtins[IX86_BUILTIN_RSQRTF];
27496 /* Helper for avx_vpermilps256_operand et al. This is also used by
27497 the expansion functions to turn the parallel back into a mask.
27498 The return value is 0 for no match, and imm8 + 1 for a match. */
27501 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27503 unsigned i, nelt = GET_MODE_NUNITS (mode);
27505 unsigned char ipar[8];
27507 if (XVECLEN (par, 0) != (int) nelt)
27510 /* Validate that all of the elements are constants, and not totally
27511 out of range. Copy the data into an integral array to make the
27512 subsequent checks easier. */
27513 for (i = 0; i < nelt; ++i)
27515 rtx er = XVECEXP (par, 0, i);
27516 unsigned HOST_WIDE_INT ei;
27518 if (!CONST_INT_P (er))
27529 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
27531 for (i = 0; i < 2; ++i)
27535 mask |= ipar[i] << i;
27537 for (i = 2; i < 4; ++i)
27541 mask |= (ipar[i] - 2) << i;
27546 /* In the 256-bit SFmode case, we have full freedom of movement
27547 within the low 128-bit lane, but the high 128-bit lane must
27548 mirror the exact same pattern. */
27549 for (i = 0; i < 4; ++i)
27550 if (ipar[i] + 4 != ipar[i + 4])
27557 /* In the 128-bit case, we have full freedom in the placement of
27558 the elements from the source operand. */
27559 for (i = 0; i < nelt; ++i)
27560 mask |= ipar[i] << (i * (nelt / 2));
27564 gcc_unreachable ();
27567 /* Make sure success has a non-zero value by adding one. */
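/* Worked example (editor's sketch, not part of the original source): for
   the 128-bit V4SFmode case above with the parallel [0, 3, 2, 1], the loop
   computes mask = 0<<0 | 3<<2 | 2<<4 | 1<<6 = 0x6c, so the function returns
   0x6c + 1 = 0x6d.  A standalone sketch (vpermilps_imm8 is a hypothetical
   helper):  */
#if 0
static int
vpermilps_imm8 (const unsigned char ipar[4])
{
  unsigned int i, mask = 0;

  /* nelt = 4, so the per-element shift step is nelt / 2 = 2 bits.  */
  for (i = 0; i < 4; ++i)
    mask |= ipar[i] << (i * 2);
  return mask + 1;  /* non-zero marks success */
}
#endif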
27571 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27572 the expansion functions to turn the parallel back into a mask.
27573 The return value is 0 for no match, and imm8 + 1 for a match. */
27576 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27578 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27580 unsigned char ipar[8];
27582 if (XVECLEN (par, 0) != (int) nelt)
27585 /* Validate that all of the elements are constants, and not totally
27586 out of range. Copy the data into an integral array to make the
27587 subsequent checks easier. */
27588 for (i = 0; i < nelt; ++i)
27590 rtx er = XVECEXP (par, 0, i);
27591 unsigned HOST_WIDE_INT ei;
27593 if (!CONST_INT_P (er))
27596 if (ei >= 2 * nelt)
27601 /* Validate that each half of the permute selects consecutive elements, i.e. a whole half. */
27602 for (i = 0; i < nelt2 - 1; ++i)
27603 if (ipar[i] + 1 != ipar[i + 1])
27605 for (i = nelt2; i < nelt - 1; ++i)
27606 if (ipar[i] + 1 != ipar[i + 1])
27609 /* Reconstruct the mask. */
27610 for (i = 0; i < 2; ++i)
27612 unsigned e = ipar[i * nelt2];
27616 mask |= e << (i * 4);
27619 /* Make sure success has a non-zero value by adding one. */
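/* Worked example (editor's note): for V4DFmode (nelt = 4, nelt2 = 2) and
   the parallel [2, 3, 4, 5], both halves are contiguous.  Assuming the
   elided loop body normalizes each half's first index to a lane number
   (e /= nelt2), the selectors are 1 and 2, so
   mask = (1 << 0) | (2 << 4) = 0x21 and the function returns 0x22.  */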
27624 /* Store OPERAND to memory after reload is completed. This means
27625 that we can't easily use assign_stack_local. */
27627 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27631 gcc_assert (reload_completed);
27632 if (ix86_using_red_zone ())
27634 result = gen_rtx_MEM (mode,
27635 gen_rtx_PLUS (Pmode,
27637 GEN_INT (-RED_ZONE_SIZE)));
27638 emit_move_insn (result, operand);
27640 else if (TARGET_64BIT)
27646 operand = gen_lowpart (DImode, operand);
27650 gen_rtx_SET (VOIDmode,
27651 gen_rtx_MEM (DImode,
27652 gen_rtx_PRE_DEC (DImode,
27653 stack_pointer_rtx)),
27657 gcc_unreachable ();
27659 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27668 split_double_mode (mode, &operand, 1, operands, operands + 1);
27670 gen_rtx_SET (VOIDmode,
27671 gen_rtx_MEM (SImode,
27672 gen_rtx_PRE_DEC (Pmode,
27673 stack_pointer_rtx)),
27676 gen_rtx_SET (VOIDmode,
27677 gen_rtx_MEM (SImode,
27678 gen_rtx_PRE_DEC (Pmode,
27679 stack_pointer_rtx)),
27684 /* Store HImode values as SImode. */
27685 operand = gen_lowpart (SImode, operand);
27689 gen_rtx_SET (VOIDmode,
27690 gen_rtx_MEM (GET_MODE (operand),
27691 gen_rtx_PRE_DEC (SImode,
27692 stack_pointer_rtx)),
27696 gcc_unreachable ();
27698 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27703 /* Free the operand from memory. */
27705 ix86_free_from_memory (enum machine_mode mode)
27707 if (!ix86_using_red_zone ())
27711 if (mode == DImode || TARGET_64BIT)
27715 /* Use LEA to deallocate stack space. In peephole2 it will be converted
27716 to a pop or add instruction if registers are available. */
27717 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27718 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
27723 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
27724 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same. */
27726 static const reg_class_t *
27727 i386_ira_cover_classes (void)
27729 static const reg_class_t sse_fpmath_classes[] = {
27730 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
27732 static const reg_class_t no_sse_fpmath_classes[] = {
27733 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
27736 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
27739 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
27741 Put float CONST_DOUBLE in the constant pool instead of fp regs.
27742 QImode must go into class Q_REGS.
27743 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
27744 movdf to do mem-to-mem moves through integer regs. */
27747 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
27749 enum machine_mode mode = GET_MODE (x);
27751 /* We're only allowed to return a subclass of CLASS. Many of the
27752 following checks fail for NO_REGS, so eliminate that early. */
27753 if (regclass == NO_REGS)
27756 /* All classes can load zeros. */
27757 if (x == CONST0_RTX (mode))
27760 /* Force constants into memory if we are loading a (nonzero) constant into
27761 an MMX or SSE register. This is because there are no MMX/SSE instructions
27762 to load from a constant. */
27764 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
27767 /* Prefer SSE regs only, if we can use them for math. */
27768 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
27769 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
27771 /* Floating-point constants need more complex checks. */
27772 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
27774 /* General regs can load everything. */
27775 if (reg_class_subset_p (regclass, GENERAL_REGS))
27778 /* Floats can load 0 and 1 plus some others. Note that we eliminated
27779 zero above. We only want to wind up preferring 80387 registers if
27780 we plan on doing computation with them. */
27782 && standard_80387_constant_p (x))
27784 /* Limit class to non-sse. */
27785 if (regclass == FLOAT_SSE_REGS)
27787 if (regclass == FP_TOP_SSE_REGS)
27789 if (regclass == FP_SECOND_SSE_REGS)
27790 return FP_SECOND_REG;
27791 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
27798 /* Generally when we see PLUS here, it's the function invariant
27799 (plus soft-fp const_int), which can only be computed into general registers. */
27801 if (GET_CODE (x) == PLUS)
27802 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
27804 /* QImode constants are easy to load, but non-constant QImode data
27805 must go into Q_REGS. */
27806 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
27808 if (reg_class_subset_p (regclass, Q_REGS))
27810 if (reg_class_subset_p (Q_REGS, regclass))
27818 /* Discourage putting floating-point values in SSE registers unless
27819 SSE math is being used, and likewise for the 387 registers. */
27821 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
27823 enum machine_mode mode = GET_MODE (x);
27825 /* Restrict the output reload class to the register bank that we are doing
27826 math on. If we would like not to return a subset of CLASS, reject this
27827 alternative: if reload cannot do this, it will still use its choice. */
27829 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
27830 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
27832 if (X87_FLOAT_MODE_P (mode))
27834 if (regclass == FP_TOP_SSE_REGS)
27836 else if (regclass == FP_SECOND_SSE_REGS)
27837 return FP_SECOND_REG;
27839 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
27846 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
27847 enum machine_mode mode,
27848 secondary_reload_info *sri ATTRIBUTE_UNUSED)
27850 /* QImode spills from non-QI registers require
27851 an intermediate register on 32-bit targets. */
27852 if (!in_p && mode == QImode && !TARGET_64BIT
27853 && (rclass == GENERAL_REGS
27854 || rclass == LEGACY_REGS
27855 || rclass == INDEX_REGS))
27864 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
27865 regno = true_regnum (x);
27867 /* Return Q_REGS if the operand is in memory. */
27875 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
27878 ix86_class_likely_spilled_p (reg_class_t rclass)
27889 case SSE_FIRST_REG:
27891 case FP_SECOND_REG:
27901 /* If we are copying between general and FP registers, we need a memory
27902 location. The same is true for SSE and MMX registers.
27904 To optimize register_move_cost performance, allow inline variant.
27906 The macro can't work reliably when one of the CLASSES is a class containing
27907 registers from multiple units (SSE, MMX, integer). We avoid this by never
27908 combining those units in a single alternative in the machine description.
27909 Ensure that this constraint holds to avoid unexpected surprises.
27911 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
27912 enforce these sanity checks. */
27915 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
27916 enum machine_mode mode, int strict)
27918 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
27919 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
27920 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
27921 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
27922 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
27923 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
27925 gcc_assert (!strict);
27929 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
27932 /* ??? This is a lie. We do have moves between mmx/general, and between
27933 mmx/sse2. But by saying we need secondary memory we discourage the
27934 register allocator from using the MMX registers unless needed. */
27935 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
27938 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27940 /* SSE1 doesn't have any direct moves from other classes. */
27944 /* If the target says that inter-unit moves are more expensive
27945 than moving through memory, then don't generate them. */
27946 if (!TARGET_INTER_UNIT_MOVES)
27949 /* Between SSE and general, we have moves no larger than word size. */
27950 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
27958 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
27959 enum machine_mode mode, int strict)
27961 return inline_secondary_memory_needed (class1, class2, mode, strict);
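/* Example (editor's note): on a 32-bit target a DImode value cannot be
   moved directly between an SSE register and general registers, because
   GET_MODE_SIZE (DImode) = 8 exceeds UNITS_PER_WORD = 4; the check above
   therefore forces the move through a stack slot.  */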
27964 /* Return true if the registers in CLASS cannot represent the change from
27965 modes FROM to TO. */
27968 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
27969 enum reg_class regclass)
27974 /* x87 registers can't do subreg at all, as all values are reformatted
27975 to extended precision. */
27976 if (MAYBE_FLOAT_CLASS_P (regclass))
27979 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
27981 /* Vector registers do not support QI or HImode loads. If we don't
27982 disallow a change to these modes, reload will assume it's ok to
27983 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
27984 the vec_dupv4hi pattern. */
27985 if (GET_MODE_SIZE (from) < 4)
27988 /* Vector registers do not support subreg with nonzero offsets, which
27989 are otherwise valid for integer registers. Since we can't see
27990 whether we have a nonzero offset from here, prohibit all
27991 nonparadoxical subregs changing size. */
27992 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
27999 /* Return the cost of moving data of mode M between a
28000 register and memory. A value of 2 is the default; this cost is
28001 relative to those in `REGISTER_MOVE_COST'.
28003 This function is used extensively by register_move_cost, which is used to
28004 build tables at startup. Make it inline in this case.
28005 When IN is 2, return maximum of in and out move cost.
28007 If moving between registers and memory is more expensive than
28008 between two registers, you should define this macro to express the relative cost.
28011 Also model the increased cost of moving QImode registers in non Q_REGS classes. */
28015 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
28019 if (FLOAT_CLASS_P (regclass))
28037 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28038 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28040 if (SSE_CLASS_P (regclass))
28043 switch (GET_MODE_SIZE (mode))
28058 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28059 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28061 if (MMX_CLASS_P (regclass))
28064 switch (GET_MODE_SIZE (mode))
28076 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28077 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28079 switch (GET_MODE_SIZE (mode))
28082 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28085 return ix86_cost->int_store[0];
28086 if (TARGET_PARTIAL_REG_DEPENDENCY
28087 && optimize_function_for_speed_p (cfun))
28088 cost = ix86_cost->movzbl_load;
28090 cost = ix86_cost->int_load[0];
28092 return MAX (cost, ix86_cost->int_store[0]);
28098 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28100 return ix86_cost->movzbl_load;
28102 return ix86_cost->int_store[0] + 4;
28107 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28108 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28110 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
28111 if (mode == TFmode)
28114 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28116 cost = ix86_cost->int_load[2];
28118 cost = ix86_cost->int_store[2];
28119 return (cost * (((int) GET_MODE_SIZE (mode)
28120 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
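/* Example (editor's note): a DImode value in GENERAL_REGS on a 32-bit
   target needs (8 + 4 - 1) / 4 = 2 word-sized moves, so the cost is twice
   ix86_cost->int_load[2] (or int_store[2]; the maximum of both when IN
   is 2).  */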
28125 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28128 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28132 /* Return the cost of moving data from a register in class CLASS1 to
28133 one in class CLASS2.
28135 It is not required that the cost always equal 2 when FROM is the same as TO;
28136 on some machines it is expensive to move between registers if they are not
28137 general registers. */
28140 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28141 reg_class_t class2_i)
28143 enum reg_class class1 = (enum reg_class) class1_i;
28144 enum reg_class class2 = (enum reg_class) class2_i;
28146 /* In case we require secondary memory, compute the cost of the store followed
28147 by the load. In order to avoid bad register allocation choices, we need
28148 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28150 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28154 cost += inline_memory_move_cost (mode, class1, 2);
28155 cost += inline_memory_move_cost (mode, class2, 2);
28157 /* In the case of copying from a general purpose register we may emit multiple
28158 stores followed by a single load, causing a memory-size-mismatch stall.
28159 Count this as an arbitrarily high cost of 20. */
28160 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28163 /* In the case of FP/MMX moves, the registers actually overlap, and we
28164 have to switch modes in order to treat them differently. */
28165 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28166 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28172 /* Moves between the SSE/MMX and integer units are expensive. */
28173 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28174 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28176 /* ??? By keeping the returned value relatively high, we limit the number
28177 of moves between integer and MMX/SSE registers for all targets.
28178 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
28179 where integer modes in MMX/SSE registers are not tieable
28180 because of missing QImode and HImode moves to, from, or between
28181 MMX/SSE registers. */
28182 return MAX (8, ix86_cost->mmxsse_to_integer);
28184 if (MAYBE_FLOAT_CLASS_P (class1))
28185 return ix86_cost->fp_move;
28186 if (MAYBE_SSE_CLASS_P (class1))
28187 return ix86_cost->sse_move;
28188 if (MAYBE_MMX_CLASS_P (class1))
28189 return ix86_cost->mmx_move;
28193 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28196 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28198 /* The flags register, and only the flags register, can hold CCmode values. */
28199 if (CC_REGNO_P (regno))
28200 return GET_MODE_CLASS (mode) == MODE_CC;
28201 if (GET_MODE_CLASS (mode) == MODE_CC
28202 || GET_MODE_CLASS (mode) == MODE_RANDOM
28203 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28205 if (FP_REGNO_P (regno))
28206 return VALID_FP_MODE_P (mode);
28207 if (SSE_REGNO_P (regno))
28209 /* We implement the move patterns for all vector modes into and
28210 out of SSE registers, even when no operation instructions
28211 are available. OImode move is available only when AVX is enabled. */
28213 return ((TARGET_AVX && mode == OImode)
28214 || VALID_AVX256_REG_MODE (mode)
28215 || VALID_SSE_REG_MODE (mode)
28216 || VALID_SSE2_REG_MODE (mode)
28217 || VALID_MMX_REG_MODE (mode)
28218 || VALID_MMX_REG_MODE_3DNOW (mode));
28220 if (MMX_REGNO_P (regno))
28222 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28223 so if the register is available at all, then we can move data of
28224 the given mode into or out of it. */
28225 return (VALID_MMX_REG_MODE (mode)
28226 || VALID_MMX_REG_MODE_3DNOW (mode));
28229 if (mode == QImode)
28231 /* Take care with QImode values - they can live in non-QI regs,
28232 but then they cause partial register stalls. */
28233 if (regno <= BX_REG || TARGET_64BIT)
28235 if (!TARGET_PARTIAL_REG_STALL)
28237 return reload_in_progress || reload_completed;
28239 /* We handle both integers and floats in the general purpose registers. */
28240 else if (VALID_INT_MODE_P (mode))
28242 else if (VALID_FP_MODE_P (mode))
28244 else if (VALID_DFP_MODE_P (mode))
28246 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28247 on to use that value in smaller contexts, this can easily force a
28248 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28249 supporting DImode, allow it. */
28250 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28256 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28257 tieable integer mode. */
28260 ix86_tieable_integer_mode_p (enum machine_mode mode)
28269 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28272 return TARGET_64BIT;
28279 /* Return true if MODE1 is accessible in a register that can hold MODE2
28280 without copying. That is, all register classes that can hold MODE2
28281 can also hold MODE1. */
28284 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28286 if (mode1 == mode2)
28289 if (ix86_tieable_integer_mode_p (mode1)
28290 && ix86_tieable_integer_mode_p (mode2))
28293 /* MODE2 being XFmode implies fp stack or general regs, which means we
28294 can tie any smaller floating point modes to it. Note that we do not
28295 tie this with TFmode. */
28296 if (mode2 == XFmode)
28297 return mode1 == SFmode || mode1 == DFmode;
28299 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28300 that we can tie it with SFmode. */
28301 if (mode2 == DFmode)
28302 return mode1 == SFmode;
28304 /* If MODE2 is only appropriate for an SSE register, then tie with
28305 any other mode acceptable to SSE registers. */
28306 if (GET_MODE_SIZE (mode2) == 16
28307 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28308 return (GET_MODE_SIZE (mode1) == 16
28309 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28311 /* If MODE2 is appropriate for an MMX register, then tie
28312 with any other mode acceptable to MMX registers. */
28313 if (GET_MODE_SIZE (mode2) == 8
28314 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28315 return (GET_MODE_SIZE (mode1) == 8
28316 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
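/* Examples (editor's note) of the rules above: DFmode ties to XFmode (both
   live in the fp stack or general regs), SFmode ties to DFmode, and two
   16-byte vector modes such as V4SFmode and V2DFmode tie because exactly
   the SSE registers can hold them.  */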
28321 /* Compute a (partial) cost for rtx X. Return true if the complete
28322 cost has been computed, and false if subexpressions should be
28323 scanned. In either case, *TOTAL contains the cost result. */
28326 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28328 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28329 enum machine_mode mode = GET_MODE (x);
28330 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28338 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28340 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28342 else if (flag_pic && SYMBOLIC_CONST (x)
28344 || (GET_CODE (x) != LABEL_REF
28345 && (GET_CODE (x) != SYMBOL_REF
28346 || !SYMBOL_REF_LOCAL_P (x)))))
28353 if (mode == VOIDmode)
28356 switch (standard_80387_constant_p (x))
28361 default: /* Other constants */
28366 /* Start with (MEM (SYMBOL_REF)), since that's where
28367 it'll probably end up. Add a penalty for size. */
28368 *total = (COSTS_N_INSNS (1)
28369 + (flag_pic != 0 && !TARGET_64BIT)
28370 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28376 /* Zero extension is often completely free on x86_64, so make
28377 it as cheap as possible. */
28378 if (TARGET_64BIT && mode == DImode
28379 && GET_MODE (XEXP (x, 0)) == SImode)
28381 else if (TARGET_ZERO_EXTEND_WITH_AND)
28382 *total = cost->add;
28384 *total = cost->movzx;
28388 *total = cost->movsx;
28392 if (CONST_INT_P (XEXP (x, 1))
28393 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28395 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28398 *total = cost->add;
28401 if ((value == 2 || value == 3)
28402 && cost->lea <= cost->shift_const)
28404 *total = cost->lea;
28414 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28416 if (CONST_INT_P (XEXP (x, 1)))
28418 if (INTVAL (XEXP (x, 1)) > 32)
28419 *total = cost->shift_const + COSTS_N_INSNS (2);
28421 *total = cost->shift_const * 2;
28425 if (GET_CODE (XEXP (x, 1)) == AND)
28426 *total = cost->shift_var * 2;
28428 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28433 if (CONST_INT_P (XEXP (x, 1)))
28434 *total = cost->shift_const;
28436 *total = cost->shift_var;
28441 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28443 /* ??? SSE scalar cost should be used here. */
28444 *total = cost->fmul;
28447 else if (X87_FLOAT_MODE_P (mode))
28449 *total = cost->fmul;
28452 else if (FLOAT_MODE_P (mode))
28454 /* ??? SSE vector cost should be used here. */
28455 *total = cost->fmul;
28460 rtx op0 = XEXP (x, 0);
28461 rtx op1 = XEXP (x, 1);
28463 if (CONST_INT_P (XEXP (x, 1)))
28465 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28466 for (nbits = 0; value != 0; value &= value - 1)
28470 /* This is arbitrary. */
28473 /* Compute costs correctly for widening multiplication. */
28474 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28475 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28476 == GET_MODE_SIZE (mode))
28478 int is_mulwiden = 0;
28479 enum machine_mode inner_mode = GET_MODE (op0);
28481 if (GET_CODE (op0) == GET_CODE (op1))
28482 is_mulwiden = 1, op1 = XEXP (op1, 0);
28483 else if (CONST_INT_P (op1))
28485 if (GET_CODE (op0) == SIGN_EXTEND)
28486 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28489 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28493 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28496 *total = (cost->mult_init[MODE_INDEX (mode)]
28497 + nbits * cost->mult_bit
28498 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
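/* Example (editor's note): (mult:DI (sign_extend:DI (reg:SI))
   (sign_extend:DI (reg:SI))) is recognized above as a widening multiply:
   the extensions are stripped and the operation is costed as an SImode
   multiply, since mult_init and mult_bit are indexed by the inner mode.  */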
28507 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28508 /* ??? SSE cost should be used here. */
28509 *total = cost->fdiv;
28510 else if (X87_FLOAT_MODE_P (mode))
28511 *total = cost->fdiv;
28512 else if (FLOAT_MODE_P (mode))
28513 /* ??? SSE vector cost should be used here. */
28514 *total = cost->fdiv;
28516 *total = cost->divide[MODE_INDEX (mode)];
28520 if (GET_MODE_CLASS (mode) == MODE_INT
28521 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28523 if (GET_CODE (XEXP (x, 0)) == PLUS
28524 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28525 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28526 && CONSTANT_P (XEXP (x, 1)))
28528 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28529 if (val == 2 || val == 4 || val == 8)
28531 *total = cost->lea;
28532 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28533 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28534 outer_code, speed);
28535 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28539 else if (GET_CODE (XEXP (x, 0)) == MULT
28540 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28542 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28543 if (val == 2 || val == 4 || val == 8)
28545 *total = cost->lea;
28546 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28547 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28551 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28553 *total = cost->lea;
28554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28555 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28556 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28563 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28565 /* ??? SSE cost should be used here. */
28566 *total = cost->fadd;
28569 else if (X87_FLOAT_MODE_P (mode))
28571 *total = cost->fadd;
28574 else if (FLOAT_MODE_P (mode))
28576 /* ??? SSE vector cost should be used here. */
28577 *total = cost->fadd;
28585 if (!TARGET_64BIT && mode == DImode)
28587 *total = (cost->add * 2
28588 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28589 << (GET_MODE (XEXP (x, 0)) != DImode))
28590 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28591 << (GET_MODE (XEXP (x, 1)) != DImode)));
28597 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28599 /* ??? SSE cost should be used here. */
28600 *total = cost->fchs;
28603 else if (X87_FLOAT_MODE_P (mode))
28605 *total = cost->fchs;
28608 else if (FLOAT_MODE_P (mode))
28610 /* ??? SSE vector cost should be used here. */
28611 *total = cost->fchs;
28617 if (!TARGET_64BIT && mode == DImode)
28618 *total = cost->add * 2;
28620 *total = cost->add;
28624 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28625 && XEXP (XEXP (x, 0), 1) == const1_rtx
28626 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28627 && XEXP (x, 1) == const0_rtx)
28629 /* This kind of construct is implemented using test[bwl].
28630 Treat it as if we had an AND. */
28631 *total = (cost->add
28632 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28633 + rtx_cost (const1_rtx, outer_code, speed));
28639 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28644 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28645 /* ??? SSE cost should be used here. */
28646 *total = cost->fabs;
28647 else if (X87_FLOAT_MODE_P (mode))
28648 *total = cost->fabs;
28649 else if (FLOAT_MODE_P (mode))
28650 /* ??? SSE vector cost should be used here. */
28651 *total = cost->fabs;
28655 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28656 /* ??? SSE cost should be used here. */
28657 *total = cost->fsqrt;
28658 else if (X87_FLOAT_MODE_P (mode))
28659 *total = cost->fsqrt;
28660 else if (FLOAT_MODE_P (mode))
28661 /* ??? SSE vector cost should be used here. */
28662 *total = cost->fsqrt;
28666 if (XINT (x, 1) == UNSPEC_TP)
28673 case VEC_DUPLICATE:
29674 /* ??? Assume all of these vector manipulation patterns are
29675 recognizable, in which case they all pretty much have the same cost. */
28677 *total = COSTS_N_INSNS (1);
28687 static int current_machopic_label_num;
28689 /* Given a symbol name and its associated stub, write out the
28690 definition of the stub. */
28693 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28695 unsigned int length;
28696 char *binder_name, *symbol_name, lazy_ptr_name[32];
28697 int label = ++current_machopic_label_num;
28699 /* For 64-bit we shouldn't get here. */
28700 gcc_assert (!TARGET_64BIT);
28702 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28703 symb = targetm.strip_name_encoding (symb);
28705 length = strlen (stub);
28706 binder_name = XALLOCAVEC (char, length + 32);
28707 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
28709 length = strlen (symb);
28710 symbol_name = XALLOCAVEC (char, length + 32);
28711 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28713 sprintf (lazy_ptr_name, "L%d$lz", label);
28715 if (MACHOPIC_ATT_STUB)
28716 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
28717 else if (MACHOPIC_PURE)
28719 if (TARGET_DEEP_BRANCH_PREDICTION)
28720 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
28722 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
28725 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
28727 fprintf (file, "%s:\n", stub);
28728 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28730 if (MACHOPIC_ATT_STUB)
28732 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
28734 else if (MACHOPIC_PURE)
28737 if (TARGET_DEEP_BRANCH_PREDICTION)
28739 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28740 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
28741 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
28742 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
28746 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx". */
28747 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
28748 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
28750 fprintf (file, "\tjmp\t*%%ecx\n");
28753 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
28755 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
28756 it needs no stub-binding-helper. */
28757 if (MACHOPIC_ATT_STUB)
28760 fprintf (file, "%s:\n", binder_name);
28764 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
28765 fprintf (file, "\tpushl\t%%ecx\n");
28768 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
28770 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
28772 /* N.B. Keep the correspondence of these
28773 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
28774 old-pic/new-pic/non-pic stubs; altering this will break
28775 compatibility with existing dylibs. */
28779 if (TARGET_DEEP_BRANCH_PREDICTION)
28780 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28781 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
28783 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ecx". */
28784 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
28787 /* 16-byte -mdynamic-no-pic stub. */
28788 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
28790 fprintf (file, "%s:\n", lazy_ptr_name);
28791 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28792 fprintf (file, ASM_LONG "%s\n", binder_name);
28794 #endif /* TARGET_MACHO */
28796 /* Order the registers for the register allocator. */
28799 x86_order_regs_for_local_alloc (void)
28804 /* First allocate the local general purpose registers. */
28805 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28806 if (GENERAL_REGNO_P (i) && call_used_regs[i])
28807 reg_alloc_order [pos++] = i;
28809 /* Global general purpose registers. */
28810 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28811 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
28812 reg_alloc_order [pos++] = i;
28814 /* x87 registers come first in case we are doing FP math using them. */
28816 if (!TARGET_SSE_MATH)
28817 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
28818 reg_alloc_order [pos++] = i;
28820 /* SSE registers. */
28821 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
28822 reg_alloc_order [pos++] = i;
28823 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
28824 reg_alloc_order [pos++] = i;
28826 /* x87 registers. */
28827 if (TARGET_SSE_MATH)
28828 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
28829 reg_alloc_order [pos++] = i;
28831 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
28832 reg_alloc_order [pos++] = i;
28834 /* Initialize the rest of the array, as we do not allocate some registers at all. */
28836 while (pos < FIRST_PSEUDO_REGISTER)
28837 reg_alloc_order [pos++] = 0;
28840 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
28841 struct attribute_spec.handler. */
28843 ix86_handle_abi_attribute (tree *node, tree name,
28844 tree args ATTRIBUTE_UNUSED,
28845 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28847 if (TREE_CODE (*node) != FUNCTION_TYPE
28848 && TREE_CODE (*node) != METHOD_TYPE
28849 && TREE_CODE (*node) != FIELD_DECL
28850 && TREE_CODE (*node) != TYPE_DECL)
28852 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28854 *no_add_attrs = true;
28859 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
28861 *no_add_attrs = true;
28865 /* Can combine regparm with all attributes but fastcall. */
28866 if (is_attribute_p ("ms_abi", name))
28868 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
28870 error ("ms_abi and sysv_abi attributes are not compatible");
28875 else if (is_attribute_p ("sysv_abi", name))
28877 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
28879 error ("ms_abi and sysv_abi attributes are not compatible");
28888 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28889 struct attribute_spec.handler. */
28891 ix86_handle_struct_attribute (tree *node, tree name,
28892 tree args ATTRIBUTE_UNUSED,
28893 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28896 if (DECL_P (*node))
28898 if (TREE_CODE (*node) == TYPE_DECL)
28899 type = &TREE_TYPE (*node);
28904 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28905 || TREE_CODE (*type) == UNION_TYPE)))
28907 warning (OPT_Wattributes, "%qE attribute ignored",
28909 *no_add_attrs = true;
28912 else if ((is_attribute_p ("ms_struct", name)
28913 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28914 || ((is_attribute_p ("gcc_struct", name)
28915 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28917 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28919 *no_add_attrs = true;
28926 ix86_handle_fndecl_attribute (tree *node, tree name,
28927 tree args ATTRIBUTE_UNUSED,
28928 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28930 if (TREE_CODE (*node) != FUNCTION_DECL)
28932 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28934 *no_add_attrs = true;
28940 ix86_ms_bitfield_layout_p (const_tree record_type)
28942 return ((TARGET_MS_BITFIELD_LAYOUT
28943 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28944 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
28947 /* Returns an expression indicating where the `this' parameter is
28948 located on entry to FUNCTION. */
28951 x86_this_parameter (tree function)
28953 tree type = TREE_TYPE (function);
28954 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
28959 const int *parm_regs;
28961 if (ix86_function_type_abi (type) == MS_ABI)
28962 parm_regs = x86_64_ms_abi_int_parameter_registers;
28964 parm_regs = x86_64_int_parameter_registers;
28965 return gen_rtx_REG (DImode, parm_regs[aggr]);
28968 nregs = ix86_function_regparm (type, function);
28970 if (nregs > 0 && !stdarg_p (type))
28974 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
28975 regno = aggr ? DX_REG : CX_REG;
28976 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
28980 return gen_rtx_MEM (SImode,
28981 plus_constant (stack_pointer_rtx, 4));
28990 return gen_rtx_MEM (SImode,
28991 plus_constant (stack_pointer_rtx, 4));
28994 return gen_rtx_REG (SImode, regno);
28997 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
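/* Example (editor's note): for a 32-bit fastcall method, `this' arrives in
   %ecx, or in %edx when a hidden aggregate-return pointer already occupies
   %ecx; without register parameters it is found at 4(%esp).  */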
29000 /* Determine whether x86_output_mi_thunk can succeed. */
29003 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
29004 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
29005 HOST_WIDE_INT vcall_offset, const_tree function)
29007 /* 64-bit can handle anything. */
29011 /* For 32-bit, everything's fine if we have one free register. */
29012 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
29015 /* Need a free register for vcall_offset. */
29019 /* Need a free register for GOT references. */
29020 if (flag_pic && !targetm.binds_local_p (function))
29023 /* Otherwise ok. */
29027 /* Output the assembler code for a thunk function. THUNK_DECL is the
29028 declaration for the thunk function itself, FUNCTION is the decl for
29029 the target function. DELTA is an immediate constant offset to be
29030 added to THIS. If VCALL_OFFSET is nonzero, the word at
29031 *(*this + vcall_offset) should be added to THIS. */
29034 x86_output_mi_thunk (FILE *file,
29035 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29036 HOST_WIDE_INT vcall_offset, tree function)
29039 rtx this_param = x86_this_parameter (function);
29042 /* Make sure unwind info is emitted for the thunk if needed. */
29043 final_start_function (emit_barrier (), file, 1);
29045 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29046 pull it in now and let DELTA benefit. */
29047 if (REG_P (this_param))
29048 this_reg = this_param;
29049 else if (vcall_offset)
29051 /* Put the this parameter into %eax. */
29052 xops[0] = this_param;
29053 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29054 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29057 this_reg = NULL_RTX;
29059 /* Adjust the this parameter by a fixed constant. */
29062 xops[0] = GEN_INT (delta);
29063 xops[1] = this_reg ? this_reg : this_param;
29066 if (!x86_64_general_operand (xops[0], DImode))
29068 tmp = gen_rtx_REG (DImode, R10_REG);
29070 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29072 xops[1] = this_param;
29074 if (x86_maybe_negate_const_int (&xops[0], DImode))
29075 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29077 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29079 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29080 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29082 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29085 /* Adjust the this parameter by a value stored in the vtable. */
29089 tmp = gen_rtx_REG (DImode, R10_REG);
29092 int tmp_regno = CX_REG;
29093 if (lookup_attribute ("fastcall",
29094 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29095 || lookup_attribute ("thiscall",
29096 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29097 tmp_regno = AX_REG;
29098 tmp = gen_rtx_REG (SImode, tmp_regno);
29101 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29103 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29105 /* Adjust the this parameter. */
29106 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29107 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29109 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29110 xops[0] = GEN_INT (vcall_offset);
29112 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29113 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29115 xops[1] = this_reg;
29116 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29119 /* If necessary, drop THIS back to its stack slot. */
29120 if (this_reg && this_reg != this_param)
29122 xops[0] = this_reg;
29123 xops[1] = this_param;
29124 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29127 xops[0] = XEXP (DECL_RTL (function), 0);
29130 if (!flag_pic || targetm.binds_local_p (function))
29131 output_asm_insn ("jmp\t%P0", xops);
29132 /* All thunks should be in the same object as their target,
29133 and thus binds_local_p should be true. */
29134 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29135 gcc_unreachable ();
29138 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29139 tmp = gen_rtx_CONST (Pmode, tmp);
29140 tmp = gen_rtx_MEM (QImode, tmp);
29142 output_asm_insn ("jmp\t%A0", xops);
29147 if (!flag_pic || targetm.binds_local_p (function))
29148 output_asm_insn ("jmp\t%P0", xops);
29153 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29154 if (TARGET_MACHO_BRANCH_ISLANDS)
29155 sym_ref = (gen_rtx_SYMBOL_REF
29157 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29158 tmp = gen_rtx_MEM (QImode, sym_ref);
29160 output_asm_insn ("jmp\t%0", xops);
29163 #endif /* TARGET_MACHO */
29165 tmp = gen_rtx_REG (SImode, CX_REG);
29166 output_set_got (tmp, NULL_RTX);
29169 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29170 output_asm_insn ("jmp\t{*}%1", xops);
29173 final_end_function ();
29177 x86_file_start (void)
29179 default_file_start ();
29181 darwin_file_start ();
29183 if (X86_FILE_START_VERSION_DIRECTIVE)
29184 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29185 if (X86_FILE_START_FLTUSED)
29186 fputs ("\t.global\t__fltused\n", asm_out_file);
29187 if (ix86_asm_dialect == ASM_INTEL)
29188 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29192 x86_field_alignment (tree field, int computed)
29194 enum machine_mode mode;
29195 tree type = TREE_TYPE (field);
29197 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29199 mode = TYPE_MODE (strip_array_types (type));
29200 if (mode == DFmode || mode == DCmode
29201 || GET_MODE_CLASS (mode) == MODE_INT
29202 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29203 return MIN (32, computed);
29207 /* Output assembler code to FILE to increment profiler label # LABELNO
29208 for profiling a function entry. */
29210 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29212 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29217 #ifndef NO_PROFILE_COUNTERS
29218 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29221 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29222 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29224 fprintf (file, "\tcall\t%s\n", mcount_name);
29228 #ifndef NO_PROFILE_COUNTERS
29229 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29232 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29236 #ifndef NO_PROFILE_COUNTERS
29237 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29240 fprintf (file, "\tcall\t%s\n", mcount_name);
29244 /* We don't have exact information about the insn sizes, but we may assume
29245 quite safely that we are informed about all 1 byte insns and memory
29246 address sizes. This is enough to eliminate unnecessary padding in most cases. */
29250 min_insn_size (rtx insn)
29254 if (!INSN_P (insn) || !active_insn_p (insn))
29257 /* Discard alignments we've emitted, and jump instructions. */
29258 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29259 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29261 if (JUMP_TABLE_DATA_P (insn))
29264 /* Important case - calls are always 5 bytes.
29265 It is common to have many calls in a row. */
29267 && symbolic_reference_mentioned_p (PATTERN (insn))
29268 && !SIBLING_CALL_P (insn))
29270 len = get_attr_length (insn);
29274 /* For normal instructions we rely on get_attr_length being exact,
29275 with a few exceptions. */
29276 if (!JUMP_P (insn))
29278 enum attr_type type = get_attr_type (insn);
29283 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29284 || asm_noperands (PATTERN (insn)) >= 0)
29291 /* Otherwise trust get_attr_length. */
29295 l = get_attr_length_address (insn);
29296 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29305 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29307 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
29311 ix86_avoid_jump_mispredicts (void)
29313 rtx insn, start = get_insns ();
29314 int nbytes = 0, njumps = 0;
29317 /* Look for all minimal intervals of instructions containing 4 jumps.
29318 The intervals are bounded by START and INSN. NBYTES is the total
29319 size of instructions in the interval including INSN and not including
29320 START. When NBYTES is smaller than 16 bytes, it is possible
29321 that the end of START and the end of INSN land in the same 16-byte page.
29323 The smallest offset in the page at which INSN can start is the case where
29324 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
29325 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN). */
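/* Example (editor's note): if the interval holds four jumps in NBYTES = 12
   bytes and INSN is 2 bytes long, the padding computed below is
   15 - 12 + 2 = 5 bytes, enough to push INSN out of any 16-byte window that
   could also contain the end of START.  */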
29327 for (insn = start; insn; insn = NEXT_INSN (insn))
29331 if (LABEL_P (insn))
29333 int align = label_to_alignment (insn);
29334 int max_skip = label_to_max_skip (insn);
29338 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29339 already in the current 16 byte page, because otherwise
29340 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29341 bytes to reach 16 byte boundary. */
29343 || (align <= 3 && max_skip != (1 << align) - 1))
29346 fprintf (dump_file, "Label %i with max_skip %i\n",
29347 INSN_UID (insn), max_skip);
29350 while (nbytes + max_skip >= 16)
29352 start = NEXT_INSN (start);
29353 if ((JUMP_P (start)
29354 && GET_CODE (PATTERN (start)) != ADDR_VEC
29355 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29357 njumps--, isjump = 1;
29360 nbytes -= min_insn_size (start);
29366 min_size = min_insn_size (insn);
29367 nbytes += min_size;
29369 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29370 INSN_UID (insn), min_size);
29372 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29373 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29381 start = NEXT_INSN (start);
29382 if ((JUMP_P (start)
29383 && GET_CODE (PATTERN (start)) != ADDR_VEC
29384 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29386 njumps--, isjump = 1;
29389 nbytes -= min_insn_size (start);
29391 gcc_assert (njumps >= 0);
29393 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29394 INSN_UID (start), INSN_UID (insn), nbytes);
29396 if (njumps == 3 && isjump && nbytes < 16)
29398 int padsize = 15 - nbytes + min_insn_size (insn);
29401 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29402 INSN_UID (insn), padsize);
29403 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
29409 /* AMD Athlon works faster
29410 when RET is not the destination of a conditional jump or directly preceded
29411 by another jump instruction. We avoid the penalty by inserting a NOP just
29412 before the RET instructions in such cases. */
29414 ix86_pad_returns (void)
29419 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29421 basic_block bb = e->src;
29422 rtx ret = BB_END (bb);
29424 bool replace = false;
29426 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29427 || optimize_bb_for_size_p (bb))
29429 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29430 if (active_insn_p (prev) || LABEL_P (prev))
29432 if (prev && LABEL_P (prev))
29437 FOR_EACH_EDGE (e, ei, bb->preds)
29438 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29439 && !(e->flags & EDGE_FALLTHRU))
29444 prev = prev_active_insn (ret);
29446 && ((JUMP_P (prev) && any_condjump_p (prev))
29449 /* Empty functions get a branch mispredict even when the jump destination
29450 is not visible to us. */
29451 if (!prev && !optimize_function_for_size_p (cfun))
29456 emit_jump_insn_before (gen_return_internal_long (), ret);
29462 /* Count the minimum number of instructions in BB. Return 4 if the
29463 number of instructions >= 4. */
29466 ix86_count_insn_bb (basic_block bb)
29469 int insn_count = 0;
29471 /* Count number of instructions in this block. Return 4 if the number
29472 of instructions >= 4. */
29473 FOR_BB_INSNS (bb, insn)
29475 /* This only happens in exit blocks. */
29477 && GET_CODE (PATTERN (insn)) == RETURN)
29480 if (NONDEBUG_INSN_P (insn)
29481 && GET_CODE (PATTERN (insn)) != USE
29482 && GET_CODE (PATTERN (insn)) != CLOBBER)
29485 if (insn_count >= 4)
29494 /* Count the minimum number of instructions in code path in BB.
29495 Return 4 if the number of instructions >= 4. */
29498 ix86_count_insn (basic_block bb)
29502 int min_prev_count;
29504 /* Only bother counting instructions along paths with no
29505 more than 2 basic blocks between entry and exit. Given
29506 that BB has an edge to exit, determine if a predecessor
29507 of BB has an edge from entry. If so, compute the number
29508 of instructions in the predecessor block. If there
29509 happen to be multiple such blocks, compute the minimum. */
29510 min_prev_count = 4;
29511 FOR_EACH_EDGE (e, ei, bb->preds)
29514 edge_iterator prev_ei;
29516 if (e->src == ENTRY_BLOCK_PTR)
29518 min_prev_count = 0;
29521 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29523 if (prev_e->src == ENTRY_BLOCK_PTR)
29525 int count = ix86_count_insn_bb (e->src);
29526 if (count < min_prev_count)
29527 min_prev_count = count;
29533 if (min_prev_count < 4)
29534 min_prev_count += ix86_count_insn_bb (bb);
29536 return min_prev_count;
29539 /* Pad short functions to 4 instructions. */
29542 ix86_pad_short_function (void)
29547 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29549 rtx ret = BB_END (e->src);
29550 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29552 int insn_count = ix86_count_insn (e->src);
29554 /* Pad short function. */
29555 if (insn_count < 4)
29559 /* Find epilogue. */
29562 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29563 insn = PREV_INSN (insn);
29568 /* Two NOPs are counted as one instruction. */
29569 insn_count = 2 * (4 - insn_count);
29570 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
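/* Example (editor's note): a function with a single instruction gets
   2 * (4 - 1) = 6 NOPs emitted before the epilogue; at two NOPs per counted
   instruction that brings the total to the required 4.  */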
29576 /* Implement machine specific optimizations. We implement padding of returns
29577 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
29581 if (optimize && optimize_function_for_speed_p (cfun))
29583 if (TARGET_PAD_SHORT_FUNCTION)
29584 ix86_pad_short_function ();
29585 else if (TARGET_PAD_RETURNS)
29586 ix86_pad_returns ();
29587 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29588 if (TARGET_FOUR_JUMP_LIMIT)
29589 ix86_avoid_jump_mispredicts ();
29593 /* Run the vzeroupper optimization if needed. */
29594 if (cfun->machine->use_vzeroupper_p)
29595 move_or_delete_vzeroupper ();
29598 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
29601 x86_extended_QIreg_mentioned_p (rtx insn)
29604 extract_insn_cached (insn);
29605 for (i = 0; i < recog_data.n_operands; i++)
29606 if (REG_P (recog_data.operand[i])
29607 && REGNO (recog_data.operand[i]) > BX_REG)
29612 /* Return nonzero when P points to a register encoded via a REX prefix.
29613 Called via for_each_rtx. */
29615 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
29617 unsigned int regno;
29620 regno = REGNO (*p);
29621 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
29624 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
29627 x86_extended_reg_mentioned_p (rtx insn)
29629 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
29630 extended_reg_mentioned_1, NULL);
29633 /* If profitable, negate (without causing overflow) the integer constant
29634 of mode MODE at location LOC. Return true in this case. */
29636 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
29640 if (!CONST_INT_P (*loc))
29646 /* DImode x86_64 constants must fit in 32 bits. */
29647 gcc_assert (x86_64_immediate_operand (*loc, mode));
29658 gcc_unreachable ();
29661 /* Avoid overflows. */
29662 if (mode_signbit_p (mode, *loc))
29665 val = INTVAL (*loc);
29667 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
29668 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
29669 if ((val < 0 && val != -128)
29672 *loc = GEN_INT (-val);
29679 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
29680 optabs would emit if we didn't have TFmode patterns. */
29683 x86_emit_floatuns (rtx operands[2])
29685 rtx neglab, donelab, i0, i1, f0, in, out;
29686 enum machine_mode mode, inmode;
29688 inmode = GET_MODE (operands[1]);
29689 gcc_assert (inmode == SImode || inmode == DImode);
29692 in = force_reg (inmode, operands[1]);
29693 mode = GET_MODE (out);
29694 neglab = gen_label_rtx ();
29695 donelab = gen_label_rtx ();
29696 f0 = gen_reg_rtx (mode);
29698 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
29700 expand_float (out, in, 0);
29702 emit_jump_insn (gen_jump (donelab));
29705 emit_label (neglab);
29707 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
29709 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
29711 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
29713 expand_float (f0, i0, 0);
29715 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
29717 emit_label (donelab);
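/* Editor's sketch (not part of the original source): a C-level equivalent
   of the sequence emitted above, for DImode input on a 64-bit target;
   floatuns_di is a hypothetical helper, assuming round-to-nearest FP
   arithmetic.  */
#if 0
static double
floatuns_di (unsigned long long in)
{
  if ((long long) in >= 0)
    return (double) (long long) in;       /* fits in the signed range */

  /* Halve the value, preserving the low bit so the final rounding is
     unaffected, convert, then double the result.  */
  unsigned long long i0 = (in >> 1) | (in & 1);
  double f0 = (double) (long long) i0;
  return f0 + f0;
}
#endif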
29720 /* AVX does not support 32-byte integer vector operations,
29721 thus the longest vector we are faced with is V16QImode. */
29722 #define MAX_VECT_LEN 16
29724 struct expand_vec_perm_d
29726 rtx target, op0, op1;
29727 unsigned char perm[MAX_VECT_LEN];
29728 enum machine_mode vmode;
29729 unsigned char nelt;
29733 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
29734 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
29736 /* Get a vector mode of the same size as the original but with elements
29737 twice as wide. This is only guaranteed to apply to integral vectors. */
29739 static inline enum machine_mode
29740 get_mode_wider_vector (enum machine_mode o)
29742 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
29743 enum machine_mode n = GET_MODE_WIDER_MODE (o);
29744 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
29745 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
29749 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29750 with all elements equal to VAR. Return true if successful. */
29753 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
29754 rtx target, rtx val)
29777 /* First attempt to recognize VAL as-is. */
29778 dup = gen_rtx_VEC_DUPLICATE (mode, val);
29779 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
29780 if (recog_memoized (insn) < 0)
29783 /* If that fails, force VAL into a register. */
29786 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
29787 seq = get_insns ();
29790 emit_insn_before (seq, insn);
29792 ok = recog_memoized (insn) >= 0;
29801 if (TARGET_SSE || TARGET_3DNOW_A)
29805 val = gen_lowpart (SImode, val);
29806 x = gen_rtx_TRUNCATE (HImode, val);
29807 x = gen_rtx_VEC_DUPLICATE (mode, x);
29808 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29821 struct expand_vec_perm_d dperm;
29825 memset (&dperm, 0, sizeof (dperm));
29826 dperm.target = target;
29827 dperm.vmode = mode;
29828 dperm.nelt = GET_MODE_NUNITS (mode);
29829 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
29831 /* Extend to SImode using a paradoxical SUBREG. */
29832 tmp1 = gen_reg_rtx (SImode);
29833 emit_move_insn (tmp1, gen_lowpart (SImode, val));
29835 /* Insert the SImode value as low element of a V4SImode vector. */
29836 tmp2 = gen_lowpart (V4SImode, dperm.op0);
29837 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
29839 ok = (expand_vec_perm_1 (&dperm)
29840 || expand_vec_perm_broadcast_1 (&dperm));
29852 /* Replicate the value once into the next wider mode and recurse. */
29854 enum machine_mode smode, wsmode, wvmode;
29857 smode = GET_MODE_INNER (mode);
29858 wvmode = get_mode_wider_vector (mode);
29859 wsmode = GET_MODE_INNER (wvmode);
29861 val = convert_modes (wsmode, smode, val, true);
29862 x = expand_simple_binop (wsmode, ASHIFT, val,
29863 GEN_INT (GET_MODE_BITSIZE (smode)),
29864 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29865 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
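/* For example, broadcasting the HImode value 0x1234 first builds the
   SImode value 0x12341234, then recurses with a vector of half as
   many SImode elements (illustrative).  */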
29867 x = gen_lowpart (wvmode, target);
29868 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
29876 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
29877 rtx x = gen_reg_rtx (hvmode);
29879 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
29882 x = gen_rtx_VEC_CONCAT (mode, x, x);
29883 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29892 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29893 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
29897 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
29898 rtx target, rtx var, int one_var)
29900 enum machine_mode vsimode;
29903 bool use_vector_set = false;
29908 /* For SSE4.1, we normally use vector set. But if the second
29909 element is zero and inter-unit moves are OK, we use movq instead. */
29911 use_vector_set = (TARGET_64BIT
29913 && !(TARGET_INTER_UNIT_MOVES
29919 use_vector_set = TARGET_SSE4_1;
29922 use_vector_set = TARGET_SSE2;
29925 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
29932 use_vector_set = TARGET_AVX;
29935 /* Use ix86_expand_vector_set in 64bit mode only. */
29936 use_vector_set = TARGET_AVX && TARGET_64BIT;
29942 if (use_vector_set)
29944 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
29945 var = force_reg (GET_MODE_INNER (mode), var);
29946 ix86_expand_vector_set (mmx_ok, target, var, one_var);
29962 var = force_reg (GET_MODE_INNER (mode), var);
29963 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
29964 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29969 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
29970 new_target = gen_reg_rtx (mode);
29972 new_target = target;
29973 var = force_reg (GET_MODE_INNER (mode), var);
29974 x = gen_rtx_VEC_DUPLICATE (mode, var);
29975 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
29976 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
29979 /* We need to shuffle the value to the correct position, so
29980 create a new pseudo to store the intermediate result. */
29982 /* With SSE2, we can use the integer shuffle insns. */
29983 if (mode != V4SFmode && TARGET_SSE2)
29985 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
29987 GEN_INT (one_var == 1 ? 0 : 1),
29988 GEN_INT (one_var == 2 ? 0 : 1),
29989 GEN_INT (one_var == 3 ? 0 : 1)));
29990 if (target != new_target)
29991 emit_move_insn (target, new_target);
29995 /* Otherwise convert the intermediate result to V4SFmode and
29996 use the SSE1 shuffle instructions. */
29997 if (mode != V4SFmode)
29999 tmp = gen_reg_rtx (V4SFmode);
30000 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
30005 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
30007 GEN_INT (one_var == 1 ? 0 : 1),
30008 GEN_INT (one_var == 2 ? 0+4 : 1+4),
30009 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
30011 if (mode != V4SFmode)
30012 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
30013 else if (tmp != target)
30014 emit_move_insn (target, tmp);
30016 else if (target != new_target)
30017 emit_move_insn (target, new_target);
30022 vsimode = V4SImode;
30028 vsimode = V2SImode;
30034 /* Zero extend the variable element to SImode and recurse. */
30035 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30037 x = gen_reg_rtx (vsimode);
30038 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30040 gcc_unreachable ();
30042 emit_move_insn (target, gen_lowpart (mode, x));
30050 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30051 consisting of the values in VALS. It is known that all elements
30052 except ONE_VAR are constants. Return true if successful. */
30055 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30056 rtx target, rtx vals, int one_var)
30058 rtx var = XVECEXP (vals, 0, one_var);
30059 enum machine_mode wmode;
30062 const_vec = copy_rtx (vals);
30063 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30064 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30072 /* For the two element vectors, it's just as easy to use
30073 the general case. */
30077 /* Use ix86_expand_vector_set in 64bit mode only. */
30100 /* There's no way to set one QImode entry easily. Combine
30101 the variable value with its adjacent constant value, and
30102 promote to an HImode set. */
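/* For example (illustrative), with ONE_VAR == 3 the adjacent constant
   is VALS[2]; the combined value (var << 8) | (VALS[2] & 0xff) is
   inserted as HImode element ONE_VAR >> 1 == 1.  */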
30103 x = XVECEXP (vals, 0, one_var ^ 1);
30106 var = convert_modes (HImode, QImode, var, true);
30107 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30108 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30109 x = GEN_INT (INTVAL (x) & 0xff);
30113 var = convert_modes (HImode, QImode, var, true);
30114 x = gen_int_mode (INTVAL (x) << 8, HImode);
30116 if (x != const0_rtx)
30117 var = expand_simple_binop (HImode, IOR, var, x, var,
30118 1, OPTAB_LIB_WIDEN);
30120 x = gen_reg_rtx (wmode);
30121 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30122 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30124 emit_move_insn (target, gen_lowpart (mode, x));
30131 emit_move_insn (target, const_vec);
30132 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30136 /* A subroutine of ix86_expand_vector_init_general. Use vector
30137 concatenate to handle the most general case: all values variable,
30138 and none identical. */
30141 ix86_expand_vector_init_concat (enum machine_mode mode,
30142 rtx target, rtx *ops, int n)
30144 enum machine_mode cmode, hmode = VOIDmode;
30145 rtx first[8], second[4];
30185 gcc_unreachable ();
30188 if (!register_operand (ops[1], cmode))
30189 ops[1] = force_reg (cmode, ops[1]);
30190 if (!register_operand (ops[0], cmode))
30191 ops[0] = force_reg (cmode, ops[0]);
30192 emit_insn (gen_rtx_SET (VOIDmode, target,
30193 gen_rtx_VEC_CONCAT (mode, ops[0],
30213 gcc_unreachable ();
30229 gcc_unreachable ();
30234 /* FIXME: We process inputs backward to help RA. PR 36222. */
30237 for (; i > 0; i -= 2, j--)
30239 first[j] = gen_reg_rtx (cmode);
30240 v = gen_rtvec (2, ops[i - 1], ops[i]);
30241 ix86_expand_vector_init (false, first[j],
30242 gen_rtx_PARALLEL (cmode, v));
30248 gcc_assert (hmode != VOIDmode);
30249 for (i = j = 0; i < n; i += 2, j++)
30251 second[j] = gen_reg_rtx (hmode);
30252 ix86_expand_vector_init_concat (hmode, second [j],
30256 ix86_expand_vector_init_concat (mode, target, second, n);
30259 ix86_expand_vector_init_concat (mode, target, first, n);
30263 gcc_unreachable ();
30267 /* A subroutine of ix86_expand_vector_init_general. Use vector
30268 interleave to handle the most general case: all values variable,
30269 and none identical. */
30272 ix86_expand_vector_init_interleave (enum machine_mode mode,
30273 rtx target, rtx *ops, int n)
30275 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30278 rtx (*gen_load_even) (rtx, rtx, rtx);
30279 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30280 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30285 gen_load_even = gen_vec_setv8hi;
30286 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30287 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30288 inner_mode = HImode;
30289 first_imode = V4SImode;
30290 second_imode = V2DImode;
30291 third_imode = VOIDmode;
30294 gen_load_even = gen_vec_setv16qi;
30295 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30296 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30297 inner_mode = QImode;
30298 first_imode = V8HImode;
30299 second_imode = V4SImode;
30300 third_imode = V2DImode;
30303 gcc_unreachable ();
30306 for (i = 0; i < n; i++)
30308 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30309 op0 = gen_reg_rtx (SImode);
30310 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30312 /* Insert the SImode value as low element of V4SImode vector. */
30313 op1 = gen_reg_rtx (V4SImode);
30314 op0 = gen_rtx_VEC_MERGE (V4SImode,
30315 gen_rtx_VEC_DUPLICATE (V4SImode,
30317 CONST0_RTX (V4SImode),
30319 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30321 /* Cast the V4SImode vector back to a vector in the original mode. */
30322 op0 = gen_reg_rtx (mode);
30323 emit_move_insn (op0, gen_lowpart (mode, op1));
30325 /* Load even elements into the second position. */
30326 emit_insn (gen_load_even (op0,
30327 force_reg (inner_mode,
30331 /* Cast vector to FIRST_IMODE vector. */
30332 ops[i] = gen_reg_rtx (first_imode);
30333 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30336 /* Interleave low FIRST_IMODE vectors. */
30337 for (i = j = 0; i < n; i += 2, j++)
30339 op0 = gen_reg_rtx (first_imode);
30340 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30342 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30343 ops[j] = gen_reg_rtx (second_imode);
30344 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30347 /* Interleave low SECOND_IMODE vectors. */
30348 switch (second_imode)
30351 for (i = j = 0; i < n / 2; i += 2, j++)
30353 op0 = gen_reg_rtx (second_imode);
30354 emit_insn (gen_interleave_second_low (op0, ops[i],
30357 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
30359 ops[j] = gen_reg_rtx (third_imode);
30360 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30362 second_imode = V2DImode;
30363 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30367 op0 = gen_reg_rtx (second_imode);
30368 emit_insn (gen_interleave_second_low (op0, ops[0],
30371 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
30373 emit_insn (gen_rtx_SET (VOIDmode, target,
30374 gen_lowpart (mode, op0)));
30378 gcc_unreachable ();
30382 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30383 all values variable, and none identical. */
30386 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30387 rtx target, rtx vals)
30389 rtx ops[32], op0, op1;
30390 enum machine_mode half_mode = VOIDmode;
30397 if (!mmx_ok && !TARGET_SSE)
30409 n = GET_MODE_NUNITS (mode);
30410 for (i = 0; i < n; i++)
30411 ops[i] = XVECEXP (vals, 0, i);
30412 ix86_expand_vector_init_concat (mode, target, ops, n);
30416 half_mode = V16QImode;
30420 half_mode = V8HImode;
30424 n = GET_MODE_NUNITS (mode);
30425 for (i = 0; i < n; i++)
30426 ops[i] = XVECEXP (vals, 0, i);
30427 op0 = gen_reg_rtx (half_mode);
30428 op1 = gen_reg_rtx (half_mode);
30429 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30431 ix86_expand_vector_init_interleave (half_mode, op1,
30432 &ops [n >> 1], n >> 2);
30433 emit_insn (gen_rtx_SET (VOIDmode, target,
30434 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30438 if (!TARGET_SSE4_1)
30446 /* Don't use ix86_expand_vector_init_interleave if we can't
30447 move from GPR to SSE register directly. */
30448 if (!TARGET_INTER_UNIT_MOVES)
30451 n = GET_MODE_NUNITS (mode);
30452 for (i = 0; i < n; i++)
30453 ops[i] = XVECEXP (vals, 0, i);
30454 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30462 gcc_unreachable ();
30466 int i, j, n_elts, n_words, n_elt_per_word;
30467 enum machine_mode inner_mode;
30468 rtx words[4], shift;
30470 inner_mode = GET_MODE_INNER (mode);
30471 n_elts = GET_MODE_NUNITS (mode);
30472 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30473 n_elt_per_word = n_elts / n_words;
30474 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
30476 for (i = 0; i < n_words; ++i)
30478 rtx word = NULL_RTX;
30480 for (j = 0; j < n_elt_per_word; ++j)
30482 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30483 elt = convert_modes (word_mode, inner_mode, elt, true);
30489 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30490 word, 1, OPTAB_LIB_WIDEN);
30491 word = expand_simple_binop (word_mode, IOR, word, elt,
30492 word, 1, OPTAB_LIB_WIDEN);
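/* For example (illustrative), for V8HImode on a 32-bit target each
   SImode word i receives two HImode elements as
   (elt[2*i+1] << 16) | elt[2*i].  */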
30500 emit_move_insn (target, gen_lowpart (mode, words[0]));
30501 else if (n_words == 2)
30503 rtx tmp = gen_reg_rtx (mode);
30504 emit_clobber (tmp);
30505 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30506 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30507 emit_move_insn (target, tmp);
30509 else if (n_words == 4)
30511 rtx tmp = gen_reg_rtx (V4SImode);
30512 gcc_assert (word_mode == SImode);
30513 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30514 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30515 emit_move_insn (target, gen_lowpart (mode, tmp));
30518 gcc_unreachable ();
30522 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30523 instructions unless MMX_OK is true. */
30526 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30528 enum machine_mode mode = GET_MODE (target);
30529 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30530 int n_elts = GET_MODE_NUNITS (mode);
30531 int n_var = 0, one_var = -1;
30532 bool all_same = true, all_const_zero = true;
30536 for (i = 0; i < n_elts; ++i)
30538 x = XVECEXP (vals, 0, i);
30539 if (!(CONST_INT_P (x)
30540 || GET_CODE (x) == CONST_DOUBLE
30541 || GET_CODE (x) == CONST_FIXED))
30542 n_var++, one_var = i;
30543 else if (x != CONST0_RTX (inner_mode))
30544 all_const_zero = false;
30545 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30549 /* Constants are best loaded from the constant pool. */
30552 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30556 /* If all values are identical, broadcast the value. */
30558 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30559 XVECEXP (vals, 0, 0)))
30562 /* Values where only one field is non-constant are best loaded from
30563 the pool and overwritten via move later. */
30567 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30568 XVECEXP (vals, 0, one_var),
30572 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30576 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30580 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30582 enum machine_mode mode = GET_MODE (target);
30583 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30584 enum machine_mode half_mode;
30585 bool use_vec_merge = false;
30587 static rtx (*gen_extract[6][2]) (rtx, rtx)
30589 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30590 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30591 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30592 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30593 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
30594 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
30596 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
30598 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
30599 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
30600 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
30601 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
30602 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
30603 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
30613 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30614 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
30616 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30618 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30619 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30625 use_vec_merge = TARGET_SSE4_1;
30633 /* For the two element vectors, we implement a VEC_CONCAT with
30634 the extraction of the other element. */
30636 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
30637 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
30640 op0 = val, op1 = tmp;
30642 op0 = tmp, op1 = val;
30644 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
30645 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30650 use_vec_merge = TARGET_SSE4_1;
30657 use_vec_merge = true;
30661 /* tmp = target = A B C D */
30662 tmp = copy_to_reg (target);
30663 /* target = A A B B */
30664 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
30665 /* target = X A B B */
30666 ix86_expand_vector_set (false, target, val, 0);
30667 /* target = A X C D */
30668 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30669 const1_rtx, const0_rtx,
30670 GEN_INT (2+4), GEN_INT (3+4)));
30674 /* tmp = target = A B C D */
30675 tmp = copy_to_reg (target);
30676 /* tmp = X B C D */
30677 ix86_expand_vector_set (false, tmp, val, 0);
30678 /* target = A B X D */
30679 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30680 const0_rtx, const1_rtx,
30681 GEN_INT (0+4), GEN_INT (3+4)));
30685 /* tmp = target = A B C D */
30686 tmp = copy_to_reg (target);
30687 /* tmp = X B C D */
30688 ix86_expand_vector_set (false, tmp, val, 0);
30689 /* target = A B X D */
30690 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30691 const0_rtx, const1_rtx,
30692 GEN_INT (2+4), GEN_INT (0+4)));
30696 gcc_unreachable ();
30701 use_vec_merge = TARGET_SSE4_1;
30705 /* Element 0 handled by vec_merge below. */
30708 use_vec_merge = true;
30714 /* With SSE2, use integer shuffles to swap element 0 and ELT,
30715 store into element 0, then shuffle them back. */
30719 order[0] = GEN_INT (elt);
30720 order[1] = const1_rtx;
30721 order[2] = const2_rtx;
30722 order[3] = GEN_INT (3);
30723 order[elt] = const0_rtx;
30725 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30726 order[1], order[2], order[3]));
30728 ix86_expand_vector_set (false, target, val, 0);
30730 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30731 order[1], order[2], order[3]));
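/* For example, ELT == 2 gives order {2, 1, 0, 3}: the first pshufd
   swaps elements 0 and 2, the scalar set writes element 0, and the
   same self-inverse pshufd swaps them back (illustrative).  */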
30735 /* For SSE1, we have to reuse the V4SF code. */
30736 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
30737 gen_lowpart (SFmode, val), elt);
30742 use_vec_merge = TARGET_SSE2;
30745 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
30749 use_vec_merge = TARGET_SSE4_1;
30756 half_mode = V16QImode;
30762 half_mode = V8HImode;
30768 half_mode = V4SImode;
30774 half_mode = V2DImode;
30780 half_mode = V4SFmode;
30786 half_mode = V2DFmode;
30792 /* Compute offset. */
30796 gcc_assert (i <= 1);
30798 /* Extract the half. */
30799 tmp = gen_reg_rtx (half_mode);
30800 emit_insn (gen_extract[j][i] (tmp, target));
30802 /* Put val in tmp at elt. */
30803 ix86_expand_vector_set (false, tmp, val, elt);
30806 emit_insn (gen_insert[j][i] (target, target, tmp));
30815 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
30816 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
30817 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30821 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
30823 emit_move_insn (mem, target);
30825 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
30826 emit_move_insn (tmp, val);
30828 emit_move_insn (target, mem);
30833 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
30835 enum machine_mode mode = GET_MODE (vec);
30836 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30837 bool use_vec_extr = false;
30850 use_vec_extr = true;
30854 use_vec_extr = TARGET_SSE4_1;
30866 tmp = gen_reg_rtx (mode);
30867 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
30868 GEN_INT (elt), GEN_INT (elt),
30869 GEN_INT (elt+4), GEN_INT (elt+4)));
30873 tmp = gen_reg_rtx (mode);
30874 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
30878 gcc_unreachable ();
30881 use_vec_extr = true;
30886 use_vec_extr = TARGET_SSE4_1;
30900 tmp = gen_reg_rtx (mode);
30901 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
30902 GEN_INT (elt), GEN_INT (elt),
30903 GEN_INT (elt), GEN_INT (elt)));
30907 tmp = gen_reg_rtx (mode);
30908 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
30912 gcc_unreachable ();
30915 use_vec_extr = true;
30920 /* For SSE1, we have to reuse the V4SF code. */
30921 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
30922 gen_lowpart (V4SFmode, vec), elt);
30928 use_vec_extr = TARGET_SSE2;
30931 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
30935 use_vec_extr = TARGET_SSE4_1;
30939 /* ??? Could extract the appropriate HImode element and shift. */
30946 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
30947 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
30949 /* Let the rtl optimizers know about the zero extension performed. */
30950 if (inner_mode == QImode || inner_mode == HImode)
30952 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
30953 target = gen_lowpart (SImode, target);
30956 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30960 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
30962 emit_move_insn (mem, vec);
30964 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
30965 emit_move_insn (target, tmp);
30969 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
30970 pattern to reduce; DEST is the destination; IN is the input vector. */
30973 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
30975 rtx tmp1, tmp2, tmp3;
30977 tmp1 = gen_reg_rtx (V4SFmode);
30978 tmp2 = gen_reg_rtx (V4SFmode);
30979 tmp3 = gen_reg_rtx (V4SFmode);
30981 emit_insn (gen_sse_movhlps (tmp1, in, in));
30982 emit_insn (fn (tmp2, tmp1, in));
30984 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
30985 const1_rtx, const1_rtx,
30986 GEN_INT (1+4), GEN_INT (1+4)));
30987 emit_insn (fn (dest, tmp2, tmp3));
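/* Illustrative data flow for IN = [a b c d] with a commutative FN;
   the reduction result lands in element 0 of DEST:
     tmp1 = [c d c d]                   (movhlps)
     tmp2 = [f(a,c) f(b,d) . .]         (fn)
     tmp3 = [f(b,d) f(b,d) . .]         (shufps, broadcast element 1)
     dest = [f(f(a,c),f(b,d)) . . .]    (fn)  */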
30990 /* Target hook for scalar_mode_supported_p. */
30992 ix86_scalar_mode_supported_p (enum machine_mode mode)
30994 if (DECIMAL_FLOAT_MODE_P (mode))
30995 return default_decimal_float_supported_p ();
30996 else if (mode == TFmode)
30999 return default_scalar_mode_supported_p (mode);
31002 /* Implements target hook vector_mode_supported_p. */
31004 ix86_vector_mode_supported_p (enum machine_mode mode)
31006 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
31008 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
31010 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
31012 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
31014 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
31019 /* Target hook for c_mode_for_suffix. */
31020 static enum machine_mode
31021 ix86_c_mode_for_suffix (char suffix)
31031 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31033 We do this in the new i386 backend to maintain source compatibility
31034 with the old cc0-based compiler. */
31037 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31038 tree inputs ATTRIBUTE_UNUSED,
31041 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31043 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31048 /* Implements target hook targetm.asm.encode_section_info. This
31049 is not used by NetWare. */
31051 static void ATTRIBUTE_UNUSED
31052 ix86_encode_section_info (tree decl, rtx rtl, int first)
31054 default_encode_section_info (decl, rtl, first);
31056 if (TREE_CODE (decl) == VAR_DECL
31057 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31058 && ix86_in_large_data_p (decl))
31059 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31062 /* Worker function for REVERSE_CONDITION. */
31065 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31067 return (mode != CCFPmode && mode != CCFPUmode
31068 ? reverse_condition (code)
31069 : reverse_condition_maybe_unordered (code));
31072 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
31076 output_387_reg_move (rtx insn, rtx *operands)
31078 if (REG_P (operands[0]))
31080 if (REG_P (operands[1])
31081 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31083 if (REGNO (operands[0]) == FIRST_STACK_REG)
31084 return output_387_ffreep (operands, 0);
31085 return "fstp\t%y0";
31087 if (STACK_TOP_P (operands[0]))
31088 return "fld%Z1\t%y1";
31091 else if (MEM_P (operands[0]))
31093 gcc_assert (REG_P (operands[1]));
31094 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31095 return "fstp%Z0\t%y0";
31098 /* There is no non-popping store to memory for XFmode.
31099 So if we need one, follow the store with a load. */
31100 if (GET_MODE (operands[0]) == XFmode)
31101 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31103 return "fst%Z0\t%y0";
31110 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31111 FP status register is set. */
31114 ix86_emit_fp_unordered_jump (rtx label)
31116 rtx reg = gen_reg_rtx (HImode);
31119 emit_insn (gen_x86_fnstsw_1 (reg));
31121 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31123 emit_insn (gen_x86_sahf_1 (reg));
31125 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31126 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31130 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31132 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31133 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31136 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31137 gen_rtx_LABEL_REF (VOIDmode, label),
31139 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31141 emit_jump_insn (temp);
31142 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31145 /* Output code to perform a log1p XFmode calculation. */
31147 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31149 rtx label1 = gen_label_rtx ();
31150 rtx label2 = gen_label_rtx ();
31152 rtx tmp = gen_reg_rtx (XFmode);
31153 rtx tmp2 = gen_reg_rtx (XFmode);
31156 emit_insn (gen_absxf2 (tmp, op1));
31157 test = gen_rtx_GE (VOIDmode, tmp,
31158 CONST_DOUBLE_FROM_REAL_VALUE (
31159 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31161 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31163 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31164 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31165 emit_jump (label2);
31167 emit_label (label1);
31168 emit_move_insn (tmp, CONST1_RTX (XFmode));
31169 emit_insn (gen_addxf3 (tmp, op1, tmp));
31170 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31171 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31173 emit_label (label2);
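/* C code sketch of the above (illustrative; "ln2" stands for the
   fldln2 constant, and fyl2x/fyl2xp1 compute y*log2(x) and
   y*log2(x+1) respectively):

     if (fabs (op1) < 1.0 - sqrt (2.0) / 2.0)
       op0 = ln2 * log2 (op1 + 1.0);
     else
       op0 = ln2 * log2 (1.0 + op1);

   Both branches compute log1p (op1); fyl2xp1 is used near zero
   because forming 1.0 + op1 explicitly would lose precision there.  */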
31176 /* Output code to perform a Newton-Raphson approximation of a single precision
31177 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31179 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31181 rtx x0, x1, e0, e1, two;
31183 x0 = gen_reg_rtx (mode);
31184 e0 = gen_reg_rtx (mode);
31185 e1 = gen_reg_rtx (mode);
31186 x1 = gen_reg_rtx (mode);
31188 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
31190 if (VECTOR_MODE_P (mode))
31191 two = ix86_build_const_vector (mode, true, two);
31193 two = force_reg (mode, two);
31195 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
31197 /* x0 = rcp(b) estimate */
31198 emit_insn (gen_rtx_SET (VOIDmode, x0,
31199 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31202 emit_insn (gen_rtx_SET (VOIDmode, e0,
31203 gen_rtx_MULT (mode, x0, a)));
31205 emit_insn (gen_rtx_SET (VOIDmode, e1,
31206 gen_rtx_MULT (mode, x0, b)));
31208 emit_insn (gen_rtx_SET (VOIDmode, x1,
31209 gen_rtx_MINUS (mode, two, e1)));
31210 /* res = e0 * x1 */
31211 emit_insn (gen_rtx_SET (VOIDmode, res,
31212 gen_rtx_MULT (mode, e0, x1)));
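/* Derivation sketch: with r ~ 1/b from the rcp estimate, one
   Newton-Raphson step is r' = r * (2 - b*r); above, the step is fused
   with the multiply by a, giving res = (a*r) * (2 - b*r) ~ a/b.  */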
31215 /* Output code to perform a Newton-Raphson approximation of a
31216 single precision floating point [reciprocal] square root. */
31218 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31221 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31224 x0 = gen_reg_rtx (mode);
31225 e0 = gen_reg_rtx (mode);
31226 e1 = gen_reg_rtx (mode);
31227 e2 = gen_reg_rtx (mode);
31228 e3 = gen_reg_rtx (mode);
31230 real_from_integer (&r, VOIDmode, -3, -1, 0);
31231 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31233 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31234 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31236 if (VECTOR_MODE_P (mode))
31238 mthree = ix86_build_const_vector (mode, true, mthree);
31239 mhalf = ix86_build_const_vector (mode, true, mhalf);
31242 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31243 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
31245 /* x0 = rsqrt(a) estimate */
31246 emit_insn (gen_rtx_SET (VOIDmode, x0,
31247 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31250 /* If a == 0.0, filter out the infinite estimate to prevent NaN for sqrt (0.0). */
31255 zero = gen_reg_rtx (mode);
31256 mask = gen_reg_rtx (mode);
31258 zero = force_reg (mode, CONST0_RTX(mode));
31259 emit_insn (gen_rtx_SET (VOIDmode, mask,
31260 gen_rtx_NE (mode, zero, a)));
31262 emit_insn (gen_rtx_SET (VOIDmode, x0,
31263 gen_rtx_AND (mode, x0, mask)));
31267 emit_insn (gen_rtx_SET (VOIDmode, e0,
31268 gen_rtx_MULT (mode, x0, a)));
31270 emit_insn (gen_rtx_SET (VOIDmode, e1,
31271 gen_rtx_MULT (mode, e0, x0)));
31274 mthree = force_reg (mode, mthree);
31275 emit_insn (gen_rtx_SET (VOIDmode, e2,
31276 gen_rtx_PLUS (mode, e1, mthree)));
31278 mhalf = force_reg (mode, mhalf);
31280 /* e3 = -.5 * x0 */
31281 emit_insn (gen_rtx_SET (VOIDmode, e3,
31282 gen_rtx_MULT (mode, x0, mhalf)));
31284 /* e3 = -.5 * e0 */
31285 emit_insn (gen_rtx_SET (VOIDmode, e3,
31286 gen_rtx_MULT (mode, e0, mhalf)));
31287 /* ret = e2 * e3 */
31288 emit_insn (gen_rtx_SET (VOIDmode, res,
31289 gen_rtx_MULT (mode, e2, e3)));
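/* Derivation sketch: with r ~ rsqrt(a), one Newton-Raphson step is
   r' = 0.5 * r * (3 - a*r*r) = -0.5 * r * (a*r*r - 3), which is the
   reciprocal variant above; multiplying by e0 = a*r instead of x0
   yields the sqrt variant, since sqrt(a) = a * rsqrt(a).  */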
31292 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31294 static void ATTRIBUTE_UNUSED
31295 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31298 /* With Binutils 2.15, the "@unwind" marker must be specified on
31299 every occurrence of the ".eh_frame" section, not just the first one. */
31302 && strcmp (name, ".eh_frame") == 0)
31304 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31305 flags & SECTION_WRITE ? "aw" : "a");
31308 default_elf_asm_named_section (name, flags, decl);
31311 /* Return the mangling of TYPE if it is an extended fundamental type. */
31313 static const char *
31314 ix86_mangle_type (const_tree type)
31316 type = TYPE_MAIN_VARIANT (type);
31318 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31319 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31322 switch (TYPE_MODE (type))
31325 /* __float128 is "g". */
31328 /* "long double" or __float80 is "e". */
31335 /* For 32-bit code we can save PIC register setup by using
31336 __stack_chk_fail_local hidden function instead of calling
31337 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
31338 register, so it is better to call __stack_chk_fail directly. */
31341 ix86_stack_protect_fail (void)
31343 return TARGET_64BIT
31344 ? default_external_stack_protect_fail ()
31345 : default_hidden_stack_protect_fail ();
31348 /* Select a format to encode pointers in exception handling data. CODE
31349 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31350 true if the symbol may be affected by dynamic relocations.
31352 ??? All x86 object file formats are capable of representing this.
31353 After all, the relocation needed is the same as for the call insn.
31354 Whether or not a particular assembler allows us to enter such, I
31355 guess we'll have to see. */
31357 asm_preferred_eh_data_format (int code, int global)
31361 int type = DW_EH_PE_sdata8;
31363 || ix86_cmodel == CM_SMALL_PIC
31364 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31365 type = DW_EH_PE_sdata4;
31366 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31368 if (ix86_cmodel == CM_SMALL
31369 || (ix86_cmodel == CM_MEDIUM && code))
31370 return DW_EH_PE_udata4;
31371 return DW_EH_PE_absptr;
31374 /* Expand copysign from SIGN to the positive value ABS_VALUE
31375 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
31378 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31380 enum machine_mode mode = GET_MODE (sign);
31381 rtx sgn = gen_reg_rtx (mode);
31382 if (mask == NULL_RTX)
31384 enum machine_mode vmode;
31386 if (mode == SFmode)
31388 else if (mode == DFmode)
31393 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31394 if (!VECTOR_MODE_P (mode))
31396 /* We need to generate a scalar mode mask in this case. */
31397 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31398 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31399 mask = gen_reg_rtx (mode);
31400 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31404 mask = gen_rtx_NOT (mode, mask);
31405 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31406 gen_rtx_AND (mode, mask, sign)));
31407 emit_insn (gen_rtx_SET (VOIDmode, result,
31408 gen_rtx_IOR (mode, abs_value, sgn)));
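/* Net effect (illustrative): RESULT = ABS_VALUE | (SIGN & signbit-mask),
   where ABS_VALUE is assumed to have its sign bit clear.  */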
31411 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31412 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
31415 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31417 enum machine_mode vmode, mode = GET_MODE (op0);
31420 xa = gen_reg_rtx (mode);
31421 if (mode == SFmode)
31423 else if (mode == DFmode)
31427 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31428 if (!VECTOR_MODE_P (mode))
31430 /* We need to generate a scalar mode mask in this case. */
31431 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31432 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31433 mask = gen_reg_rtx (mode);
31434 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31436 emit_insn (gen_rtx_SET (VOIDmode, xa,
31437 gen_rtx_AND (mode, op0, mask)));
31445 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31446 swapping the operands if SWAP_OPERANDS is true. The expanded
31447 code is a forward jump to a newly created label in case the
31448 comparison is true. The generated label rtx is returned. */
31450 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31451 bool swap_operands)
31462 label = gen_label_rtx ();
31463 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31464 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31465 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31466 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31467 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31468 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31469 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31470 JUMP_LABEL (tmp) = label;
31475 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31476 using comparison code CODE. Operands are swapped for the comparison if
31477 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31479 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31480 bool swap_operands)
31482 enum machine_mode mode = GET_MODE (op0);
31483 rtx mask = gen_reg_rtx (mode);
31492 if (mode == DFmode)
31493 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
31494 gen_rtx_fmt_ee (code, mode, op0, op1)));
31496 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
31497 gen_rtx_fmt_ee (code, mode, op0, op1)));
31502 /* Generate and return a rtx of mode MODE for 2**n where n is the number
31503 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31505 ix86_gen_TWO52 (enum machine_mode mode)
31507 REAL_VALUE_TYPE TWO52r;
31510 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31511 TWO52 = const_double_from_real_value (TWO52r, mode);
31512 TWO52 = force_reg (mode, TWO52);
31517 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
31520 ix86_expand_lround (rtx op0, rtx op1)
31522 /* C code for the stuff we're doing below:
31523 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
31526 enum machine_mode mode = GET_MODE (op1);
31527 const struct real_format *fmt;
31528 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31531 /* load nextafter (0.5, 0.0) */
31532 fmt = REAL_MODE_FORMAT (mode);
31533 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31534 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31536 /* adj = copysign (0.5, op1) */
31537 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
31538 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31540 /* adj = op1 + adj */
31541 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31543 /* op0 = (imode)adj */
31544 expand_fix (op0, adj, 0);
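/* The adjustment uses nextafter (0.5, 0.0) rather than 0.5 itself:
   for the largest representable value just below 0.5, adding an
   exact 0.5 would round the sum up to 1.0, and lround would wrongly
   return 1 instead of 0 (illustrative rationale).  */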
31547 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
31550 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31552 /* C code for the stuff we're doing below (for do_floor):
31554 xi -= (double)xi > op1 ? 1 : 0;
31557 enum machine_mode fmode = GET_MODE (op1);
31558 enum machine_mode imode = GET_MODE (op0);
31559 rtx ireg, freg, label, tmp;
31561 /* reg = (long)op1 */
31562 ireg = gen_reg_rtx (imode);
31563 expand_fix (ireg, op1, 0);
31565 /* freg = (double)reg */
31566 freg = gen_reg_rtx (fmode);
31567 expand_float (freg, ireg, 0);
31569 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31570 label = ix86_expand_sse_compare_and_jump (UNLE,
31571 freg, op1, !do_floor);
31572 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31573 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31574 emit_move_insn (ireg, tmp);
31576 emit_label (label);
31577 LABEL_NUSES (label) = 1;
31579 emit_move_insn (op0, ireg);
31582 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31583 result in OPERAND0. */
31585 ix86_expand_rint (rtx operand0, rtx operand1)
31587 /* C code for the stuff we're doing below:
31588 xa = fabs (operand1);
31589 if (!isless (xa, 2**52))
31591 xa = xa + 2**52 - 2**52;
31592 return copysign (xa, operand1);
31594 enum machine_mode mode = GET_MODE (operand0);
31595 rtx res, xa, label, TWO52, mask;
31597 res = gen_reg_rtx (mode);
31598 emit_move_insn (res, operand1);
31600 /* xa = abs (operand1) */
31601 xa = ix86_expand_sse_fabs (res, &mask);
31603 /* if (!isless (xa, TWO52)) goto label; */
31604 TWO52 = ix86_gen_TWO52 (mode);
31605 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31607 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31608 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31610 ix86_sse_copysign_to_positive (res, xa, res, mask);
31612 emit_label (label);
31613 LABEL_NUSES (label) = 1;
31615 emit_move_insn (operand0, res);
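/* The TWO52 trick: for |x| < 2**52 in DFmode (2**23 in SFmode), the
   addition x + 2**52 pushes the fraction bits of x out of the
   significand, so x is rounded to an integer by the addition itself
   in the current rounding mode; subtracting 2**52 again recovers the
   rounded value.  */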
31618 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
31621 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
31623 /* C code for the stuff we expand below.
31624 double xa = fabs (x), x2;
31625 if (!isless (xa, TWO52))
31627 xa = xa + TWO52 - TWO52;
31628 x2 = copysign (xa, x);
31637 enum machine_mode mode = GET_MODE (operand0);
31638 rtx xa, TWO52, tmp, label, one, res, mask;
31640 TWO52 = ix86_gen_TWO52 (mode);
31642 /* Temporary for holding the result, initialized to the input
31643 operand to ease control flow. */
31644 res = gen_reg_rtx (mode);
31645 emit_move_insn (res, operand1);
31647 /* xa = abs (operand1) */
31648 xa = ix86_expand_sse_fabs (res, &mask);
31650 /* if (!isless (xa, TWO52)) goto label; */
31651 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31653 /* xa = xa + TWO52 - TWO52; */
31654 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31655 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31657 /* xa = copysign (xa, operand1) */
31658 ix86_sse_copysign_to_positive (xa, xa, res, mask);
31660 /* generate 1.0 or -1.0 */
31661 one = force_reg (mode,
31662 const_double_from_real_value (do_floor
31663 ? dconst1 : dconstm1, mode));
31665 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31666 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31667 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31668 gen_rtx_AND (mode, one, tmp)));
31669 /* We always need to subtract here to preserve signed zero. */
31670 tmp = expand_simple_binop (mode, MINUS,
31671 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31672 emit_move_insn (res, tmp);
31674 emit_label (label);
31675 LABEL_NUSES (label) = 1;
31677 emit_move_insn (operand0, res);
31680 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
31683 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
31685 /* C code for the stuff we expand below.
31686 double xa = fabs (x), x2;
31687 if (!isless (xa, TWO52))
31689 x2 = (double)(long)x;
31696 if (HONOR_SIGNED_ZEROS (mode))
31697 return copysign (x2, x);
31700 enum machine_mode mode = GET_MODE (operand0);
31701 rtx xa, xi, TWO52, tmp, label, one, res, mask;
31703 TWO52 = ix86_gen_TWO52 (mode);
31705 /* Temporary for holding the result, initialized to the input
31706 operand to ease control flow. */
31707 res = gen_reg_rtx (mode);
31708 emit_move_insn (res, operand1);
31710 /* xa = abs (operand1) */
31711 xa = ix86_expand_sse_fabs (res, &mask);
31713 /* if (!isless (xa, TWO52)) goto label; */
31714 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31716 /* xa = (double)(long)x */
31717 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31718 expand_fix (xi, res, 0);
31719 expand_float (xa, xi, 0);
31722 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
31724 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31725 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31726 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31727 gen_rtx_AND (mode, one, tmp)));
31728 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
31729 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31730 emit_move_insn (res, tmp);
31732 if (HONOR_SIGNED_ZEROS (mode))
31733 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
31735 emit_label (label);
31736 LABEL_NUSES (label) = 1;
31738 emit_move_insn (operand0, res);
31741 /* Expand SSE sequence for computing round from OPERAND1 storing
31742 into OPERAND0. Sequence that works without relying on DImode truncation
31743 via cvttsd2siq that is only available on 64bit targets. */
31745 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
31747 /* C code for the stuff we expand below.
31748 double xa = fabs (x), xa2, x2;
31749 if (!isless (xa, TWO52))
31751 Using the absolute value and copying back sign makes
31752 -0.0 -> -0.0 correct.
31753 xa2 = xa + TWO52 - TWO52;
31758 else if (dxa > 0.5)
31760 x2 = copysign (xa2, x);
31763 enum machine_mode mode = GET_MODE (operand0);
31764 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
31766 TWO52 = ix86_gen_TWO52 (mode);
31768 /* Temporary for holding the result, initialized to the input
31769 operand to ease control flow. */
31770 res = gen_reg_rtx (mode);
31771 emit_move_insn (res, operand1);
31773 /* xa = abs (operand1) */
31774 xa = ix86_expand_sse_fabs (res, &mask);
31776 /* if (!isless (xa, TWO52)) goto label; */
31777 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31779 /* xa2 = xa + TWO52 - TWO52; */
31780 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31781 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
31783 /* dxa = xa2 - xa; */
31784 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
31786 /* generate 0.5, 1.0 and -0.5 */
31787 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
31788 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
31789 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
31793 tmp = gen_reg_rtx (mode);
31794 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
31795 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
31796 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31797 gen_rtx_AND (mode, one, tmp)));
31798 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31799 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
31800 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
31801 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31802 gen_rtx_AND (mode, one, tmp)));
31803 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31805 /* res = copysign (xa2, operand1) */
31806 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
31808 emit_label (label);
31809 LABEL_NUSES (label) = 1;
31811 emit_move_insn (operand0, res);
31814 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
31817 ix86_expand_trunc (rtx operand0, rtx operand1)
31819 /* C code for SSE variant we expand below.
31820 double xa = fabs (x), x2;
31821 if (!isless (xa, TWO52))
31823 x2 = (double)(long)x;
31824 if (HONOR_SIGNED_ZEROS (mode))
31825 return copysign (x2, x);
31828 enum machine_mode mode = GET_MODE (operand0);
31829 rtx xa, xi, TWO52, label, res, mask;
31831 TWO52 = ix86_gen_TWO52 (mode);
31833 /* Temporary for holding the result, initialized to the input
31834 operand to ease control flow. */
31835 res = gen_reg_rtx (mode);
31836 emit_move_insn (res, operand1);
31838 /* xa = abs (operand1) */
31839 xa = ix86_expand_sse_fabs (res, &mask);
31841 /* if (!isless (xa, TWO52)) goto label; */
31842 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31844 /* x = (double)(long)x */
31845 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31846 expand_fix (xi, res, 0);
31847 expand_float (res, xi, 0);
31849 if (HONOR_SIGNED_ZEROS (mode))
31850 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
31852 emit_label (label);
31853 LABEL_NUSES (label) = 1;
31855 emit_move_insn (operand0, res);
31858 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
31861 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
31863 enum machine_mode mode = GET_MODE (operand0);
31864 rtx xa, mask, TWO52, label, one, res, smask, tmp;
31866 /* C code for SSE variant we expand below.
31867 double xa = fabs (x), x2;
31868 if (!isless (xa, TWO52))
31870 xa2 = xa + TWO52 - TWO52;
31874 x2 = copysign (xa2, x);
31878 TWO52 = ix86_gen_TWO52 (mode);
31880 /* Temporary for holding the result, initialized to the input
31881 operand to ease control flow. */
31882 res = gen_reg_rtx (mode);
31883 emit_move_insn (res, operand1);
31885 /* xa = abs (operand1) */
31886 xa = ix86_expand_sse_fabs (res, &smask);
31888 /* if (!isless (xa, TWO52)) goto label; */
31889 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31891 /* res = xa + TWO52 - TWO52; */
31892 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31893 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
31894 emit_move_insn (res, tmp);
31897 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
31899 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
31900 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
31901 emit_insn (gen_rtx_SET (VOIDmode, mask,
31902 gen_rtx_AND (mode, mask, one)));
31903 tmp = expand_simple_binop (mode, MINUS,
31904 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
31905 emit_move_insn (res, tmp);
31907 /* res = copysign (res, operand1) */
31908 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
31910 emit_label (label);
31911 LABEL_NUSES (label) = 1;
31913 emit_move_insn (operand0, res);
31916 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
31919 ix86_expand_round (rtx operand0, rtx operand1)
31921 /* C code for the stuff we're doing below:
31922 double xa = fabs (x);
31923 if (!isless (xa, TWO52))
31925 xa = (double)(long)(xa + nextafter (0.5, 0.0));
31926 return copysign (xa, x);
31928 enum machine_mode mode = GET_MODE (operand0);
31929 rtx res, TWO52, xa, label, xi, half, mask;
31930 const struct real_format *fmt;
31931 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31933 /* Temporary for holding the result, initialized to the input
31934 operand to ease control flow. */
31935 res = gen_reg_rtx (mode);
31936 emit_move_insn (res, operand1);
31938 TWO52 = ix86_gen_TWO52 (mode);
31939 xa = ix86_expand_sse_fabs (res, &mask);
31940 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31942 /* load nextafter (0.5, 0.0) */
31943 fmt = REAL_MODE_FORMAT (mode);
31944 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31945 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31947 /* xa = xa + 0.5 */
31948 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
31949 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
31951 /* xa = (double)(int64_t)xa */
31952 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31953 expand_fix (xi, xa, 0);
31954 expand_float (xa, xi, 0);
31956 /* res = copysign (xa, operand1) */
31957 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
31959 emit_label (label);
31960 LABEL_NUSES (label) = 1;
31962 emit_move_insn (operand0, res);
31966 /* Table of valid machine attributes. */
31967 static const struct attribute_spec ix86_attribute_table[] =
31969 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
31970 /* Stdcall attribute says callee is responsible for popping arguments
31971 if they are not variable. */
31972 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31973 /* Fastcall attribute says callee is responsible for popping arguments
31974 if they are not variable. */
31975 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31976 /* Thiscall attribute says callee is responsible for popping arguments
31977 if they are not variable. */
31978 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31979 /* Cdecl attribute says the callee is a normal C declaration */
31980 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31981 /* Regparm attribute specifies how many integer arguments are to be
31982 passed in registers. */
31983 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
31984 /* Sseregparm attribute says we are using x86_64 calling conventions
31985 for FP arguments. */
31986 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31987 /* force_align_arg_pointer says this function realigns the stack at entry. */
31988 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
31989 false, true, true, ix86_handle_cconv_attribute },
31990 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31991 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
31992 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
31993 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
31995 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
31996 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
31997 #ifdef SUBTARGET_ATTRIBUTE_TABLE
31998 SUBTARGET_ATTRIBUTE_TABLE,
32000 /* ms_abi and sysv_abi calling convention function attributes. */
32001 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32002 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
32003 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
32005 { NULL, 0, 0, false, false, false, NULL }
32008 /* Implement targetm.vectorize.builtin_vectorization_cost. */
32010 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
32011 tree vectype ATTRIBUTE_UNUSED,
32012 int misalign ATTRIBUTE_UNUSED)
32014 switch (type_of_cost)
32017 return ix86_cost->scalar_stmt_cost;
32020 return ix86_cost->scalar_load_cost;
32023 return ix86_cost->scalar_store_cost;
32026 return ix86_cost->vec_stmt_cost;
32029 return ix86_cost->vec_align_load_cost;
32032 return ix86_cost->vec_store_cost;
32034 case vec_to_scalar:
32035 return ix86_cost->vec_to_scalar_cost;
32037 case scalar_to_vec:
32038 return ix86_cost->scalar_to_vec_cost;
32040 case unaligned_load:
32041 case unaligned_store:
32042 return ix86_cost->vec_unalign_load_cost;
32044 case cond_branch_taken:
32045 return ix86_cost->cond_taken_branch_cost;
32047 case cond_branch_not_taken:
32048 return ix86_cost->cond_not_taken_branch_cost;
32054 gcc_unreachable ();
32059 /* Implement targetm.vectorize.builtin_vec_perm. */
32062 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32064 tree itype = TREE_TYPE (vec_type);
32065 bool u = TYPE_UNSIGNED (itype);
32066 enum machine_mode vmode = TYPE_MODE (vec_type);
32067 enum ix86_builtins fcode;
32068 bool ok = TARGET_SSE2;
32074 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32077 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32079 itype = ix86_get_builtin_type (IX86_BT_DI);
32084 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32088 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32090 itype = ix86_get_builtin_type (IX86_BT_SI);
32094 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32097 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32100 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32103 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32113 *mask_type = itype;
32114 return ix86_builtins[(int) fcode];
32117 /* Return a vector mode with twice as many elements as VMODE. */
32118 /* ??? Consider moving this to a table generated by genmodes.c. */
32120 static enum machine_mode
32121 doublesize_vector_mode (enum machine_mode vmode)
32125 case V2SFmode: return V4SFmode;
32126 case V1DImode: return V2DImode;
32127 case V2SImode: return V4SImode;
32128 case V4HImode: return V8HImode;
32129 case V8QImode: return V16QImode;
32131 case V2DFmode: return V4DFmode;
32132 case V4SFmode: return V8SFmode;
32133 case V2DImode: return V4DImode;
32134 case V4SImode: return V8SImode;
32135 case V8HImode: return V16HImode;
32136 case V16QImode: return V32QImode;
32138 case V4DFmode: return V8DFmode;
32139 case V8SFmode: return V16SFmode;
32140 case V4DImode: return V8DImode;
32141 case V8SImode: return V16SImode;
32142 case V16HImode: return V32HImode;
32143 case V32QImode: return V64QImode;
32146 gcc_unreachable ();
32150 /* Construct (set target (vec_select op0 (parallel perm))) and
32151 return true if that's a valid instruction in the active ISA. */
32154 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32156 rtx rperm[MAX_VECT_LEN], x;
32159 for (i = 0; i < nelt; ++i)
32160 rperm[i] = GEN_INT (perm[i]);
32162 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32163 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32164 x = gen_rtx_SET (VOIDmode, target, x);
32167 if (recog_memoized (x) < 0)
32175 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32178 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32179 const unsigned char *perm, unsigned nelt)
32181 enum machine_mode v2mode;
32184 v2mode = doublesize_vector_mode (GET_MODE (op0));
32185 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32186 return expand_vselect (target, x, perm, nelt);
32189 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32190 in terms of blendp[sd] / pblendw / pblendvb. */
32193 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32195 enum machine_mode vmode = d->vmode;
32196 unsigned i, mask, nelt = d->nelt;
32197 rtx target, op0, op1, x;
32199 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32201 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32204 /* This is a blend, not a permute. Elements must stay in their
32205 respective lanes. */
32206 for (i = 0; i < nelt; ++i)
32208 unsigned e = d->perm[i];
32209 if (!(e == i || e == i + nelt))
32216 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32217 decision should be extracted elsewhere, so that we only try that
32218 sequence once all budget==3 options have been tried. */
32220 /* For bytes, see if bytes move in pairs so we can use pblendw with
32221 an immediate argument, rather than pblendvb with a vector argument. */
32222 if (vmode == V16QImode)
32224 bool pblendw_ok = true;
32225 for (i = 0; i < 16 && pblendw_ok; i += 2)
32226 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32230 rtx rperm[16], vperm;
32232 for (i = 0; i < nelt; ++i)
32233 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32235 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32236 vperm = force_reg (V16QImode, vperm);
32238 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32243 target = d->target;
32255 for (i = 0; i < nelt; ++i)
32256 mask |= (d->perm[i] >= nelt) << i;
32260 for (i = 0; i < 2; ++i)
32261 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32265 for (i = 0; i < 4; ++i)
32266 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32270 for (i = 0; i < 8; ++i)
32271 mask |= (d->perm[i * 2] >= 16) << i;
32275 target = gen_lowpart (vmode, target);
32276 op0 = gen_lowpart (vmode, op0);
32277 op1 = gen_lowpart (vmode, op1);
32281 gcc_unreachable ();
32284 /* This matches one of five different patterns, depending on the mode. */
32285 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32286 x = gen_rtx_SET (VOIDmode, target, x);
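/* Worked example (illustrative): for V8HImode with
   perm = {0, 9, 2, 11, 4, 13, 6, 15}, bit I of MASK is set exactly
   when element I comes from OP1, so MASK = 0xAA and the VEC_MERGE
   above matches pblendw $0xAA.  */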
32292 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32293 in terms of the variable form of vpermilps.
32295 Note that we will have already failed the immediate form of vpermilps,
32296 which requires that the high and low part shuffle be identical; the
32297 variable form doesn't require that. */
32300 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32302 rtx rperm[8], vperm;
32305 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32308 /* We can only permute within the 128-bit lane. */
32309 for (i = 0; i < 8; ++i)
32311 unsigned e = d->perm[i];
32312 if (i < 4 ? e >= 4 : e < 4)
32319 for (i = 0; i < 8; ++i)
32321 unsigned e = d->perm[i];
32323 /* Within each 128-bit lane, the elements of op0 are numbered
32324 from 0 and the elements of op1 are numbered from 4. */
32330 rperm[i] = GEN_INT (e);
32333 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32334 vperm = force_reg (V8SImode, vperm);
32335 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
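/* Worked example (illustrative): perm = {1, 0, 3, 2, 5, 4, 7, 6}
   swaps adjacent pairs within each 128-bit lane; the control vector
   ends up as {1, 0, 3, 2, 1, 0, 3, 2}, and vpermilps consults only
   the low two bits of each element, shuffling each lane
   independently.  */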
32340 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32341 in terms of pshufb or vpperm. */
32344 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32346 unsigned i, nelt, eltsz;
32347 rtx rperm[16], vperm, target, op0, op1;
32349 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32351 if (GET_MODE_SIZE (d->vmode) != 16)
32358 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32360 for (i = 0; i < nelt; ++i)
32362 unsigned j, e = d->perm[i];
32363 for (j = 0; j < eltsz; ++j)
32364 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32367 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32368 vperm = force_reg (V16QImode, vperm);
32370 target = gen_lowpart (V16QImode, d->target);
32371 op0 = gen_lowpart (V16QImode, d->op0);
32372 if (d->op0 == d->op1)
32373 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32376 op1 = gen_lowpart (V16QImode, d->op1);
32377 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
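/* Worked example (illustrative): for a one-operand V4SImode shuffle
   with perm = {2, 3, 0, 1} and eltsz = 4, the byte-level control
   built above is {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7}.  */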
32383 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32384 in a single instruction. */
32387 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32389 unsigned i, nelt = d->nelt;
32390 unsigned char perm2[MAX_VECT_LEN];
32392 /* Check plain VEC_SELECT first, because AVX has instructions that could
32393 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32394 input where SEL+CONCAT may not. */
32395 if (d->op0 == d->op1)
32397 int mask = nelt - 1;
32399 for (i = 0; i < nelt; i++)
32400 perm2[i] = d->perm[i] & mask;
32402 if (expand_vselect (d->target, d->op0, perm2, nelt))
32405 /* There are plenty of patterns in sse.md that are written for
32406 SEL+CONCAT and are not replicated for a single op. Perhaps
32407 that should be changed, to avoid the nastiness here. */
32409 /* Recognize interleave style patterns, which means incrementing
32410 every other permutation operand. */
32411 for (i = 0; i < nelt; i += 2)
32413 perm2[i] = d->perm[i] & mask;
32414 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32416 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32419 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32422 for (i = 0; i < nelt; i += 4)
32424 perm2[i + 0] = d->perm[i + 0] & mask;
32425 perm2[i + 1] = d->perm[i + 1] & mask;
32426 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32427 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32430 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
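/* Worked example (illustrative): for a one-operand V4SF shuffle with
   perm = {2, 0, 3, 1}, the adjusted selector is {2, 0, 7, 5}: the low
   half reads the first operand of the concat and the high half the
   second, which is the operand shape shufps requires.  */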
32435 /* Finally, try the fully general two operand permute. */
32436 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32439 /* Recognize interleave style patterns with reversed operands. */
32440 if (d->op0 != d->op1)
32442 for (i = 0; i < nelt; ++i)
32444 unsigned e = d->perm[i];
32452 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32456 /* Try the SSE4.1 blend variable merge instructions. */
32457 if (expand_vec_perm_blend (d))
32460 /* Try one of the AVX vpermil variable permutations. */
32461 if (expand_vec_perm_vpermil (d))
32464 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32465 if (expand_vec_perm_pshufb (d))
32471 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32472 in terms of a pair of pshuflw + pshufhw instructions. */
32475 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32477 unsigned char perm2[MAX_VECT_LEN];
32481 if (d->vmode != V8HImode || d->op0 != d->op1)
32484 /* The two permutations only operate in 64-bit lanes. */
32485 for (i = 0; i < 4; ++i)
32486 if (d->perm[i] >= 4)
32488 for (i = 4; i < 8; ++i)
32489 if (d->perm[i] < 4)
32495 /* Emit the pshuflw. */
32496 memcpy (perm2, d->perm, 4);
32497 for (i = 4; i < 8; ++i)
32499 ok = expand_vselect (d->target, d->op0, perm2, 8);
32502 /* Emit the pshufhw. */
32503 memcpy (perm2 + 4, d->perm + 4, 4);
32504 for (i = 0; i < 4; ++i)
32506 ok = expand_vselect (d->target, d->target, perm2, 8);
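/* Worked example (illustrative): perm = {3, 2, 1, 0, 7, 6, 5, 4} is
   emitted as pshuflw with selector {3,2,1,0, 4,5,6,7} followed by
   pshufhw with selector {0,1,2,3, 7,6,5,4}; each pass leaves the
   other 64-bit half untouched.  */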
32512 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32513 the permutation using the SSSE3 palignr instruction. This succeeds
32514 when all of the elements in PERM fit within one vector and we merely
32515 need to shift them down so that a single vector permutation has a
32516 chance to succeed. */
32519 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
32521 unsigned i, nelt = d->nelt;
32526 /* Even with AVX, palignr only operates on 128-bit vectors. */
32527 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32530 min = nelt, max = 0;
32531 for (i = 0; i < nelt; ++i)
32533 unsigned e = d->perm[i];
32539 if (min == 0 || max - min >= nelt)
32542 /* Given that we have SSSE3, we know we'll be able to implement the
32543 single operand permutation after the palignr with pshufb. */
32547 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32548 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32549 gen_lowpart (TImode, d->op1),
32550 gen_lowpart (TImode, d->op0), shift));
32552 d->op0 = d->op1 = d->target;
32555 for (i = 0; i < nelt; ++i)
32557 unsigned e = d->perm[i] - min;
32563 /* Test for the degenerate case where the alignment by itself
32564 produces the desired permutation. */
32568 ok = expand_vec_perm_1 (d);
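/* Worked example (illustrative): for V16QImode with
   perm = {3, 4, ..., 18}, MIN is 3 and MAX - MIN is 15, so the
   palignr by 3 bytes over {op1:op0} already produces the result and
   the residual permutation is the identity, i.e. the degenerate
   case above.  */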
32574 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32575 a two vector permutation into a single vector permutation by using
32576 an interleave operation to merge the vectors. */
32579 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32581 struct expand_vec_perm_d dremap, dfinal;
32582 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32583 unsigned contents, h1, h2, h3, h4;
32584 unsigned char remap[2 * MAX_VECT_LEN];
32588 if (d->op0 == d->op1)
32591 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32592 lanes. We can use similar techniques with the vperm2f128 instruction,
32593 but it requires slightly different logic. */
32594 if (GET_MODE_SIZE (d->vmode) != 16)
32597 /* Examine whence the elements come. */
32599 for (i = 0; i < nelt; ++i)
32600 contents |= 1u << d->perm[i];
32602 /* Split the two input vectors into 4 halves. */
32603 h1 = (1u << nelt2) - 1;
32608 memset (remap, 0xff, sizeof (remap));
32611 /* If the elements are all from the low halves, use interleave low;
32612 similarly for interleave high. If the elements are from mismatched
32613 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
32614 if ((contents & (h1 | h3)) == contents)
32616 for (i = 0; i < nelt2; ++i)
32619 remap[i + nelt] = i * 2 + 1;
32620 dremap.perm[i * 2] = i;
32621 dremap.perm[i * 2 + 1] = i + nelt;
32624 else if ((contents & (h2 | h4)) == contents)
32626 for (i = 0; i < nelt2; ++i)
32628 remap[i + nelt2] = i * 2;
32629 remap[i + nelt + nelt2] = i * 2 + 1;
32630 dremap.perm[i * 2] = i + nelt2;
32631 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
32634 else if ((contents & (h1 | h4)) == contents)
32636 for (i = 0; i < nelt2; ++i)
32639 remap[i + nelt + nelt2] = i + nelt2;
32640 dremap.perm[i] = i;
32641 dremap.perm[i + nelt2] = i + nelt + nelt2;
32645 dremap.vmode = V2DImode;
32647 dremap.perm[0] = 0;
32648 dremap.perm[1] = 3;
32651 else if ((contents & (h2 | h3)) == contents)
32653 for (i = 0; i < nelt2; ++i)
32655 remap[i + nelt2] = i;
32656 remap[i + nelt] = i + nelt2;
32657 dremap.perm[i] = i + nelt2;
32658 dremap.perm[i + nelt2] = i + nelt;
32662 dremap.vmode = V2DImode;
32664 dremap.perm[0] = 1;
32665 dremap.perm[1] = 2;
32671 /* Use the remapping array set up above to move the elements from their
32672 swizzled locations into their final destinations. */
32674 for (i = 0; i < nelt; ++i)
32676 unsigned e = remap[d->perm[i]];
32677 gcc_assert (e < nelt);
32678 dfinal.perm[i] = e;
32680 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
32681 dfinal.op1 = dfinal.op0;
32682 dremap.target = dfinal.op0;
32684 /* Test if the final remap can be done with a single insn. For V4SFmode or
32685 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
32687 ok = expand_vec_perm_1 (&dfinal);
32688 seq = get_insns ();
32694 if (dremap.vmode != dfinal.vmode)
32696 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
32697 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
32698 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
32701 ok = expand_vec_perm_1 (&dremap);
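/* Worked example (illustrative): for V8HImode with
   perm = {0, 8, 1, 9, 3, 11, 2, 10}, every element comes from a low
   half, so DREMAP is the interleave-low {0,8,1,9,2,10,3,11} and
   DFINAL reduces to the one-operand shuffle {0,1,2,3,6,7,4,5},
   a dword-level {0,1,3,2} swap.  */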
32708 /* A subroutine of expand_vec_perm_even_odd_1. Implement a two-vector
32709 permutation with two pshufb insns and an ior. We should have already
32710 failed all two instruction sequences. */
32713 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
32715 rtx rperm[2][16], vperm, l, h, op, m128;
32716 unsigned int i, nelt, eltsz;
32718 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32720 gcc_assert (d->op0 != d->op1);
32723 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32725 /* Generate two permutation masks. If the required element is within
32726 the given vector it is shuffled into the proper lane. If the required
32727 element is in the other vector, force a zero into the lane by setting
32728 bit 7 in the permutation mask. */
32729 m128 = GEN_INT (-128);
32730 for (i = 0; i < nelt; ++i)
32732 unsigned j, e = d->perm[i];
32733 unsigned which = (e >= nelt);
32737 for (j = 0; j < eltsz; ++j)
32739 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
32740 rperm[1-which][i*eltsz + j] = m128;
32744 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
32745 vperm = force_reg (V16QImode, vperm);
32747 l = gen_reg_rtx (V16QImode);
32748 op = gen_lowpart (V16QImode, d->op0);
32749 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
32751 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
32752 vperm = force_reg (V16QImode, vperm);
32754 h = gen_reg_rtx (V16QImode);
32755 op = gen_lowpart (V16QImode, d->op1);
32756 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
32758 op = gen_lowpart (V16QImode, d->target);
32759 emit_insn (gen_iorv16qi3 (op, l, h));
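/* Worked example (illustrative): if perm[0] = 17 for V16QImode,
   element 0 wants byte 1 of OP1, so the OP0 mask gets 0x80 (forcing
   a zero) in lane 0 while the OP1 mask gets 1 there; the ior above
   then merges the two half-results.  */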
32764 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
32765 and extract-odd permutations. */
32768 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
32775 t1 = gen_reg_rtx (V4DFmode);
32776 t2 = gen_reg_rtx (V4DFmode);
32778 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
32779 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
32780 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
32782 /* Now an unpck[lh]pd will produce the result required. */
32784 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
32786 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
32792 int mask = odd ? 0xdd : 0x88;
32794 t1 = gen_reg_rtx (V8SFmode);
32795 t2 = gen_reg_rtx (V8SFmode);
32796 t3 = gen_reg_rtx (V8SFmode);
32798 /* Shuffle within the 128-bit lanes to produce:
32799 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
32800 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
32803 /* Shuffle the lanes around to produce:
32804 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
32805 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
32808 /* Shuffle within the 128-bit lanes to produce:
32809 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
32810 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
32812 /* Shuffle within the 128-bit lanes to produce:
32813 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
32814 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
32816 /* Shuffle the lanes around to produce:
32817 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
32818 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
32827 /* These are always directly implementable by expand_vec_perm_1. */
32828 gcc_unreachable ();
32832 return expand_vec_perm_pshufb2 (d);
32835 /* We need 2*log2(N)-1 operations to achieve odd/even
32836 with interleave. */
32837 t1 = gen_reg_rtx (V8HImode);
32838 t2 = gen_reg_rtx (V8HImode);
32839 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
32840 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
32841 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
32842 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
32844 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
32846 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
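/* Worked example (illustrative): with inputs A = {a0..a7} and
   B = {b0..b7}, after the first four interleaves the target holds
   {a0,a4,b0,b4,a1,a5,b1,b5} and T2 holds {a2,a6,b2,b6,a3,a7,b3,b7};
   the final interleave-low then yields the evens
   {a0,a2,a4,a6,b0,b2,b4,b6} and interleave-high the odds, five insns
   in total (2*log2(8)-1).  */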
32853 return expand_vec_perm_pshufb2 (d);
32856 t1 = gen_reg_rtx (V16QImode);
32857 t2 = gen_reg_rtx (V16QImode);
32858 t3 = gen_reg_rtx (V16QImode);
32859 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
32860 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
32861 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
32862 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
32863 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
32864 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
32866 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
32868 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
32874 gcc_unreachable ();
32880 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
32881 extract-even and extract-odd permutations. */
32884 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
32886 unsigned i, odd, nelt = d->nelt;
32889 if (odd != 0 && odd != 1)
32892 for (i = 1; i < nelt; ++i)
32893 if (d->perm[i] != 2 * i + odd)
32896 return expand_vec_perm_even_odd_1 (d, odd);
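/* For example, on V4SImode perm = {0, 2, 4, 6} requests extract-even
   and perm = {1, 3, 5, 7} extract-odd.  */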
32899 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
32900 permutations. We assume that expand_vec_perm_1 has already failed. */
32903 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
32905 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
32906 enum machine_mode vmode = d->vmode;
32907 unsigned char perm2[4];
32915 /* These are special-cased in sse.md so that we can optionally
32916 use the vbroadcast instruction. They expand to two insns
32917 if the input happens to be in a register. */
32918 gcc_unreachable ();
32924 /* These are always implementable using standard shuffle patterns. */
32925 gcc_unreachable ();
32929 /* These can be implemented via interleave. We save one insn by
32930 stopping once we have promoted to V4SImode and then using pshufd. */
32933 optab otab = vec_interleave_low_optab;
32937 otab = vec_interleave_high_optab;
32942 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
32943 vmode = get_mode_wider_vector (vmode);
32944 op0 = gen_lowpart (vmode, op0);
32946 while (vmode != V4SImode);
32948 memset (perm2, elt, 4);
32949 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
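/* Worked example (illustrative): broadcasting byte 0 of a V16QImode
   vector takes two self-interleave-low steps (V16QI -> V8HI -> V4SI),
   after which all of dword 0 is the wanted byte, and the final
   expand_vselect above emits pshufd $0 to splat that dword.  */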
32954 gcc_unreachable ();
32958 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
32959 broadcast permutations. */
32962 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
32964 unsigned i, elt, nelt = d->nelt;
32966 if (d->op0 != d->op1)
32970 for (i = 1; i < nelt; ++i)
32971 if (d->perm[i] != elt)
32974 return expand_vec_perm_broadcast_1 (d);
32977 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
32978 With all of the interface bits taken care of, perform the expansion
32979 in D and return true on success. */
32982 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
32984 /* Try a single instruction expansion. */
32985 if (expand_vec_perm_1 (d))
32988 /* Try sequences of two instructions. */
32990 if (expand_vec_perm_pshuflw_pshufhw (d))
32993 if (expand_vec_perm_palignr (d))
32996 if (expand_vec_perm_interleave2 (d))
32999 if (expand_vec_perm_broadcast (d))
33002 /* Try sequences of three instructions. */
33004 if (expand_vec_perm_pshufb2 (d))
33007 /* ??? Look for narrow permutations whose element orderings would
33008 allow the promotion to a wider mode. */
33010 /* ??? Look for sequences of interleave or a wider permute that place
33011 the data into the correct lanes for a half-vector shuffle like
33012 pshuf[lh]w or vpermilps. */
33014 /* ??? Look for sequences of interleave that produce the desired results.
33015 The combinatorics of punpck[lh] get pretty ugly... */
33017 if (expand_vec_perm_even_odd (d))
33023 /* Extract the values from the vector CST into the permutation array in D.
33024 Return 0 on error, 1 if all values from the permutation come from the
33025 first vector, 2 if all values from the second vector, and 3 otherwise. */
33028 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33030 tree list = TREE_VECTOR_CST_ELTS (cst);
33031 unsigned i, nelt = d->nelt;
33034 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33036 unsigned HOST_WIDE_INT e;
33038 if (!host_integerp (TREE_VALUE (list), 1))
33040 e = tree_low_cst (TREE_VALUE (list), 1);
33044 ret |= (e < nelt ? 1 : 2);
33047 gcc_assert (list == NULL);
33049 /* If all elements are from the second vector, fold them into the first vector. */
33051 for (i = 0; i < nelt; ++i)
33052 d->perm[i] -= nelt;
33058 ix86_expand_vec_perm_builtin (tree exp)
33060 struct expand_vec_perm_d d;
33061 tree arg0, arg1, arg2;
33063 arg0 = CALL_EXPR_ARG (exp, 0);
33064 arg1 = CALL_EXPR_ARG (exp, 1);
33065 arg2 = CALL_EXPR_ARG (exp, 2);
33067 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33068 d.nelt = GET_MODE_NUNITS (d.vmode);
33069 d.testing_p = false;
33070 gcc_assert (VECTOR_MODE_P (d.vmode));
33072 if (TREE_CODE (arg2) != VECTOR_CST)
33074 error_at (EXPR_LOCATION (exp),
33075 "vector permutation requires vector constant");
33079 switch (extract_vec_perm_cst (&d, arg2))
33085 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33089 if (!operand_equal_p (arg0, arg1, 0))
33091 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33092 d.op0 = force_reg (d.vmode, d.op0);
33093 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33094 d.op1 = force_reg (d.vmode, d.op1);
33098 /* The elements of PERM do not suggest that only the first operand
33099 is used, but both operands are identical. Allow easier matching
33100 of the permutation by folding the permutation into the single input vector. */
33103 unsigned i, nelt = d.nelt;
33104 for (i = 0; i < nelt; ++i)
33105 if (d.perm[i] >= nelt)
33111 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33112 d.op0 = force_reg (d.vmode, d.op0);
33117 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33118 d.op0 = force_reg (d.vmode, d.op0);
33123 d.target = gen_reg_rtx (d.vmode);
33124 if (ix86_expand_vec_perm_builtin_1 (&d))
33127 /* For compiler-generated permutations, we should never get here, because
33128 the compiler should also be checking the ok hook. But since this is a
33129 builtin the user has access to, don't abort. */
33133 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33136 sorry ("vector permutation (%d %d %d %d)",
33137 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33140 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33141 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33142 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33145 sorry ("vector permutation "
33146 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33147 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33148 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33149 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33150 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33153 gcc_unreachable ();
33156 return CONST0_RTX (d.vmode);
33159 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33162 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33164 struct expand_vec_perm_d d;
33168 d.vmode = TYPE_MODE (vec_type);
33169 d.nelt = GET_MODE_NUNITS (d.vmode);
33170 d.testing_p = true;
33172 /* Given sufficient ISA support we can just return true here
33173 for selected vector modes. */
33174 if (GET_MODE_SIZE (d.vmode) == 16)
33176 /* All implementable with a single vpperm insn. */
33179 /* All implementable with 2 pshufb + 1 ior. */
33182 /* All implementable with shufpd or unpck[lh]pd. */
33187 vec_mask = extract_vec_perm_cst (&d, mask);
33189 /* This hook cannot be called in response to something that the
33190 user does (unlike the builtin expander), so we shouldn't ever see
33191 an error generated from the extract. */
33192 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33193 one_vec = (vec_mask != 3);
33195 /* Implementable with shufps or pshufd. */
33196 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33199 /* Otherwise we have to go through the motions and see if we can
33200 figure out how to generate the requested permutation. */
33201 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33202 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33204 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33207 ret = ix86_expand_vec_perm_builtin_1 (&d);
33214 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33216 struct expand_vec_perm_d d;
33222 d.vmode = GET_MODE (targ);
33223 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33224 d.testing_p = false;
33226 for (i = 0; i < nelt; ++i)
33227 d.perm[i] = i * 2 + odd;
33229 /* We'll either be able to implement the permutation directly... */
33230 if (expand_vec_perm_1 (&d))
33233 /* ... or we use the special-case patterns. */
33234 expand_vec_perm_even_odd_1 (&d, odd);
33237 /* This function returns the calling-ABI-specific va_list type node.
33238 It returns the FNDECL-specific va_list type. */
33241 ix86_fn_abi_va_list (tree fndecl)
33244 return va_list_type_node;
33245 gcc_assert (fndecl != NULL_TREE);
33247 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33248 return ms_va_list_type_node;
33250 return sysv_va_list_type_node;
33253 /* Returns the canonical va_list type specified by TYPE. If there
33254 is no valid TYPE provided, it returns NULL_TREE. */
33257 ix86_canonical_va_list_type (tree type)
33261 /* Resolve references and pointers to va_list type. */
33262 if (TREE_CODE (type) == MEM_REF)
33263 type = TREE_TYPE (type);
33264 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
33265 type = TREE_TYPE (type);
33266 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33267 type = TREE_TYPE (type);
33271 wtype = va_list_type_node;
33272 gcc_assert (wtype != NULL_TREE);
33274 if (TREE_CODE (wtype) == ARRAY_TYPE)
33276 /* If va_list is an array type, the argument may have decayed
33277 to a pointer type, e.g. by being passed to another function.
33278 In that case, unwrap both types so that we can compare the
33279 underlying records. */
33280 if (TREE_CODE (htype) == ARRAY_TYPE
33281 || POINTER_TYPE_P (htype))
33283 wtype = TREE_TYPE (wtype);
33284 htype = TREE_TYPE (htype);
33287 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33288 return va_list_type_node;
33289 wtype = sysv_va_list_type_node;
33290 gcc_assert (wtype != NULL_TREE);
33292 if (TREE_CODE (wtype) == ARRAY_TYPE)
33294 /* If va_list is an array type, the argument may have decayed
33295 to a pointer type, e.g. by being passed to another function.
33296 In that case, unwrap both types so that we can compare the
33297 underlying records. */
33298 if (TREE_CODE (htype) == ARRAY_TYPE
33299 || POINTER_TYPE_P (htype))
33301 wtype = TREE_TYPE (wtype);
33302 htype = TREE_TYPE (htype);
33305 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33306 return sysv_va_list_type_node;
33307 wtype = ms_va_list_type_node;
33308 gcc_assert (wtype != NULL_TREE);
33310 if (TREE_CODE (wtype) == ARRAY_TYPE)
33312 /* If va_list is an array type, the argument may have decayed
33313 to a pointer type, e.g. by being passed to another function.
33314 In that case, unwrap both types so that we can compare the
33315 underlying records. */
33316 if (TREE_CODE (htype) == ARRAY_TYPE
33317 || POINTER_TYPE_P (htype))
33319 wtype = TREE_TYPE (wtype);
33320 htype = TREE_TYPE (htype);
33323 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33324 return ms_va_list_type_node;
33327 return std_canonical_va_list_type (type);
33330 /* Iterate through the target-specific builtin types for va_list.
33331 IDX denotes the iterator, *PTREE is set to the result type of
33332 the va_list builtin, and *PNAME to its internal name.
33333 Returns zero if there is no element for this index, otherwise
33334 IDX should be increased upon the next call.
33335 Note, do not iterate a base builtin's name like __builtin_va_list.
33336 Used from c_common_nodes_and_builtins. */
33339 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33349 *ptree = ms_va_list_type_node;
33350 *pname = "__builtin_ms_va_list";
33354 *ptree = sysv_va_list_type_node;
33355 *pname = "__builtin_sysv_va_list";
33363 #undef TARGET_SCHED_DISPATCH
33364 #define TARGET_SCHED_DISPATCH has_dispatch
33365 #undef TARGET_SCHED_DISPATCH_DO
33366 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33368 /* The size of the dispatch window is the total number of bytes of
33369 object code allowed in a window. */
33370 #define DISPATCH_WINDOW_SIZE 16
33372 /* Number of dispatch windows considered for scheduling. */
33373 #define MAX_DISPATCH_WINDOWS 3
33375 /* Maximum number of instructions in a window. */
33378 /* Maximum number of immediate operands in a window. */
33381 /* Maximum number of immediate bits allowed in a window. */
33382 #define MAX_IMM_SIZE 128
33384 /* Maximum number of 32 bit immediates allowed in a window. */
33385 #define MAX_IMM_32 4
33387 /* Maximum number of 64 bit immediates allowed in a window. */
33388 #define MAX_IMM_64 2
33390 /* Maximum total of loads or prefetches allowed in a window. */
33393 /* Maximum total of stores allowed in a window. */
33394 #define MAX_STORE 1
33400 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33401 enum dispatch_group {
33416 /* Number of allowable groups in a dispatch window. It is an array
33417 indexed by the dispatch_group enum. 100 is used as a big number,
33418 because the number of these kinds of operations does not have any
33419 effect in a dispatch window, but we need them for other reasons in the table. */
33421 static unsigned int num_allowable_groups[disp_last] = {
33422 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33425 char group_name[disp_last + 1][16] = {
33426 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33427 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33428 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33431 /* Instruction path. */
33434 path_single, /* Single micro op. */
33435 path_double, /* Double micro op. */
33436 path_multi, /* Instructions with more than 2 micro ops. */
33440 /* sched_insn_info defines a window to the instructions scheduled in
33441 the basic block. It contains a pointer to the insn_info table and
33442 the instruction scheduled.
33444 Windows are allocated for each basic block and are linked with each other. */
33446 typedef struct sched_insn_info_s {
33448 enum dispatch_group group;
33449 enum insn_path path;
33454 /* Linked list of dispatch windows. This is a two-way list of
33455 dispatch windows of a basic block. It contains information about
33456 the number of uops in the window and the total number of
33457 instructions and of bytes in the object code for this dispatch window. */
33459 typedef struct dispatch_windows_s {
33460 int num_insn; /* Number of insn in the window. */
33461 int num_uops; /* Number of uops in the window. */
33462 int window_size; /* Number of bytes in the window. */
33463 int window_num; /* Window number, 0 or 1. */
33464 int num_imm; /* Number of immediates in an insn. */
33465 int num_imm_32; /* Number of 32 bit immediates in an insn. */
33466 int num_imm_64; /* Number of 64 bit immediates in an insn. */
33467 int imm_size; /* Total immediates in the window. */
33468 int num_loads; /* Total memory loads in the window. */
33469 int num_stores; /* Total memory stores in the window. */
33470 int violation; /* Violation exists in window. */
33471 sched_insn_info *window; /* Pointer to the window. */
33472 struct dispatch_windows_s *next;
33473 struct dispatch_windows_s *prev;
33474 } dispatch_windows;
33476 /* Immediate values used in an insn. */
33477 typedef struct imm_info_s
33484 static dispatch_windows *dispatch_window_list;
33485 static dispatch_windows *dispatch_window_list1;
33487 /* Get dispatch group of insn. */
33489 static enum dispatch_group
33490 get_mem_group (rtx insn)
33492 enum attr_memory memory;
33494 if (INSN_CODE (insn) < 0)
33495 return disp_no_group;
33496 memory = get_attr_memory (insn);
33497 if (memory == MEMORY_STORE)
33500 if (memory == MEMORY_LOAD)
33503 if (memory == MEMORY_BOTH)
33504 return disp_load_store;
33506 return disp_no_group;
33509 /* Return true if insn is a compare instruction. */
33514 enum attr_type type;
33516 type = get_attr_type (insn);
33517 return (type == TYPE_TEST
33518 || type == TYPE_ICMP
33519 || type == TYPE_FCMP
33520 || GET_CODE (PATTERN (insn)) == COMPARE);
33523 /* Return true if a dispatch violation was encountered. */
33526 dispatch_violation (void)
33528 if (dispatch_window_list->next)
33529 return dispatch_window_list->next->violation;
33530 return dispatch_window_list->violation;
33533 /* Return true if insn is a branch instruction. */
33536 is_branch (rtx insn)
33538 return (CALL_P (insn) || JUMP_P (insn));
33541 /* Return true if insn is a prefetch instruction. */
33544 is_prefetch (rtx insn)
33546 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33549 /* This function initializes a dispatch window and the list container holding a
33550 pointer to the window. */
33553 init_window (int window_num)
33556 dispatch_windows *new_list;
33558 if (window_num == 0)
33559 new_list = dispatch_window_list;
33561 new_list = dispatch_window_list1;
33563 new_list->num_insn = 0;
33564 new_list->num_uops = 0;
33565 new_list->window_size = 0;
33566 new_list->next = NULL;
33567 new_list->prev = NULL;
33568 new_list->window_num = window_num;
33569 new_list->num_imm = 0;
33570 new_list->num_imm_32 = 0;
33571 new_list->num_imm_64 = 0;
33572 new_list->imm_size = 0;
33573 new_list->num_loads = 0;
33574 new_list->num_stores = 0;
33575 new_list->violation = false;
33577 for (i = 0; i < MAX_INSN; i++)
33579 new_list->window[i].insn = NULL;
33580 new_list->window[i].group = disp_no_group;
33581 new_list->window[i].path = no_path;
33582 new_list->window[i].byte_len = 0;
33583 new_list->window[i].imm_bytes = 0;
33588 /* This function allocates and initializes a dispatch window and the
33589 list container holding a pointer to the window. */
33591 static dispatch_windows *
33592 allocate_window (void)
33594 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
33595 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
33600 /* This routine initializes the dispatch scheduling information. It
33601 initiates building dispatch scheduler tables and constructs the
33602 first dispatch window. */
33605 init_dispatch_sched (void)
33607 /* Allocate a dispatch list and a window. */
33608 dispatch_window_list = allocate_window ();
33609 dispatch_window_list1 = allocate_window ();
33614 /* This function returns true if a branch is detected. End of a basic block
33615 does not have to be a branch, but here we assume only branches end a basic block. */
33619 is_end_basic_block (enum dispatch_group group)
33621 return group == disp_branch;
33624 /* This function is called when the end of a window processing is reached. */
33627 process_end_window (void)
33629 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
33630 if (dispatch_window_list->next)
33632 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
33633 gcc_assert (dispatch_window_list->window_size
33634 + dispatch_window_list1->window_size <= 48);
33640 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
33641 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
33642 for 48 bytes of instructions. Note that these windows are not the
33643 dispatch windows whose sizes are DISPATCH_WINDOW_SIZE. */
33645 static dispatch_windows *
33646 allocate_next_window (int window_num)
33648 if (window_num == 0)
33650 if (dispatch_window_list->next)
33653 return dispatch_window_list;
33656 dispatch_window_list->next = dispatch_window_list1;
33657 dispatch_window_list1->prev = dispatch_window_list;
33659 return dispatch_window_list1;
33662 /* Increment the number of immediate operands of an instruction. */
33665 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
33670 switch (GET_CODE (*in_rtx))
33675 (imm_values->imm)++;
33676 if (x86_64_immediate_operand (*in_rtx, SImode))
33677 (imm_values->imm32)++;
33679 (imm_values->imm64)++;
33683 (imm_values->imm)++;
33684 (imm_values->imm64)++;
33688 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
33690 (imm_values->imm)++;
33691 (imm_values->imm32)++;
33702 /* Compute number of immediate operands of an instruction. */
33705 find_constant (rtx in_rtx, imm_info *imm_values)
33707 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
33708 (rtx_function) find_constant_1, (void *) imm_values);
33711 /* Return the total size of the immediate operands of an instruction along
33712 with the number of corresponding immediate operands. It initializes its
33713 parameters to zero before calling FIND_CONSTANT.
33714 INSN is the input instruction. IMM is the total of immediates.
33715 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64 bit immediates. */
33719 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
33721 imm_info imm_values = {0, 0, 0};
33723 find_constant (insn, &imm_values);
33724 *imm = imm_values.imm;
33725 *imm32 = imm_values.imm32;
33726 *imm64 = imm_values.imm64;
33727 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
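/* Worked example (illustrative): an insn carrying one 32-bit and one
   64-bit immediate reports *IMM = 2, *IMM32 = 1, *IMM64 = 1 and
   returns 1*4 + 1*8 = 12 bytes.  */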
33730 /* This function indicates if an operand of an instruction is an immediate. */
33734 has_immediate (rtx insn)
33736 int num_imm_operand;
33737 int num_imm32_operand;
33738 int num_imm64_operand;
33741 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33742 &num_imm64_operand);
33746 /* Return single or double path for instructions. */
33748 static enum insn_path
33749 get_insn_path (rtx insn)
33751 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
33753 if ((int)path == 0)
33754 return path_single;
33756 if ((int)path == 1)
33757 return path_double;
33762 /* Return insn dispatch group. */
33764 static enum dispatch_group
33765 get_insn_group (rtx insn)
33767 enum dispatch_group group = get_mem_group (insn);
33771 if (is_branch (insn))
33772 return disp_branch;
33777 if (has_immediate (insn))
33780 if (is_prefetch (insn))
33781 return disp_prefetch;
33783 return disp_no_group;
33786 /* Count number of GROUP restricted instructions in a dispatch
33787 window WINDOW_LIST. */
33790 count_num_restricted (rtx insn, dispatch_windows *window_list)
33792 enum dispatch_group group = get_insn_group (insn);
33794 int num_imm_operand;
33795 int num_imm32_operand;
33796 int num_imm64_operand;
33798 if (group == disp_no_group)
33801 if (group == disp_imm)
33803 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33804 &num_imm64_operand);
33805 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
33806 || num_imm_operand + window_list->num_imm > MAX_IMM
33807 || (num_imm32_operand > 0
33808 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
33809 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
33810 || (num_imm64_operand > 0
33811 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
33812 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
33813 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
33814 && num_imm64_operand > 0
33815 && ((window_list->num_imm_64 > 0
33816 && window_list->num_insn >= 2)
33817 || window_list->num_insn >= 3)))
33823 if ((group == disp_load_store
33824 && (window_list->num_loads >= MAX_LOAD
33825 || window_list->num_stores >= MAX_STORE))
33826 || ((group == disp_load
33827 || group == disp_prefetch)
33828 && window_list->num_loads >= MAX_LOAD)
33829 || (group == disp_store
33830 && window_list->num_stores >= MAX_STORE))
33836 /* This function returns true if insn satisfies dispatch rules on the
33837 last window scheduled. */
33840 fits_dispatch_window (rtx insn)
33842 dispatch_windows *window_list = dispatch_window_list;
33843 dispatch_windows *window_list_next = dispatch_window_list->next;
33844 unsigned int num_restrict;
33845 enum dispatch_group group = get_insn_group (insn);
33846 enum insn_path path = get_insn_path (insn);
33849 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
33850 instructions should be given the lowest priority in the
33851 scheduling process in the Haifa scheduler to make sure they will be
33852 scheduled in the same dispatch window as the reference to them. */
33853 if (group == disp_jcc || group == disp_cmp)
33856 /* Check nonrestricted. */
33857 if (group == disp_no_group || group == disp_branch)
33860 /* Get last dispatch window. */
33861 if (window_list_next)
33862 window_list = window_list_next;
33864 if (window_list->window_num == 1)
33866 sum = window_list->prev->window_size + window_list->window_size;
33869 || (min_insn_size (insn) + sum) >= 48)
33870 /* Window 1 is full. Go for next window. */
33874 num_restrict = count_num_restricted (insn, window_list);
33876 if (num_restrict > num_allowable_groups[group])
33879 /* See if it fits in the first window. */
33880 if (window_list->window_num == 0)
33882 /* The first window should have only single- and double-path uops. */
33884 if (path == path_double
33885 && (window_list->num_uops + 2) > MAX_INSN)
33887 else if (path != path_single)
33893 /* Add an instruction INSN with NUM_UOPS micro-operations to the
33894 dispatch window WINDOW_LIST. */
33897 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
33899 int byte_len = min_insn_size (insn);
33900 int num_insn = window_list->num_insn;
33902 sched_insn_info *window = window_list->window;
33903 enum dispatch_group group = get_insn_group (insn);
33904 enum insn_path path = get_insn_path (insn);
33905 int num_imm_operand;
33906 int num_imm32_operand;
33907 int num_imm64_operand;
33909 if (!window_list->violation && group != disp_cmp
33910 && !fits_dispatch_window (insn))
33911 window_list->violation = true;
33913 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33914 &num_imm64_operand);
33916 /* Initialize window with new instruction. */
33917 window[num_insn].insn = insn;
33918 window[num_insn].byte_len = byte_len;
33919 window[num_insn].group = group;
33920 window[num_insn].path = path;
33921 window[num_insn].imm_bytes = imm_size;
33923 window_list->window_size += byte_len;
33924 window_list->num_insn = num_insn + 1;
33925 window_list->num_uops = window_list->num_uops + num_uops;
33926 window_list->imm_size += imm_size;
33927 window_list->num_imm += num_imm_operand;
33928 window_list->num_imm_32 += num_imm32_operand;
33929 window_list->num_imm_64 += num_imm64_operand;
33931 if (group == disp_store)
33932 window_list->num_stores += 1;
33933 else if (group == disp_load
33934 || group == disp_prefetch)
33935 window_list->num_loads += 1;
33936 else if (group == disp_load_store)
33938 window_list->num_stores += 1;
33939 window_list->num_loads += 1;
33943 /* Adds a scheduled instruction, INSN, to the current dispatch window.
33944 If the total bytes of instructions or the number of instructions in
33945 the window exceeds the allowable limit, it allocates a new window. */
33948 add_to_dispatch_window (rtx insn)
33951 dispatch_windows *window_list;
33952 dispatch_windows *next_list;
33953 dispatch_windows *window0_list;
33954 enum insn_path path;
33955 enum dispatch_group insn_group;
33963 if (INSN_CODE (insn) < 0)
33966 byte_len = min_insn_size (insn);
33967 window_list = dispatch_window_list;
33968 next_list = window_list->next;
33969 path = get_insn_path (insn);
33970 insn_group = get_insn_group (insn);
33972 /* Get the last dispatch window. */
33974 window_list = dispatch_window_list->next;
33976 if (path == path_single)
33978 else if (path == path_double)
33981 insn_num_uops = (int) path;
33983 /* If the current window is full, get a new window.
33984 Window number zero is full if MAX_INSN uops are scheduled in it.
33985 Window number one is full if window zero's bytes plus window
33986 one's bytes equal 32, if the bytes of the new instruction added
33987 to the total make it greater than 48, or if it already has MAX_INSN
33988 instructions in it. */
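/* Worked example (illustrative): once window 0 holds MAX_INSN uops,
   the next insn flips WINDOW_NUM to 1; and if that insn's bytes would
   push the combined size of windows 0 and 1 to 48 or beyond,
   process_end_window retires both and the insn starts over in a
   fresh window 0.  */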
33989 num_insn = window_list->num_insn;
33990 num_uops = window_list->num_uops;
33991 window_num = window_list->window_num;
33992 insn_fits = fits_dispatch_window (insn);
33994 if (num_insn >= MAX_INSN
33995 || num_uops + insn_num_uops > MAX_INSN
33998 window_num = ~window_num & 1;
33999 window_list = allocate_next_window (window_num);
34002 if (window_num == 0)
34004 add_insn_window (insn, window_list, insn_num_uops);
34005 if (window_list->num_insn >= MAX_INSN
34006 && insn_group == disp_branch)
34008 process_end_window ();
34012 else if (window_num == 1)
34014 window0_list = window_list->prev;
34015 sum = window0_list->window_size + window_list->window_size;
34017 || (byte_len + sum) >= 48)
34019 process_end_window ();
34020 window_list = dispatch_window_list;
34023 add_insn_window (insn, window_list, insn_num_uops);
34026 gcc_unreachable ();
34028 if (is_end_basic_block (insn_group))
34030 /* End of basic block is reached; do end-of-basic-block processing. */
34031 process_end_window ();
34036 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34038 DEBUG_FUNCTION static void
34039 debug_dispatch_window_file (FILE *file, int window_num)
34041 dispatch_windows *list;
34044 if (window_num == 0)
34045 list = dispatch_window_list;
34047 list = dispatch_window_list1;
34049 fprintf (file, "Window #%d:\n", list->window_num);
34050 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34051 list->num_insn, list->num_uops, list->window_size);
34052 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34053 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34055 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34057 fprintf (file, " insn info:\n");
34059 for (i = 0; i < MAX_INSN; i++)
34061 if (!list->window[i].insn)
34063 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34064 i, group_name[list->window[i].group],
34065 i, (void *)list->window[i].insn,
34066 i, list->window[i].path,
34067 i, list->window[i].byte_len,
34068 i, list->window[i].imm_bytes);
34072 /* Print to stdout a dispatch window. */
34074 DEBUG_FUNCTION void
34075 debug_dispatch_window (int window_num)
34077 debug_dispatch_window_file (stdout, window_num);
34080 /* Print INSN dispatch information to FILE. */
34082 DEBUG_FUNCTION static void
34083 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34086 enum insn_path path;
34087 enum dispatch_group group;
34089 int num_imm_operand;
34090 int num_imm32_operand;
34091 int num_imm64_operand;
34093 if (INSN_CODE (insn) < 0)
34096 byte_len = min_insn_size (insn);
34097 path = get_insn_path (insn);
34098 group = get_insn_group (insn);
34099 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34100 &num_imm64_operand);
34102 fprintf (file, " insn info:\n");
34103 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34104 group_name[group], path, byte_len);
34105 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34106 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34109 /* Print to STDOUT the status of the ready list with respect to
34110 dispatch windows. */
34112 DEBUG_FUNCTION void
34113 debug_ready_dispatch (void)
34116 int no_ready = number_in_ready ();
34118 fprintf (stdout, "Number of ready: %d\n", no_ready);
34120 for (i = 0; i < no_ready; i++)
34121 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34124 /* This routine is the driver of the dispatch scheduler. */
34127 do_dispatch (rtx insn, int mode)
34129 if (mode == DISPATCH_INIT)
34130 init_dispatch_sched ();
34131 else if (mode == ADD_TO_DISPATCH_WINDOW)
34132 add_to_dispatch_window (insn);
34135 /* Return TRUE if Dispatch Scheduling is supported. */
34138 has_dispatch (rtx insn, int action)
34140 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34146 case IS_DISPATCH_ON:
34151 return is_cmp (insn);
34153 case DISPATCH_VIOLATION:
34154 return dispatch_violation ();
34156 case FITS_DISPATCH_WINDOW:
34157 return fits_dispatch_window (insn);
34163 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34164 place emms and femms instructions. */
34166 static enum machine_mode
34167 ix86_preferred_simd_mode (enum machine_mode mode)
34169 /* Disable double precision vectorizer if needed. */
34170 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
34173 if (!TARGET_AVX && !TARGET_SSE)
34179 return TARGET_AVX ? V8SFmode : V4SFmode;
34181 return TARGET_AVX ? V4DFmode : V2DFmode;
34197 /* If AVX is enabled then try vectorizing with both 256bit and 128bit vectors. */
34200 static unsigned int
34201 ix86_autovectorize_vector_sizes (void)
34203 return TARGET_AVX ? 32 | 16 : 0;
34206 /* Initialize the GCC target structure. */
34207 #undef TARGET_RETURN_IN_MEMORY
34208 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34210 #undef TARGET_LEGITIMIZE_ADDRESS
34211 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34213 #undef TARGET_ATTRIBUTE_TABLE
34214 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34215 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34216 # undef TARGET_MERGE_DECL_ATTRIBUTES
34217 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34220 #undef TARGET_COMP_TYPE_ATTRIBUTES
34221 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34223 #undef TARGET_INIT_BUILTINS
34224 #define TARGET_INIT_BUILTINS ix86_init_builtins
34225 #undef TARGET_BUILTIN_DECL
34226 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34227 #undef TARGET_EXPAND_BUILTIN
34228 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34230 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34231 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34232 ix86_builtin_vectorized_function
34234 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34235 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34237 #undef TARGET_BUILTIN_RECIPROCAL
34238 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34240 #undef TARGET_ASM_FUNCTION_EPILOGUE
34241 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34243 #undef TARGET_ENCODE_SECTION_INFO
34244 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34245 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34247 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34250 #undef TARGET_ASM_OPEN_PAREN
34251 #define TARGET_ASM_OPEN_PAREN ""
34252 #undef TARGET_ASM_CLOSE_PAREN
34253 #define TARGET_ASM_CLOSE_PAREN ""
34255 #undef TARGET_ASM_BYTE_OP
34256 #define TARGET_ASM_BYTE_OP ASM_BYTE
34258 #undef TARGET_ASM_ALIGNED_HI_OP
34259 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34260 #undef TARGET_ASM_ALIGNED_SI_OP
34261 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34263 #undef TARGET_ASM_ALIGNED_DI_OP
34264 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34267 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34268 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34270 #undef TARGET_ASM_UNALIGNED_HI_OP
34271 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34272 #undef TARGET_ASM_UNALIGNED_SI_OP
34273 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34274 #undef TARGET_ASM_UNALIGNED_DI_OP
34275 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34277 #undef TARGET_PRINT_OPERAND
34278 #define TARGET_PRINT_OPERAND ix86_print_operand
34279 #undef TARGET_PRINT_OPERAND_ADDRESS
34280 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34281 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34282 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34283 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34284 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34286 #undef TARGET_SCHED_INIT_GLOBAL
34287 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34288 #undef TARGET_SCHED_ADJUST_COST
34289 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34290 #undef TARGET_SCHED_ISSUE_RATE
34291 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34292 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34293 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34294 ia32_multipass_dfa_lookahead
34296 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34297 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34300 #undef TARGET_HAVE_TLS
34301 #define TARGET_HAVE_TLS true
34303 #undef TARGET_CANNOT_FORCE_CONST_MEM
34304 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34305 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34306 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34308 #undef TARGET_DELEGITIMIZE_ADDRESS
34309 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34311 #undef TARGET_MS_BITFIELD_LAYOUT_P
34312 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34315 #undef TARGET_BINDS_LOCAL_P
34316 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34318 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34319 #undef TARGET_BINDS_LOCAL_P
34320 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34323 #undef TARGET_ASM_OUTPUT_MI_THUNK
34324 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34325 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34326 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34328 #undef TARGET_ASM_FILE_START
34329 #define TARGET_ASM_FILE_START x86_file_start
34331 #undef TARGET_DEFAULT_TARGET_FLAGS
34332 #define TARGET_DEFAULT_TARGET_FLAGS \
34334 | TARGET_SUBTARGET_DEFAULT \
34335 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
34338 #undef TARGET_HANDLE_OPTION
34339 #define TARGET_HANDLE_OPTION ix86_handle_option
34341 #undef TARGET_OPTION_OVERRIDE
34342 #define TARGET_OPTION_OVERRIDE ix86_option_override
34343 #undef TARGET_OPTION_OPTIMIZATION_TABLE
34344 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
34345 #undef TARGET_OPTION_INIT_STRUCT
34346 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
34348 #undef TARGET_REGISTER_MOVE_COST
34349 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34350 #undef TARGET_MEMORY_MOVE_COST
34351 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34352 #undef TARGET_RTX_COSTS
34353 #define TARGET_RTX_COSTS ix86_rtx_costs
34354 #undef TARGET_ADDRESS_COST
34355 #define TARGET_ADDRESS_COST ix86_address_cost
34357 #undef TARGET_FIXED_CONDITION_CODE_REGS
34358 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34359 #undef TARGET_CC_MODES_COMPATIBLE
34360 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34362 #undef TARGET_MACHINE_DEPENDENT_REORG
34363 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34365 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34366 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34368 #undef TARGET_BUILD_BUILTIN_VA_LIST
34369 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34371 #undef TARGET_ENUM_VA_LIST_P
34372 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34374 #undef TARGET_FN_ABI_VA_LIST
34375 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34377 #undef TARGET_CANONICAL_VA_LIST_TYPE
34378 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34380 #undef TARGET_EXPAND_BUILTIN_VA_START
34381 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34383 #undef TARGET_MD_ASM_CLOBBERS
34384 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34386 #undef TARGET_PROMOTE_PROTOTYPES
34387 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34388 #undef TARGET_STRUCT_VALUE_RTX
34389 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34390 #undef TARGET_SETUP_INCOMING_VARARGS
34391 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34392 #undef TARGET_MUST_PASS_IN_STACK
34393 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34394 #undef TARGET_FUNCTION_ARG_ADVANCE
34395 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34396 #undef TARGET_FUNCTION_ARG
34397 #define TARGET_FUNCTION_ARG ix86_function_arg
34398 #undef TARGET_PASS_BY_REFERENCE
34399 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34400 #undef TARGET_INTERNAL_ARG_POINTER
34401 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34402 #undef TARGET_UPDATE_STACK_BOUNDARY
34403 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34404 #undef TARGET_GET_DRAP_RTX
34405 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34406 #undef TARGET_STRICT_ARGUMENT_NAMING
34407 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34408 #undef TARGET_STATIC_CHAIN
34409 #define TARGET_STATIC_CHAIN ix86_static_chain
34410 #undef TARGET_TRAMPOLINE_INIT
34411 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34412 #undef TARGET_RETURN_POPS_ARGS
34413 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34415 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34416 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34418 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34419 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34421 #undef TARGET_VECTOR_MODE_SUPPORTED_P
34422 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
34424 #undef TARGET_C_MODE_FOR_SUFFIX
34425 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
34428 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
34429 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
34432 #ifdef SUBTARGET_INSERT_ATTRIBUTES
34433 #undef TARGET_INSERT_ATTRIBUTES
34434 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
34437 #undef TARGET_MANGLE_TYPE
34438 #define TARGET_MANGLE_TYPE ix86_mangle_type
34440 #undef TARGET_STACK_PROTECT_FAIL
34441 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
34443 #undef TARGET_SUPPORTS_SPLIT_STACK
34444 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
34446 #undef TARGET_FUNCTION_VALUE
34447 #define TARGET_FUNCTION_VALUE ix86_function_value
34449 #undef TARGET_FUNCTION_VALUE_REGNO_P
34450 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
34452 #undef TARGET_SECONDARY_RELOAD
34453 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
34455 #undef TARGET_PREFERRED_RELOAD_CLASS
34456 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
34457 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
34458 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
34459 #undef TARGET_CLASS_LIKELY_SPILLED_P
34460 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
34462 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
34463 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
34464 ix86_builtin_vectorization_cost
34465 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
34466 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
34467 ix86_vectorize_builtin_vec_perm
34468 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
34469 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
34470 ix86_vectorize_builtin_vec_perm_ok
34471 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
34472 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
34473 ix86_preferred_simd_mode
34474 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
34475 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
34476 ix86_autovectorize_vector_sizes
34478 #undef TARGET_SET_CURRENT_FUNCTION
34479 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
34481 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
34482 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
34484 #undef TARGET_OPTION_SAVE
34485 #define TARGET_OPTION_SAVE ix86_function_specific_save
34487 #undef TARGET_OPTION_RESTORE
34488 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
34490 #undef TARGET_OPTION_PRINT
34491 #define TARGET_OPTION_PRINT ix86_function_specific_print
34493 #undef TARGET_CAN_INLINE_P
34494 #define TARGET_CAN_INLINE_P ix86_can_inline_p
34496 #undef TARGET_EXPAND_TO_RTL_HOOK
34497 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
34499 #undef TARGET_LEGITIMATE_ADDRESS_P
34500 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
34502 #undef TARGET_IRA_COVER_CLASSES
34503 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
34505 #undef TARGET_FRAME_POINTER_REQUIRED
34506 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
34508 #undef TARGET_CAN_ELIMINATE
34509 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
34511 #undef TARGET_EXTRA_LIVE_ON_ENTRY
34512 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
34514 #undef TARGET_ASM_CODE_END
34515 #define TARGET_ASM_CODE_END ix86_code_end
34517 struct gcc_target targetm = TARGET_INITIALIZER;
34519 #include "gt-i386.h"