/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
typedef struct block_info_def
{
  /* TRUE if the upper 128bits of any AVX registers are live at exit.  */
  bool upper_128bits_set;
  /* TRUE if block has been processed.  */
  bool done;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)
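
/* Illustrative note (not itself part of the pass): alloc_aux_for_blocks
   in the driver below hangs a zero-initialized block_info_def off each
   basic_block's aux field, so per-block state is recorded simply as

     BLOCK_INFO (bb)->done = true;

   with no separate table to maintain.  */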
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes a 256bit AVX register, or there
     is no 256bit AVX register in the function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      bool *upper_128bits_set = (bool *) data;
      *upper_128bits_set = true;
    }
}
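
/* Usage sketch (mirroring the call in move_or_delete_vzeroupper_2
   below): note_stores walks the stores of an insn pattern and invokes
   the handler on each destination, e.g.

     bool used = false;
     note_stores (PATTERN (insn), check_avx256_stores, &used);

   which leaves USED true when a store involves a 256bit AVX mode on
   either side of a SET.  */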
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   UPPER_128BITS_SET is TRUE if the upper 128bits of any AVX registers
   are live at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
{
  rtx insn;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;

  if (dump_file)
    fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
             bb->index, upper_128bits_set);

  insn = BB_HEAD (bb);
  while (insn != BB_END (bb))
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              upper_128bits_set = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (!upper_128bits_set)
            note_stores (pat, check_avx256_stores, &upper_128bits_set);
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (!upper_128bits_set)
        {
          /* Since the upper 128bits are cleared, the callee must not
             pass a 256bit AVX register.  We only need to check whether
             the callee returns a 256bit AVX register.  */
          upper_128bits_set = (avx256 == callee_return_avx256);

          /* Remove the unnecessary vzeroupper since the upper 128bits
             are already cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else if (avx256 == callee_return_pass_avx256
               || avx256 == callee_pass_avx256)
        {
          /* Callee passes a 256bit AVX register.  Check whether the
             callee also returns a 256bit AVX register.  */
          upper_128bits_set = (avx256 == callee_return_pass_avx256);

          /* Must remove the vzeroupper since the callee is passed a
             256bit AVX register.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete callee pass vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          upper_128bits_set = false;
          vzeroupper_insn = insn;
        }
    }

  BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;

  if (dump_file)
    fprintf (dump_file, " BB [%i] exit: upper 128bits: %d\n",
             bb->index, upper_128bits_set);
}
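
/* Worked example of the state tracking above (illustrative): given

     vzeroupper with operand callee_return_avx256
     call foo            ; foo returns a 256bit value

   entered with the upper 128bits already dead, the vzeroupper is
   deleted as redundant and upper_128bits_set is recomputed from the
   intrinsic's operand, so the liveness estimate follows values coming
   back from calls as well as explicit stores.  */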
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and its predecessor blocks recursively.  */

static void
move_or_delete_vzeroupper_1 (basic_block block)
{
  edge e;
  edge_iterator ei;
  bool upper_128bits_set;

  if (dump_file)
    fprintf (dump_file, " Process BB [%i]: status: %d\n",
             block->index, BLOCK_INFO (block)->done);

  if (BLOCK_INFO (block)->done)
    return;

  BLOCK_INFO (block)->done = true;

  upper_128bits_set = false;

  /* Process all predecessor edges of this block.  */
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      move_or_delete_vzeroupper_1 (e->src);
      if (BLOCK_INFO (e->src)->upper_128bits_set)
        upper_128bits_set = true;
    }

  /* Process this block.  */
  move_or_delete_vzeroupper_2 (block, upper_128bits_set);
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process successor blocks of all entry points.  */
  if (dump_file)
    fprintf (dump_file, "Process all entry points\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p);
      BLOCK_INFO (e->dest)->done = true;
    }

  /* Process predecessor blocks of all exit points.  */
  if (dump_file)
    fprintf (dump_file, "Process all exit points\n");

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    move_or_delete_vzeroupper_1 (e->src);

  free_aux_for_blocks ();
}
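
/* Processing order, as implemented above: successors of the entry
   block are seeded with cfun->machine->caller_pass_avx256_p, then
   move_or_delete_vzeroupper_1 walks depth-first through predecessors
   from every exit edge; the done flag guarantees each block is
   processed at most once.  */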
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
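
/* Example (field names assumed from struct processor_costs in i386.h):
   the SImode multiply setup cost of the active tuning would be fetched
   roughly as

     ix86_cost->mult_init[MODE_INDEX (SImode)]   /* index 2 */

   and any mode other than QI/HI/SI/DImode falls into the "other"
   slot 4.  */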
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
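
/* A quick sanity check of the scale: with COSTS_N_INSNS (N) == (N) * 4,
   a one-insn cost is 4, and COSTS_N_BYTES (2) == 4 as well, so the
   2-byte add used as the size baseline weighs exactly one insn.  */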
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
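
/* Each string operation entry in the tables below is a stringop_algs:
   an algorithm for variable/unknown sizes followed by {max_size, alg}
   pairs terminated by max_size == -1.  For example

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   reads: unknown size -> libcall; known blocks up to 256 bytes ->
   rep movsl; anything larger -> libcall.  DUMMY_STRINGOP_ALGS merely
   fills the slot (32bit vs 64bit codegen) a table does not use.  */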
const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  0,  /* "large" insn */
  2,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers
              in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),  /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  1,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  The 486 has an 8kB cache
         shared for code and data, so 4kB is
         not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */

  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers
              in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers
              in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  128,  /* size of l2 cache.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  32,  /* size of l2 cache.  Some models
          have integrated l2 cache, but
          optimizing for k6 is not important
          enough to worry about that.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  5,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar_load_cost.  */
  2,  /* scalar_store_cost.  */
  5,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  3,  /* vec_unalign_load_cost.  */
  3,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  2,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  /* On K8:
       MOVD reg64, xmmreg  Double  FSTORE 4
       MOVD reg32, xmmreg  Double  FSTORE 4
     On AMDFAM10:
       MOVD reg64, xmmreg  Double  FADD 3
                                   1/1  1/1
       MOVD reg32, xmmreg  Double  FADD 3
                                   1/1  1/1 */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar_load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {5, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {5, 5, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 4},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  /* On K8:
       MOVD reg64, xmmreg  Double  FSTORE 4
       MOVD reg32, xmmreg  Double  FSTORE 4
     On AMDFAM10:
       MOVD reg64, xmmreg  Double  FADD 3
                                   1/1  1/1
       MOVD reg32, xmmreg  Double  FADD 3
                                   1/1  1/1 */
  16,  /* size of l1 cache.  */
  2048,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instructions for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,  /* scalar_stmt_cost.  */
  4,  /* scalar_load_cost.  */
  4,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  4,  /* vec_align_load_cost.  */
  4,  /* vec_unalign_load_cost.  */
  4,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (3),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (4),  /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  12,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  10,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (5),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  3,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  6,  /* cost of moving MMX register */
  {12, 12},  /* cost of loading MMX registers
                in SImode and DImode */
  {12, 12},  /* cost of storing MMX registers
                in SImode and DImode */
  6,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {12, 12, 12},  /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  8,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  16,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {6, 6, 6},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {6, 6},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {6, 6, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  2048,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar_load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
1519 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1522 struct processor_costs generic32_cost = {
1523 COSTS_N_INSNS (1), /* cost of an add instruction */
1524 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1525 COSTS_N_INSNS (1), /* variable shift costs */
1526 COSTS_N_INSNS (1), /* constant shift costs */
1527 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1528 COSTS_N_INSNS (4), /* HI */
1529 COSTS_N_INSNS (3), /* SI */
1530 COSTS_N_INSNS (4), /* DI */
1531 COSTS_N_INSNS (2)}, /* other */
1532 0, /* cost of multiply per each bit set */
1533 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1534 COSTS_N_INSNS (26), /* HI */
1535 COSTS_N_INSNS (42), /* SI */
1536 COSTS_N_INSNS (74), /* DI */
1537 COSTS_N_INSNS (74)}, /* other */
1538 COSTS_N_INSNS (1), /* cost of movsx */
1539 COSTS_N_INSNS (1), /* cost of movzx */
1540 8, /* "large" insn */
1541 17, /* MOVE_RATIO */
1542 4, /* cost for loading QImode using movzbl */
1543 {4, 4, 4}, /* cost of loading integer registers
1544 in QImode, HImode and SImode.
1545 Relative to reg-reg move (2). */
1546 {4, 4, 4}, /* cost of storing integer registers */
1547 4, /* cost of reg,reg fld/fst */
1548 {12, 12, 12}, /* cost of loading fp registers
1549 in SFmode, DFmode and XFmode */
1550 {6, 6, 8}, /* cost of storing fp registers
1551 in SFmode, DFmode and XFmode */
1552 2, /* cost of moving MMX register */
1553 {8, 8}, /* cost of loading MMX registers
1554 in SImode and DImode */
1555 {8, 8}, /* cost of storing MMX registers
1556 in SImode and DImode */
1557 2, /* cost of moving SSE register */
1558 {8, 8, 8}, /* cost of loading SSE registers
1559 in SImode, DImode and TImode */
1560 {8, 8, 8}, /* cost of storing SSE registers
1561 in SImode, DImode and TImode */
1562 5, /* MMX or SSE register to integer */
1563 32, /* size of l1 cache. */
1564 256, /* size of l2 cache. */
1565 64, /* size of prefetch block */
1566 6, /* number of parallel prefetches */
1567 3, /* Branch cost */
1568 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1569 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1570 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1571 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1572 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1573 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1574 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1575 DUMMY_STRINGOP_ALGS},
1576 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1577 DUMMY_STRINGOP_ALGS},
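/* Unlike generic64 above, generic32 fills in the 32-bit slot (the
   64-bit one is DUMMY_STRINGOP_ALGS) and uses rep_prefix_4_byte, i.e.
   rep; movsl, for mid-sized blocks, since the 8-byte rep prefix needs
   64-bit mode.  */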
1578 1, /* scalar_stmt_cost. */
1579 1, /* scalar load_cost. */
1580 1, /* scalar_store_cost. */
1581 1, /* vec_stmt_cost. */
1582 1, /* vec_to_scalar_cost. */
1583 1, /* scalar_to_vec_cost. */
1584 1, /* vec_align_load_cost. */
1585 2, /* vec_unalign_load_cost. */
1586 1, /* vec_store_cost. */
1587 3, /* cond_taken_branch_cost. */
1588 1, /* cond_not_taken_branch_cost. */
1591 const struct processor_costs *ix86_cost = &pentium_cost;
1593 /* Processor feature/optimization bitmasks. */
1594 #define m_386 (1<<PROCESSOR_I386)
1595 #define m_486 (1<<PROCESSOR_I486)
1596 #define m_PENT (1<<PROCESSOR_PENTIUM)
1597 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1598 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1599 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1600 #define m_CORE2 (1<<PROCESSOR_CORE2)
1601 #define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
1602 #define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
1603 #define m_ATOM (1<<PROCESSOR_ATOM)
1605 #define m_GEODE (1<<PROCESSOR_GEODE)
1606 #define m_K6 (1<<PROCESSOR_K6)
1607 #define m_K6_GEODE (m_K6 | m_GEODE)
1608 #define m_K8 (1<<PROCESSOR_K8)
1609 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1610 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1611 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1612 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1613 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1615 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32 | m_COREI7_32)
1616 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64 | m_COREI7_64)
1618 /* Generic instruction choice should be the common subset of supported CPUs
1619 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1620 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
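/* These m_* masks are indexed by the processor enum, so each entry in
   the tables below is tested with a simple bit-and against
   1 << ix86_tune or 1 << ix86_arch.  The consumers appear near the end
   of ix86_option_override_internal:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */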
1622 /* Feature tests against the various tunings. */
1623 unsigned char ix86_tune_features[X86_TUNE_LAST];
1625 /* Feature tests against the various tunings used to create ix86_tune_features
1626 based on the processor mask. */
1627 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1628 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1629 negatively, so enabling it for Generic64 seems like a good code size
1630 tradeoff. We can't enable it for 32bit generic because it does not
1631 work well with PPro-based chips. */
1632 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1634 /* X86_TUNE_PUSH_MEMORY */
1635 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1636 | m_NOCONA | m_CORE2 | m_GENERIC,
1638 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1641 /* X86_TUNE_UNROLL_STRLEN */
1642 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1643 | m_CORE2 | m_GENERIC,
1645 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1646 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1648 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1649 on simulation results. But after P4 was made, no performance benefit
1650 was observed with branch hints. They also increase the code size.
1651 As a result, icc never generates branch hints. */
1654 /* X86_TUNE_DOUBLE_WITH_ADD */
1657 /* X86_TUNE_USE_SAHF */
1658 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1659 | m_NOCONA | m_CORE2 | m_GENERIC,
1661 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1662 partial dependencies. */
1663 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1664 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1666 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1667 register stalls on the Generic32 compilation setting as well. However,
1668 in the current implementation the partial register stalls are not
1669 eliminated very well - they can be introduced via subregs synthesized
1670 by combine and can happen in caller/callee saving sequences. Because
1671 this option pays back little on PPro-based chips and conflicts with the
1672 partial reg dependencies used by Athlon/P4-based chips, it is better
1673 to leave it off for generic32 for now. */
1676 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1677 m_CORE2 | m_GENERIC,
1679 /* X86_TUNE_USE_HIMODE_FIOP */
1680 m_386 | m_486 | m_K6_GEODE,
1682 /* X86_TUNE_USE_SIMODE_FIOP */
1683 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1685 /* X86_TUNE_USE_MOV0 */
1688 /* X86_TUNE_USE_CLTD */
1689 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1691 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1694 /* X86_TUNE_SPLIT_LONG_MOVES */
1697 /* X86_TUNE_READ_MODIFY_WRITE */
1700 /* X86_TUNE_READ_MODIFY */
1703 /* X86_TUNE_PROMOTE_QIMODE */
1704 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1705 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1707 /* X86_TUNE_FAST_PREFIX */
1708 ~(m_PENT | m_486 | m_386),
1710 /* X86_TUNE_SINGLE_STRINGOP */
1711 m_386 | m_PENT4 | m_NOCONA,
1713 /* X86_TUNE_QIMODE_MATH */
1716 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1717 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1718 might be considered for Generic32 if our scheme for avoiding partial
1719 stalls were more effective. */
1722 /* X86_TUNE_PROMOTE_QI_REGS */
1725 /* X86_TUNE_PROMOTE_HI_REGS */
1728 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1729 over esp addition. */
1730 m_386 | m_486 | m_PENT | m_PPRO,
1732 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1733 over esp addition. */
1736 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1737 over esp subtraction. */
1738 m_386 | m_486 | m_PENT | m_K6_GEODE,
1740 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1741 over esp subtraction. */
1742 m_PENT | m_K6_GEODE,
1744 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1745 for DFmode copies */
1746 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1747 | m_GENERIC | m_GEODE),
1749 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1750 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1752 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1753 conflict here between PPro/Pentium4 based chips that treat 128bit
1754 SSE registers as single units and K8 based chips that divide SSE
1755 registers into two 64bit halves. This knob promotes all store
1756 destinations to be 128bit to allow register renaming on 128bit SSE
1757 units, but usually results in one extra microop on 64bit SSE units.
1758 Experimental results show that disabling this option on P4 brings over
1759 20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
1760 regression that can be partly masked by careful scheduling of moves. */
1761 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1762 | m_AMDFAM10 | m_BDVER1,
1764 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1765 m_AMDFAM10 | m_BDVER1,
1767 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1770 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1773 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1774 are resolved on SSE register parts instead of whole registers, so we may
1775 maintain just lower part of scalar values in proper format leaving the
1776 upper part undefined. */
1779 /* X86_TUNE_SSE_TYPELESS_STORES */
1782 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1783 m_PPRO | m_PENT4 | m_NOCONA,
1785 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1786 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1788 /* X86_TUNE_PROLOGUE_USING_MOVE */
1789 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1791 /* X86_TUNE_EPILOGUE_USING_MOVE */
1792 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1794 /* X86_TUNE_SHIFT1 */
1797 /* X86_TUNE_USE_FFREEP */
1800 /* X86_TUNE_INTER_UNIT_MOVES */
1801 ~(m_AMD_MULTIPLE | m_GENERIC),
1803 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1804 ~(m_AMDFAM10 | m_BDVER1),
1806 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1807 than 4 branch instructions in the 16 byte window. */
1808 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1809 | m_GENERIC,
1811 /* X86_TUNE_SCHEDULE */
1812 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1813 | m_GENERIC,
1815 /* X86_TUNE_USE_BT */
1816 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1818 /* X86_TUNE_USE_INCDEC */
1819 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1821 /* X86_TUNE_PAD_RETURNS */
1822 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1824 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1827 /* X86_TUNE_EXT_80387_CONSTANTS */
1828 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1829 | m_CORE2 | m_GENERIC,
1831 /* X86_TUNE_SHORTEN_X87_SSE */
1834 /* X86_TUNE_AVOID_VECTOR_DECODE */
1837 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1838 HImode and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1841 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1842 vector path on AMD machines. */
1843 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1845 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1846 machines. */
1847 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1849 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1850 than a mov. */
1853 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1854 but one byte longer. */
1857 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1858 operand that cannot be represented using a modRM byte. The XOR
1859 replacement is long decoded, so this split helps here as well. */
1862 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1863 from FP to FP. */
1864 m_AMDFAM10 | m_GENERIC,
1866 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1867 from integer to FP. */
1870 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1871 with a subsequent conditional jump instruction into a single
1872 compare-and-branch uop. */
1875 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1876 will impact LEA instruction selection. */
1879 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1880 instructions. */
1884 /* Feature tests against the various architecture variations. */
1885 unsigned char ix86_arch_features[X86_ARCH_LAST];
1887 /* Feature tests against the various architecture variations, used to create
1888 ix86_arch_features based on the processor mask. */
1889 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1890 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1891 ~(m_386 | m_486 | m_PENT | m_K6),
1893 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1896 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1899 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1902 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1906 static const unsigned int x86_accumulate_outgoing_args
1907 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1908 | m_GENERIC;
1910 static const unsigned int x86_arch_always_fancy_math_387
1911 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1912 | m_NOCONA | m_CORE2 | m_GENERIC;
1914 static enum stringop_alg stringop_alg = no_stringop;
1916 /* In case the average insn count for single function invocation is
1917 lower than this constant, emit fast (but longer) prologue and
1918 epilogue code. */
1919 #define FAST_PROLOGUE_INSN_COUNT 20
1921 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1922 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1923 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1924 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1926 /* Array of the smallest class containing reg number REGNO, indexed by
1927 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1929 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1931 /* ax, dx, cx, bx */
1932 AREG, DREG, CREG, BREG,
1933 /* si, di, bp, sp */
1934 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1936 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1937 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1940 /* flags, fpsr, fpcr, frame */
1941 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1943 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1946 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1949 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1950 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1951 /* SSE REX registers */
1952 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1956 /* The "default" register map used in 32bit mode. */
1958 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1960 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1961 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1962 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1963 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1964 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1965 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1966 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
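/* These maps are indexed by gcc's own register number and yield the
   debugger register number, with -1 for registers that have no
   encoding.  For example dbx_register_map[1] == 2: gcc regno 1 is %edx,
   which the default 32-bit numbering encodes as register 2 (see the
   SVR4 table below for the full regno correspondence).  */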
1969 /* The "default" register map used in 64bit mode. */
1971 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1973 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1974 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1975 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1976 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1977 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1978 8,9,10,11,12,13,14,15, /* extended integer registers */
1979 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1982 /* Define the register numbers to be used in Dwarf debugging information.
1983 The SVR4 reference port C compiler uses the following register numbers
1984 in its Dwarf output code:
1985 0 for %eax (gcc regno = 0)
1986 1 for %ecx (gcc regno = 2)
1987 2 for %edx (gcc regno = 1)
1988 3 for %ebx (gcc regno = 3)
1989 4 for %esp (gcc regno = 7)
1990 5 for %ebp (gcc regno = 6)
1991 6 for %esi (gcc regno = 4)
1992 7 for %edi (gcc regno = 5)
1993 The following three DWARF register numbers are never generated by
1994 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1995 believes these numbers have these meanings.
1996 8 for %eip (no gcc equivalent)
1997 9 for %eflags (gcc regno = 17)
1998 10 for %trapno (no gcc equivalent)
1999 It is not at all clear how we should number the FP stack registers
2000 for the x86 architecture. If the version of SDB on x86/svr4 were
2001 a bit less brain dead with respect to floating-point then we would
2002 have a precedent to follow with respect to DWARF register numbers
2003 for x86 FP registers, but the SDB on x86/svr4 is so completely
2004 broken with respect to FP registers that it is hardly worth thinking
2005 of it as something to strive for compatibility with.
2006 The version of x86/svr4 SDB I have at the moment does (partially)
2007 seem to believe that DWARF register number 11 is associated with
2008 the x86 register %st(0), but that's about all. Higher DWARF
2009 register numbers don't seem to be associated with anything in
2010 particular, and even for DWARF regno 11, SDB only seems to under-
2011 stand that it should say that a variable lives in %st(0) (when
2012 asked via an `=' command) if we said it was in DWARF regno 11,
2013 but SDB still prints garbage when asked for the value of the
2014 variable in question (via a `/' command).
2015 (Also note that the labels SDB prints for various FP stack regs
2016 when doing an `x' command are all wrong.)
2017 Note that these problems generally don't affect the native SVR4
2018 C compiler because it doesn't allow the use of -O with -g and
2019 because when it is *not* optimizing, it allocates a memory
2020 location for each floating-point variable, and the memory
2021 location is what gets described in the DWARF AT_location
2022 attribute for the variable in question.
2023 Regardless of the severe mental illness of the x86/svr4 SDB, we
2024 do something sensible here and we use the following DWARF
2025 register numbers. Note that these are all stack-top-relative
2026 numbers:
2027 11 for %st(0) (gcc regno = 8)
2028 12 for %st(1) (gcc regno = 9)
2029 13 for %st(2) (gcc regno = 10)
2030 14 for %st(3) (gcc regno = 11)
2031 15 for %st(4) (gcc regno = 12)
2032 16 for %st(5) (gcc regno = 13)
2033 17 for %st(6) (gcc regno = 14)
2034 18 for %st(7) (gcc regno = 15)
2036 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2038 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2039 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2040 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2041 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2042 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2043 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2044 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2047 /* Define parameter passing and return registers. */
2049 static int const x86_64_int_parameter_registers[6] =
2051 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2054 static int const x86_64_ms_abi_int_parameter_registers[4] =
2056 CX_REG, DX_REG, R8_REG, R9_REG
2059 static int const x86_64_int_return_registers[4] =
2061 AX_REG, DX_REG, DI_REG, SI_REG
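/* Under the SysV x86-64 ABI the six parameter registers above are used
   in order, so for   long f (long a, long b, long c);   a arrives in
   %rdi, b in %rsi and c in %rdx, and the result comes back in %rax
   (%rdx serving as the second return register for 128-bit values).
   The MS ABI variant passes the first four arguments in %rcx, %rdx,
   %r8 and %r9 instead.  */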
2064 /* Define the structure for the machine field in struct function. */
2066 struct GTY(()) stack_local_entry {
2067 unsigned short mode;
2070 struct stack_local_entry *next;
2073 /* Structure describing stack frame layout.
2074 Stack grows downward:
2080 saved static chain if ix86_static_chain_on_stack
2082 saved frame pointer if frame_pointer_needed
2083 <- HARD_FRAME_POINTER
2089 <- sse_regs_save_offset
2092 [va_arg registers] |
2096 [padding2] | = to_allocate
2105 int outgoing_arguments_size;
2106 HOST_WIDE_INT frame;
2108 /* The offsets relative to ARG_POINTER. */
2109 HOST_WIDE_INT frame_pointer_offset;
2110 HOST_WIDE_INT hard_frame_pointer_offset;
2111 HOST_WIDE_INT stack_pointer_offset;
2112 HOST_WIDE_INT hfp_save_offset;
2113 HOST_WIDE_INT reg_save_offset;
2114 HOST_WIDE_INT sse_reg_save_offset;
2116 /* When save_regs_using_mov is set, emit prologue using
2117 move instead of push instructions. */
2118 bool save_regs_using_mov;
2121 /* Code model option. */
2122 enum cmodel ix86_cmodel;
2124 enum asm_dialect ix86_asm_dialect = ASM_ATT;
2126 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
2128 /* Which unit we are generating floating point math for. */
2129 enum fpmath_unit ix86_fpmath;
2131 /* Which cpu are we scheduling for. */
2132 enum attr_cpu ix86_schedule;
2134 /* Which cpu are we optimizing for. */
2135 enum processor_type ix86_tune;
2137 /* Which instruction set architecture to use. */
2138 enum processor_type ix86_arch;
2140 /* true if sse prefetch instruction is not NOOP. */
2141 int x86_prefetch_sse;
2143 /* ix86_regparm_string as a number */
2144 static int ix86_regparm;
2146 /* -mstackrealign option */
2147 static const char ix86_force_align_arg_pointer_string[]
2148 = "force_align_arg_pointer";
2150 static rtx (*ix86_gen_leave) (void);
2151 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2152 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2153 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2154 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2155 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2156 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2157 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2158 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2159 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2161 /* Preferred alignment for stack boundary in bits. */
2162 unsigned int ix86_preferred_stack_boundary;
2164 /* Alignment for incoming stack boundary in bits specified at
2165 command line. */
2166 static unsigned int ix86_user_incoming_stack_boundary;
2168 /* Default alignment for incoming stack boundary in bits. */
2169 static unsigned int ix86_default_incoming_stack_boundary;
2171 /* Alignment for incoming stack boundary in bits. */
2172 unsigned int ix86_incoming_stack_boundary;
2174 /* The abi used by target. */
2175 enum calling_abi ix86_abi;
2177 /* Values 1-5: see jump.c */
2178 int ix86_branch_cost;
2180 /* Calling abi specific va_list type nodes. */
2181 static GTY(()) tree sysv_va_list_type_node;
2182 static GTY(()) tree ms_va_list_type_node;
2184 /* Variables which are this size or smaller are put in the data/bss
2185 or ldata/lbss sections. */
2187 int ix86_section_threshold = 65536;
2189 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2190 char internal_label_prefix[16];
2191 int internal_label_prefix_len;
2193 /* Fence to use after loop using movnt. */
2194 tree x86_mfence;
2196 /* Register class used for passing a given 64bit part of the argument.
2197 These represent classes as documented by the psABI, with the exception
2198 of the SSESF and SSEDF classes, which are basically the SSE class;
2199 gcc just uses an SF or DFmode move instead of DImode to avoid
2200 reformatting penalties.
2201 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2202 whenever possible (when the upper half contains only padding). */
2203 enum x86_64_reg_class
2206 X86_64_INTEGER_CLASS,
2207 X86_64_INTEGERSI_CLASS,
2214 X86_64_COMPLEX_X87_CLASS,
2218 #define MAX_CLASSES 4
2220 /* Table of constants used by fldpi, fldln2, etc.... */
2221 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2222 static bool ext_80387_constants_init = 0;
2225 static struct machine_function * ix86_init_machine_status (void);
2226 static rtx ix86_function_value (const_tree, const_tree, bool);
2227 static bool ix86_function_value_regno_p (const unsigned int);
2228 static rtx ix86_static_chain (const_tree, bool);
2229 static int ix86_function_regparm (const_tree, const_tree);
2230 static void ix86_compute_frame_layout (struct ix86_frame *);
2231 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2233 static void ix86_add_new_builtins (int);
2234 static rtx ix86_expand_vec_perm_builtin (tree);
2235 static tree ix86_canonical_va_list_type (tree);
2236 static void predict_jump (int);
2237 static unsigned int split_stack_prologue_scratch_regno (void);
2238 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2240 enum ix86_function_specific_strings
2242 IX86_FUNCTION_SPECIFIC_ARCH,
2243 IX86_FUNCTION_SPECIFIC_TUNE,
2244 IX86_FUNCTION_SPECIFIC_FPMATH,
2245 IX86_FUNCTION_SPECIFIC_MAX
2248 static char *ix86_target_string (int, int, const char *, const char *,
2249 const char *, bool);
2250 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2251 static void ix86_function_specific_save (struct cl_target_option *);
2252 static void ix86_function_specific_restore (struct cl_target_option *);
2253 static void ix86_function_specific_print (FILE *, int,
2254 struct cl_target_option *);
2255 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2256 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2257 static bool ix86_can_inline_p (tree, tree);
2258 static void ix86_set_current_function (tree);
2259 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2261 static enum calling_abi ix86_function_abi (const_tree);
2264 #ifndef SUBTARGET32_DEFAULT_CPU
2265 #define SUBTARGET32_DEFAULT_CPU "i386"
2268 /* The svr4 ABI for the i386 says that records and unions are returned
2269 in memory. */
2270 #ifndef DEFAULT_PCC_STRUCT_RETURN
2271 #define DEFAULT_PCC_STRUCT_RETURN 1
2274 /* Whether -mtune= or -march= were specified */
2275 static int ix86_tune_defaulted;
2276 static int ix86_arch_specified;
2278 /* A mask of ix86_isa_flags that includes bit X if X
2279 was set or cleared on the command line. */
2280 static int ix86_isa_flags_explicit;
2282 /* Define a set of ISAs which are available when a given ISA is
2283 enabled. MMX and SSE ISAs are handled separately. */
2285 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2286 #define OPTION_MASK_ISA_3DNOW_SET \
2287 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2289 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2290 #define OPTION_MASK_ISA_SSE2_SET \
2291 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2292 #define OPTION_MASK_ISA_SSE3_SET \
2293 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2294 #define OPTION_MASK_ISA_SSSE3_SET \
2295 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2296 #define OPTION_MASK_ISA_SSE4_1_SET \
2297 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2298 #define OPTION_MASK_ISA_SSE4_2_SET \
2299 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2300 #define OPTION_MASK_ISA_AVX_SET \
2301 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2302 #define OPTION_MASK_ISA_FMA_SET \
2303 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2305 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2306 as -msse4.2. */
2307 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
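/* The *_SET masks chain, so enabling one ISA pulls in everything it
   depends on.  For example -msse4.1 expands to

     OPTION_MASK_ISA_SSE4_1_SET
       == OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3
	  | OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2
	  | OPTION_MASK_ISA_SSE.  */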
2309 #define OPTION_MASK_ISA_SSE4A_SET \
2310 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2311 #define OPTION_MASK_ISA_FMA4_SET \
2312 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2313 | OPTION_MASK_ISA_AVX_SET)
2314 #define OPTION_MASK_ISA_XOP_SET \
2315 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2316 #define OPTION_MASK_ISA_LWP_SET \
2319 /* AES and PCLMUL need SSE2 because they use xmm registers */
2320 #define OPTION_MASK_ISA_AES_SET \
2321 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2322 #define OPTION_MASK_ISA_PCLMUL_SET \
2323 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2325 #define OPTION_MASK_ISA_ABM_SET \
2326 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2328 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2329 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2330 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2331 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2332 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2334 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2335 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2336 #define OPTION_MASK_ISA_F16C_SET \
2337 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2339 /* Define a set of ISAs which aren't available when a given ISA is
2340 disabled. MMX and SSE ISAs are handled separately. */
2342 #define OPTION_MASK_ISA_MMX_UNSET \
2343 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2344 #define OPTION_MASK_ISA_3DNOW_UNSET \
2345 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2346 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2348 #define OPTION_MASK_ISA_SSE_UNSET \
2349 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2350 #define OPTION_MASK_ISA_SSE2_UNSET \
2351 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2352 #define OPTION_MASK_ISA_SSE3_UNSET \
2353 (OPTION_MASK_ISA_SSE3 \
2354 | OPTION_MASK_ISA_SSSE3_UNSET \
2355 | OPTION_MASK_ISA_SSE4A_UNSET )
2356 #define OPTION_MASK_ISA_SSSE3_UNSET \
2357 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2358 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2359 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2360 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2361 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2362 #define OPTION_MASK_ISA_AVX_UNSET \
2363 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2364 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2365 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2367 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2368 as -mno-sse4.1. */
2369 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2371 #define OPTION_MASK_ISA_SSE4A_UNSET \
2372 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2374 #define OPTION_MASK_ISA_FMA4_UNSET \
2375 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2376 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2377 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2379 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2380 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2381 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2382 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2383 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2384 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2385 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2386 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2388 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2389 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2390 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
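/* The *_UNSET masks chain in the opposite direction: disabling an ISA
   also disables everything built on top of it.  For example -mno-sse3
   clears, through OPTION_MASK_ISA_SSE3_UNSET, the bits for SSE3, SSSE3,
   SSE4.1, SSE4.2, AVX, FMA, F16C, SSE4A, FMA4 and XOP.  */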
2392 /* Vectorization library interface and handlers. */
2393 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2395 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2396 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2398 /* Processor target table, indexed by processor number */
2399 struct ptt
2400 {
2401 const struct processor_costs *cost; /* Processor costs */
2402 const int align_loop; /* Default alignments. */
2403 const int align_loop_max_skip;
2404 const int align_jump;
2405 const int align_jump_max_skip;
2406 const int align_func;
2409 static const struct ptt processor_target_table[PROCESSOR_max] =
2411 {&i386_cost, 4, 3, 4, 3, 4},
2412 {&i486_cost, 16, 15, 16, 15, 16},
2413 {&pentium_cost, 16, 7, 16, 7, 16},
2414 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2415 {&geode_cost, 0, 0, 0, 0, 0},
2416 {&k6_cost, 32, 7, 32, 7, 32},
2417 {&athlon_cost, 16, 7, 16, 7, 16},
2418 {&pentium4_cost, 0, 0, 0, 0, 0},
2419 {&k8_cost, 16, 7, 16, 7, 16},
2420 {&nocona_cost, 0, 0, 0, 0, 0},
2421 {&core2_cost, 16, 10, 16, 10, 16},
2422 /* Core i7 32-bit. */
2423 {&generic32_cost, 16, 10, 16, 10, 16},
2424 /* Core i7 64-bit. */
2425 {&generic64_cost, 16, 10, 16, 10, 16},
2426 {&generic32_cost, 16, 7, 16, 7, 16},
2427 {&generic64_cost, 16, 10, 16, 10, 16},
2428 {&amdfam10_cost, 32, 24, 32, 7, 32},
2429 {&bdver1_cost, 32, 24, 32, 7, 32},
2430 {&atom_cost, 16, 7, 16, 7, 16}
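/* Each row above pairs a cost table with the default loop/jump/function
   alignments (and max-skip values) declared in struct ptt; rows with
   zeros, such as geode, pentium4 and nocona, request no extra
   alignment.  The selected row's values become the -falign-* defaults
   when the user has not specified them.  */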
2433 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2461 /* Return true if a red-zone is in use. */
2464 ix86_using_red_zone (void)
2466 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
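/* The red zone is the 128-byte area below the stack pointer that the
   SysV x86-64 ABI guarantees is never clobbered asynchronously (by
   signal or interrupt handlers), so leaf functions can use it without
   adjusting %rsp.  The MS ABI makes no such guarantee, hence the
   exclusion above.  */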
2469 /* Implement TARGET_HANDLE_OPTION. */
2471 static bool
2472 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2479 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2480 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2484 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2485 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2492 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2493 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2497 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2498 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2508 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2509 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2513 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2514 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2521 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2522 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2526 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2527 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2534 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2535 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2539 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2540 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2547 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2548 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2552 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2553 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2560 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2561 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2565 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2566 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2573 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2574 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2578 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2579 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2586 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2587 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2591 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2592 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2599 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2600 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2604 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2605 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2610 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2611 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2615 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2616 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2622 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2623 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2627 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2628 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2635 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2636 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2640 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2641 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2648 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2649 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2653 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2654 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2661 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2662 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2666 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2667 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2674 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2675 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2679 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2680 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2687 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2688 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2692 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2693 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2700 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2701 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2705 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2706 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2713 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2714 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2718 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2719 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2726 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2727 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2731 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2732 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2739 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2740 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2744 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2745 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2752 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2753 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2757 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2758 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2765 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2766 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2770 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2771 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2778 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2779 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2783 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2784 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2791 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2792 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2796 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2797 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2804 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2805 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2809 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2810 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
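/* Every ISA case in the switch above follows the same shape; spelled
   out for a single option it is:

     case OPT_mmmx:
       if (value)
	 {
	   ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
	 }
       else
	 {
	   ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
	 }
       return true;

   Recording the bits in ix86_isa_flags_explicit lets the -march
   defaults applied later respect any explicit -m/-mno- choice.  */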
2819 /* Return a string that documents the current -m options. The caller is
2820 responsible for freeing the string. */
2823 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2824 const char *fpmath, bool add_nl_p)
2826 struct ix86_target_opts
2828 const char *option; /* option string */
2829 int mask; /* isa mask options */
2832 /* This table is ordered so that options like -msse4.2 that imply
2833 preceding options are matched first. */
2834 static struct ix86_target_opts isa_opts[] =
2836 { "-m64", OPTION_MASK_ISA_64BIT },
2837 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2838 { "-mfma", OPTION_MASK_ISA_FMA },
2839 { "-mxop", OPTION_MASK_ISA_XOP },
2840 { "-mlwp", OPTION_MASK_ISA_LWP },
2841 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2842 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2843 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2844 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2845 { "-msse3", OPTION_MASK_ISA_SSE3 },
2846 { "-msse2", OPTION_MASK_ISA_SSE2 },
2847 { "-msse", OPTION_MASK_ISA_SSE },
2848 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2849 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2850 { "-mmmx", OPTION_MASK_ISA_MMX },
2851 { "-mabm", OPTION_MASK_ISA_ABM },
2852 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2853 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2854 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2855 { "-maes", OPTION_MASK_ISA_AES },
2856 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2857 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2858 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2859 { "-mf16c", OPTION_MASK_ISA_F16C },
2863 static struct ix86_target_opts flag_opts[] =
2865 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2866 { "-m80387", MASK_80387 },
2867 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2868 { "-malign-double", MASK_ALIGN_DOUBLE },
2869 { "-mcld", MASK_CLD },
2870 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2871 { "-mieee-fp", MASK_IEEE_FP },
2872 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2873 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2874 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2875 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2876 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2877 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2878 { "-mno-red-zone", MASK_NO_RED_ZONE },
2879 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2880 { "-mrecip", MASK_RECIP },
2881 { "-mrtd", MASK_RTD },
2882 { "-msseregparm", MASK_SSEREGPARM },
2883 { "-mstack-arg-probe", MASK_STACK_PROBE },
2884 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2885 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2886 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2887 { "-mvzeroupper", MASK_VZEROUPPER },
2890 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2893 char target_other[40];
2902 memset (opts, '\0', sizeof (opts));
2904 /* Add -march= option. */
2907 opts[num][0] = "-march=";
2908 opts[num++][1] = arch;
2911 /* Add -mtune= option. */
2914 opts[num][0] = "-mtune=";
2915 opts[num++][1] = tune;
2918 /* Pick out the options in isa options. */
2919 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2921 if ((isa & isa_opts[i].mask) != 0)
2923 opts[num++][0] = isa_opts[i].option;
2924 isa &= ~ isa_opts[i].mask;
2928 if (isa && add_nl_p)
2930 opts[num++][0] = isa_other;
2931 sprintf (isa_other, "(other isa: %#x)", isa);
2934 /* Add flag options. */
2935 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2937 if ((flags & flag_opts[i].mask) != 0)
2939 opts[num++][0] = flag_opts[i].option;
2940 flags &= ~ flag_opts[i].mask;
2944 if (flags && add_nl_p)
2946 opts[num++][0] = target_other;
2947 sprintf (target_other, "(other flags: %#x)", flags);
2950 /* Add -mfpmath= option. */
2953 opts[num][0] = "-mfpmath=";
2954 opts[num++][1] = fpmath;
2961 gcc_assert (num < ARRAY_SIZE (opts));
2963 /* Size the string. */
2965 sep_len = (add_nl_p) ? 3 : 1;
2966 for (i = 0; i < num; i++)
2969 for (j = 0; j < 2; j++)
2971 len += strlen (opts[i][j]);
2974 /* Build the string. */
2975 ret = ptr = (char *) xmalloc (len);
2978 for (i = 0; i < num; i++)
2982 for (j = 0; j < 2; j++)
2983 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2990 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2998 for (j = 0; j < 2; j++)
3001 memcpy (ptr, opts[i][j], len2[j]);
3003 line_len += len2[j];
3008 gcc_assert (ret + len >= ptr);
3013 /* Return TRUE if software prefetching is beneficial for the
3014 given CPU. */
3016 static bool
3017 software_prefetching_beneficial_p (void)
3021 case PROCESSOR_GEODE:
3023 case PROCESSOR_ATHLON:
3025 case PROCESSOR_AMDFAM10:
3033 /* Return true if profiling code should be emitted before the
3034 prologue, and false otherwise.
3035 Note: for x86 the "hotfix" (ms_hook_prologue) case is not supported
and is rejected with sorry (). */
3036 static bool
3037 ix86_profile_before_prologue (void)
3039 return flag_fentry != 0;
3042 /* Function that is callable from the debugger to print the current
3043 options. */
3044 void
3045 ix86_debug_options (void)
3047 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3048 ix86_arch_string, ix86_tune_string,
3049 ix86_fpmath_string, true);
3053 fprintf (stderr, "%s\n\n", opts);
3057 fputs ("<no options>\n\n", stderr);
3062 /* Override various settings based on options. If MAIN_ARGS_P, the
3063 options are from the command line, otherwise they are from
3064 attribute(target). */
3067 ix86_option_override_internal (bool main_args_p)
3070 unsigned int ix86_arch_mask, ix86_tune_mask;
3071 const bool ix86_tune_specified = (ix86_tune_string != NULL);
3076 /* Comes from final.c -- no real reason to change it. */
3077 #define MAX_CODE_ALIGN 16
3085 PTA_PREFETCH_SSE = 1 << 4,
3087 PTA_3DNOW_A = 1 << 6,
3091 PTA_POPCNT = 1 << 10,
3093 PTA_SSE4A = 1 << 12,
3094 PTA_NO_SAHF = 1 << 13,
3095 PTA_SSE4_1 = 1 << 14,
3096 PTA_SSE4_2 = 1 << 15,
3098 PTA_PCLMUL = 1 << 17,
3101 PTA_MOVBE = 1 << 20,
3105 PTA_FSGSBASE = 1 << 24,
3106 PTA_RDRND = 1 << 25,
3112 const char *const name; /* processor name or nickname. */
3113 const enum processor_type processor;
3114 const enum attr_cpu schedule;
3115 const unsigned /*enum pta_flags*/ flags;
3117 const processor_alias_table[] =
3119 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3120 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3121 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3122 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3123 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3124 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3125 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3126 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
3127 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
3128 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3129 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3130 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
3131 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3133 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3135 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3136 PTA_MMX | PTA_SSE | PTA_SSE2},
3137 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3138 PTA_MMX | PTA_SSE | PTA_SSE2},
3139 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3140 PTA_MMX | PTA_SSE | PTA_SSE2},
3141 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3142 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
3143 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3144 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3145 | PTA_CX16 | PTA_NO_SAHF},
3146 {"core2", PROCESSOR_CORE2, CPU_CORE2,
3147 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3148 | PTA_SSSE3 | PTA_CX16},
3149 {"corei7", PROCESSOR_COREI7_64, CPU_GENERIC64,
3150 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3151 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
3152 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3153 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3154 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
3155 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3156 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3157 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3158 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3159 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
3160 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3161 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3162 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3163 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
3164 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3165 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3166 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3167 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3168 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3169 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
3170 {"x86-64", PROCESSOR_K8, CPU_K8,
3171 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
3172 {"k8", PROCESSOR_K8, CPU_K8,
3173 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3174 | PTA_SSE2 | PTA_NO_SAHF},
3175 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3176 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3177 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3178 {"opteron", PROCESSOR_K8, CPU_K8,
3179 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3180 | PTA_SSE2 | PTA_NO_SAHF},
3181 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3182 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3183 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3184 {"athlon64", PROCESSOR_K8, CPU_K8,
3185 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3186 | PTA_SSE2 | PTA_NO_SAHF},
3187 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3188 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3189 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
3190 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3191 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3192 | PTA_SSE2 | PTA_NO_SAHF},
3193 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3194 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3195 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3196 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3197 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3198 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
3199 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3200 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3201 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
3202 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
3203 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
3204 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3205 0 /* flags are only used for -march switch. */ },
3206 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3207 PTA_64BIT /* flags are only used for -march switch. */ },
3210 int const pta_size = ARRAY_SIZE (processor_alias_table);
3212 /* Set up prefix/suffix so the error messages refer to either the command
3213 line argument, or the attribute(target). */
3222 prefix = "option(\"";
3227 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3228 SUBTARGET_OVERRIDE_OPTIONS;
3231 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3232 SUBSUBTARGET_OVERRIDE_OPTIONS;
3235 /* -fPIC is the default for x86_64. */
3236 if (TARGET_MACHO && TARGET_64BIT)
3237 flag_pic = 2;
3239 /* Need to check -mtune=generic first. */
3240 if (ix86_tune_string)
3242 if (!strcmp (ix86_tune_string, "generic")
3243 || !strcmp (ix86_tune_string, "i686")
3244 /* As special support for cross compilers we read -mtune=native
3245 as -mtune=generic. With native compilers we won't see the
3246 -mtune=native, as it was changed by the driver. */
3247 || !strcmp (ix86_tune_string, "native"))
3250 ix86_tune_string = "generic64";
3252 ix86_tune_string = "generic32";
3254 /* If this call is for setting the option attribute, allow the
3255 generic32/generic64 that was previously set. */
3256 else if (!main_args_p
3257 && (!strcmp (ix86_tune_string, "generic32")
3258 || !strcmp (ix86_tune_string, "generic64")))
3260 else if (!strncmp (ix86_tune_string, "generic", 7))
3261 error ("bad value (%s) for %stune=%s %s",
3262 ix86_tune_string, prefix, suffix, sw);
3263 else if (!strcmp (ix86_tune_string, "x86-64"))
3264 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3265 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3266 prefix, suffix, prefix, suffix, prefix, suffix);
3270 if (ix86_arch_string)
3271 ix86_tune_string = ix86_arch_string;
3272 if (!ix86_tune_string)
3274 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3275 ix86_tune_defaulted = 1;
3278 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3279 need to use a sensible tune option. */
3280 if (!strcmp (ix86_tune_string, "generic")
3281 || !strcmp (ix86_tune_string, "x86-64")
3282 || !strcmp (ix86_tune_string, "i686"))
3285 ix86_tune_string = "generic64";
3287 ix86_tune_string = "generic32";
3291 if (ix86_stringop_string)
3293 if (!strcmp (ix86_stringop_string, "rep_byte"))
3294 stringop_alg = rep_prefix_1_byte;
3295 else if (!strcmp (ix86_stringop_string, "libcall"))
3296 stringop_alg = libcall;
3297 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3298 stringop_alg = rep_prefix_4_byte;
3299 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3301 /* rep; movq isn't available in 32-bit code. */
3302 stringop_alg = rep_prefix_8_byte;
3303 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3304 stringop_alg = loop_1_byte;
3305 else if (!strcmp (ix86_stringop_string, "loop"))
3306 stringop_alg = loop;
3307 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3308 stringop_alg = unrolled_loop;
3310 error ("bad value (%s) for %sstringop-strategy=%s %s",
3311 ix86_stringop_string, prefix, suffix, sw);
3314 if (!ix86_arch_string)
3315 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3317 ix86_arch_specified = 1;
3319 /* Validate -mabi= value. */
3320 if (ix86_abi_string)
3322 if (strcmp (ix86_abi_string, "sysv") == 0)
3323 ix86_abi = SYSV_ABI;
3324 else if (strcmp (ix86_abi_string, "ms") == 0)
3327 error ("unknown ABI (%s) for %sabi=%s %s",
3328 ix86_abi_string, prefix, suffix, sw);
3331 ix86_abi = DEFAULT_ABI;
3333 if (ix86_cmodel_string != 0)
3335 if (!strcmp (ix86_cmodel_string, "small"))
3336 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3337 else if (!strcmp (ix86_cmodel_string, "medium"))
3338 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3339 else if (!strcmp (ix86_cmodel_string, "large"))
3340 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3342 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3343 else if (!strcmp (ix86_cmodel_string, "32"))
3344 ix86_cmodel = CM_32;
3345 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3346 ix86_cmodel = CM_KERNEL;
3348 error ("bad value (%s) for %scmodel=%s %s",
3349 ix86_cmodel_string, prefix, suffix, sw);
3353 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3354 use of rip-relative addressing. This eliminates fixups that
3355 would otherwise be needed if this object is to be placed in a
3356 DLL, and is essentially just as efficient as direct addressing. */
3357 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3358 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3359 else if (TARGET_64BIT)
3360 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3362 ix86_cmodel = CM_32;
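/* Rough meaning of the 64-bit code models chosen above: "small" assumes
   code and statically allocated data all live in the low 2GB, "medium"
   keeps code below 2GB but lets large data live anywhere, "large" makes
   no assumptions, and "kernel" links in the negative 2GB of the address
   space (used by the Linux kernel).  The *_PIC variants are the
   position-independent counterparts.  */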
3364 if (ix86_asm_string != 0)
3366 if (! TARGET_MACHO
3367 && !strcmp (ix86_asm_string, "intel"))
3368 ix86_asm_dialect = ASM_INTEL;
3369 else if (!strcmp (ix86_asm_string, "att"))
3370 ix86_asm_dialect = ASM_ATT;
3372 error ("bad value (%s) for %sasm=%s %s",
3373 ix86_asm_string, prefix, suffix, sw);
3375 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3376 error ("code model %qs not supported in the %s bit mode",
3377 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3378 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3379 sorry ("%i-bit mode not compiled in",
3380 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3382 for (i = 0; i < pta_size; i++)
3383 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3385 ix86_schedule = processor_alias_table[i].schedule;
3386 ix86_arch = processor_alias_table[i].processor;
3387 /* Default cpu tuning to the architecture. */
3388 ix86_tune = ix86_arch;
3390 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3391 error ("CPU you selected does not support x86-64 "
3394 if (processor_alias_table[i].flags & PTA_MMX
3395 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3396 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3397 if (processor_alias_table[i].flags & PTA_3DNOW
3398 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3399 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3400 if (processor_alias_table[i].flags & PTA_3DNOW_A
3401 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3402 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3403 if (processor_alias_table[i].flags & PTA_SSE
3404 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3405 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3406 if (processor_alias_table[i].flags & PTA_SSE2
3407 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3408 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3409 if (processor_alias_table[i].flags & PTA_SSE3
3410 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3411 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3412 if (processor_alias_table[i].flags & PTA_SSSE3
3413 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3414 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3415 if (processor_alias_table[i].flags & PTA_SSE4_1
3416 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3417 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3418 if (processor_alias_table[i].flags & PTA_SSE4_2
3419 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3420 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3421 if (processor_alias_table[i].flags & PTA_AVX
3422 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3423 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3424 if (processor_alias_table[i].flags & PTA_FMA
3425 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3426 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3427 if (processor_alias_table[i].flags & PTA_SSE4A
3428 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3429 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3430 if (processor_alias_table[i].flags & PTA_FMA4
3431 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3432 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3433 if (processor_alias_table[i].flags & PTA_XOP
3434 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3435 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3436 if (processor_alias_table[i].flags & PTA_LWP
3437 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3438 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3439 if (processor_alias_table[i].flags & PTA_ABM
3440 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3441 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3442 if (processor_alias_table[i].flags & PTA_CX16
3443 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3444 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3445 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3446 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3447 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3448 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3449 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3450 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3451 if (processor_alias_table[i].flags & PTA_MOVBE
3452 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3453 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3454 if (processor_alias_table[i].flags & PTA_AES
3455 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3456 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3457 if (processor_alias_table[i].flags & PTA_PCLMUL
3458 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3459 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3460 if (processor_alias_table[i].flags & PTA_FSGSBASE
3461 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3462 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3463 if (processor_alias_table[i].flags & PTA_RDRND
3464 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3465 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3466 if (processor_alias_table[i].flags & PTA_F16C
3467 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3468 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3469 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3470 x86_prefetch_sse = true;
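/* The if-chain above maps each PTA_* capability bit of the selected
   -march CPU onto its OPTION_MASK_ISA_* flag, but only when the user
   did not already set or clear that ISA explicitly; that is what the
   ix86_isa_flags_explicit tests guard against.  */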
3475 if (!strcmp (ix86_arch_string, "generic"))
3476 error ("generic CPU can be used only for %stune=%s %s",
3477 prefix, suffix, sw);
3478 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3479 error ("bad value (%s) for %sarch=%s %s",
3480 ix86_arch_string, prefix, suffix, sw);
3482 ix86_arch_mask = 1u << ix86_arch;
3483 for (i = 0; i < X86_ARCH_LAST; ++i)
3484 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3486 for (i = 0; i < pta_size; i++)
3487 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3489 ix86_schedule = processor_alias_table[i].schedule;
3490 ix86_tune = processor_alias_table[i].processor;
3493 if (!(processor_alias_table[i].flags & PTA_64BIT))
3495 if (ix86_tune_defaulted)
3497 ix86_tune_string = "x86-64";
3498 for (i = 0; i < pta_size; i++)
3499 if (! strcmp (ix86_tune_string,
3500 processor_alias_table[i].name))
3502 ix86_schedule = processor_alias_table[i].schedule;
3503 ix86_tune = processor_alias_table[i].processor;
3506 error ("CPU you selected does not support x86-64 "
3512 /* Adjust tuning when compiling for 32-bit ABI. */
3515 case PROCESSOR_GENERIC64:
3516 ix86_tune = PROCESSOR_GENERIC32;
3517 ix86_schedule = CPU_PENTIUMPRO;
3520 case PROCESSOR_COREI7_64:
3521 ix86_tune = PROCESSOR_COREI7_32;
3522 ix86_schedule = CPU_PENTIUMPRO;
3529 /* Intel CPUs have always interpreted SSE prefetch instructions as
3530 NOPs; so, we can enable SSE prefetch instructions even when
3531 -mtune (rather than -march) points us to a processor that has them.
3532 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3533 higher processors. */
3535 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3536 x86_prefetch_sse = true;
3540 if (ix86_tune_specified && i == pta_size)
3541 error ("bad value (%s) for %stune=%s %s",
3542 ix86_tune_string, prefix, suffix, sw);
3544 ix86_tune_mask = 1u << ix86_tune;
3545 for (i = 0; i < X86_TUNE_LAST; ++i)
3546 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3548 #ifndef USE_IX86_FRAME_POINTER
3549 #define USE_IX86_FRAME_POINTER 0
3552 #ifndef USE_X86_64_FRAME_POINTER
3553 #define USE_X86_64_FRAME_POINTER 0
3556 /* Set the default values for switches whose default depends on TARGET_64BIT
3557 in case they weren't overwritten by command line options. */
3560 if (optimize > 1 && !global_options_set.x_flag_zee)
3561 flag_zee = 1;
3562 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3563 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3564 if (flag_asynchronous_unwind_tables == 2)
3565 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3566 if (flag_pcc_struct_return == 2)
3567 flag_pcc_struct_return = 0;
3571 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3572 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3573 if (flag_asynchronous_unwind_tables == 2)
3574 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3575 if (flag_pcc_struct_return == 2)
3576 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3580 ix86_cost = &ix86_size_cost;
3582 ix86_cost = processor_target_table[ix86_tune].cost;
3584 /* Arrange to set up i386_stack_locals for all functions. */
3585 init_machine_status = ix86_init_machine_status;
3587 /* Validate -mregparm= value. */
3588 if (ix86_regparm_string)
3591 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3592 i = atoi (ix86_regparm_string);
3593 if (i < 0 || i > REGPARM_MAX)
3594 error ("%sregparm=%d%s is not between 0 and %d",
3595 prefix, i, suffix, REGPARM_MAX);
3600 ix86_regparm = REGPARM_MAX;
3602 /* If the user has provided any of the -malign-* options,
3603 warn and use that value only if -falign-* is not set.
3604 Remove this code in GCC 3.2 or later. */
3605 if (ix86_align_loops_string)
3607 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3608 prefix, suffix, suffix);
3609 if (align_loops == 0)
3611 i = atoi (ix86_align_loops_string);
3612 if (i < 0 || i > MAX_CODE_ALIGN)
3613 error ("%salign-loops=%d%s is not between 0 and %d",
3614 prefix, i, suffix, MAX_CODE_ALIGN);
3616 align_loops = 1 << i;
3620 if (ix86_align_jumps_string)
3622 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3623 prefix, suffix, suffix);
3624 if (align_jumps == 0)
3626 i = atoi (ix86_align_jumps_string);
3627 if (i < 0 || i > MAX_CODE_ALIGN)
3628 error ("%salign-loops=%d%s is not between 0 and %d",
3629 prefix, i, suffix, MAX_CODE_ALIGN);
3631 align_jumps = 1 << i;
3635 if (ix86_align_funcs_string)
3637 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3638 prefix, suffix, suffix);
3639 if (align_functions == 0)
3641 i = atoi (ix86_align_funcs_string);
3642 if (i < 0 || i > MAX_CODE_ALIGN)
3643 error ("%salign-loops=%d%s is not between 0 and %d",
3644 prefix, i, suffix, MAX_CODE_ALIGN);
3646 align_functions = 1 << i;
3650 /* Default align_* from the processor table. */
3651 if (align_loops == 0)
3653 align_loops = processor_target_table[ix86_tune].align_loop;
3654 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3656 if (align_jumps == 0)
3658 align_jumps = processor_target_table[ix86_tune].align_jump;
3659 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3661 if (align_functions == 0)
3663 align_functions = processor_target_table[ix86_tune].align_func;
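/* Illustrative example (not part of the original source): with
   -malign-loops=4 the validation above accepts the value and sets
   align_loops = 1 << 4, i.e. a 16-byte loop alignment; leaving the
   option unset picks the per-processor default from
   processor_target_table instead.  */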
3666 /* Validate -mbranch-cost= value, or provide default. */
3667 ix86_branch_cost = ix86_cost->branch_cost;
3668 if (ix86_branch_cost_string)
3670 i = atoi (ix86_branch_cost_string);
3672 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3674 ix86_branch_cost = i;
3676 if (ix86_section_threshold_string)
3678 i = atoi (ix86_section_threshold_string);
3680 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3682 ix86_section_threshold = i;
3685 if (ix86_tls_dialect_string)
3687 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3688 ix86_tls_dialect = TLS_DIALECT_GNU;
3689 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3690 ix86_tls_dialect = TLS_DIALECT_GNU2;
3692 error ("bad value (%s) for %stls-dialect=%s %s",
3693 ix86_tls_dialect_string, prefix, suffix, sw);
3696 if (ix87_precision_string)
3698 i = atoi (ix87_precision_string);
3699 if (i != 32 && i != 64 && i != 80)
3700 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3705 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3707 /* Enable by default the SSE and MMX builtins. Do allow the user to
3708 explicitly disable any of these. In particular, disabling SSE and
3709 MMX for kernel code is extremely useful. */
3710 if (!ix86_arch_specified)
3712 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3713 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3716 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3720 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3722 if (!ix86_arch_specified)
3724 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3726 /* The i386 ABI does not specify a red zone. It still makes sense to use
3727 one when the programmer takes care to keep the stack from being destroyed. */
3728 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3729 target_flags |= MASK_NO_RED_ZONE;
3732 /* Keep nonleaf frame pointers. */
3733 if (flag_omit_frame_pointer)
3734 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3735 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3736 flag_omit_frame_pointer = 1;
3738 /* If we're doing fast math, we don't care about comparison order
3739 wrt NaNs. This lets us use a shorter comparison sequence. */
3740 if (flag_finite_math_only)
3741 target_flags &= ~MASK_IEEE_FP;
3743 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3744 since the insns won't need emulation. */
3745 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3746 target_flags &= ~MASK_NO_FANCY_MATH_387;
3748 /* Likewise, if the target doesn't have a 387, or we've specified
3749 software floating point, don't use 387 inline intrinsics. */
3751 target_flags |= MASK_NO_FANCY_MATH_387;
3753 /* Turn on MMX builtins for -msse. */
3756 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3757 x86_prefetch_sse = true;
3760 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3761 if (TARGET_SSE4_2 || TARGET_ABM)
3762 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3764 /* Validate -mpreferred-stack-boundary= value or default it to
3765 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3766 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3767 if (ix86_preferred_stack_boundary_string)
3769 int min = (TARGET_64BIT ? 4 : 2);
3770 int max = (TARGET_SEH ? 4 : 12);
3772 i = atoi (ix86_preferred_stack_boundary_string);
3773 if (i < min || i > max)
3776 error ("%spreferred-stack-boundary%s is not supported "
3777 "for this target", prefix, suffix);
3779 error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
3780 prefix, i, suffix, min, max);
3783 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
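/* Illustrative example (not part of the original source): on x86-64,
   -mpreferred-stack-boundary=4 passes the MIN check above and yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the usual 16-byte stack
   alignment the psABI requires for SSE spills.  */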
3786 /* Set the default value for -mstackrealign. */
3787 if (ix86_force_align_arg_pointer == -1)
3788 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3790 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3792 /* Validate -mincoming-stack-boundary= value or default it to
3793 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3794 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3795 if (ix86_incoming_stack_boundary_string)
3797 i = atoi (ix86_incoming_stack_boundary_string);
3798 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3799 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3800 i, TARGET_64BIT ? 4 : 2);
3803 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3804 ix86_incoming_stack_boundary
3805 = ix86_user_incoming_stack_boundary;
3809 /* Accept -msseregparm only if at least SSE support is enabled. */
3810 if (TARGET_SSEREGPARM
3812 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3814 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3815 if (ix86_fpmath_string != 0)
3817 if (! strcmp (ix86_fpmath_string, "387"))
3818 ix86_fpmath = FPMATH_387;
3819 else if (! strcmp (ix86_fpmath_string, "sse"))
3823 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3824 ix86_fpmath = FPMATH_387;
3827 ix86_fpmath = FPMATH_SSE;
3829 else if (! strcmp (ix86_fpmath_string, "387,sse")
3830 || ! strcmp (ix86_fpmath_string, "387+sse")
3831 || ! strcmp (ix86_fpmath_string, "sse,387")
3832 || ! strcmp (ix86_fpmath_string, "sse+387")
3833 || ! strcmp (ix86_fpmath_string, "both"))
3837 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3838 ix86_fpmath = FPMATH_387;
3840 else if (!TARGET_80387)
3842 warning (0, "387 instruction set disabled, using SSE arithmetics");
3843 ix86_fpmath = FPMATH_SSE;
3846 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3849 error ("bad value (%s) for %sfpmath=%s %s",
3850 ix86_fpmath_string, prefix, suffix, sw);
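/* Illustrative examples (not part of the original source) of strings
   accepted above: -mfpmath=387, -mfpmath=sse, and the combined forms
   "387,sse", "387+sse", "sse,387", "sse+387" and "both", which select
   FPMATH_SSE | FPMATH_387 when both units are available.  */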
3853 /* If the i387 is disabled, then do not return values in it. */
3855 target_flags &= ~MASK_FLOAT_RETURNS;
3857 /* Use an external vectorized library when vectorizing intrinsics. */
3858 if (ix86_veclibabi_string)
3860 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3861 ix86_veclib_handler = ix86_veclibabi_svml;
3862 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3863 ix86_veclib_handler = ix86_veclibabi_acml;
3865 error ("unknown vectorization library ABI type (%s) for "
3866 "%sveclibabi=%s %s", ix86_veclibabi_string,
3867 prefix, suffix, sw);
3870 if ((!USE_IX86_FRAME_POINTER
3871 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3872 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3874 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3876 /* ??? Unwind info is not correct around the CFG unless either a frame
3877 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3878 unwind info generation to be aware of the CFG and propagating states
3879 around edges. */
3880 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3881 || flag_exceptions || flag_non_call_exceptions)
3882 && flag_omit_frame_pointer
3883 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3885 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3886 warning (0, "unwind tables currently require either a frame pointer "
3887 "or %saccumulate-outgoing-args%s for correctness",
3889 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3892 /* If stack probes are required, the space used for large function
3893 arguments on the stack must also be probed, so enable
3894 -maccumulate-outgoing-args so this happens in the prologue. */
3895 if (TARGET_STACK_PROBE
3896 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3898 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3899 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3900 "for correctness", prefix, suffix);
3901 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3904 /* For sane SSE instruction set generation we need the fcomi instruction.
3905 It is safe to enable all CMOVE instructions. */
3909 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3912 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3913 p = strchr (internal_label_prefix, 'X');
3914 internal_label_prefix_len = p - internal_label_prefix;
3918 /* When the scheduling description is not available, disable the scheduler
3919 pass so it won't slow down compilation and make x87 code slower. */
3920 if (!TARGET_SCHEDULE)
3921 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3923 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3924 ix86_cost->simultaneous_prefetches,
3925 global_options.x_param_values,
3926 global_options_set.x_param_values);
3927 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3928 global_options.x_param_values,
3929 global_options_set.x_param_values);
3930 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3931 global_options.x_param_values,
3932 global_options_set.x_param_values);
3933 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3934 global_options.x_param_values,
3935 global_options_set.x_param_values);
3937 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3938 if (flag_prefetch_loop_arrays < 0
3941 && software_prefetching_beneficial_p ())
3942 flag_prefetch_loop_arrays = 1;
3944 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3945 can be optimized to ap = __builtin_next_arg (0). */
3946 if (!TARGET_64BIT && !flag_split_stack)
3947 targetm.expand_builtin_va_start = NULL;
3951 ix86_gen_leave = gen_leave_rex64;
3952 ix86_gen_add3 = gen_adddi3;
3953 ix86_gen_sub3 = gen_subdi3;
3954 ix86_gen_sub3_carry = gen_subdi3_carry;
3955 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3956 ix86_gen_monitor = gen_sse3_monitor64;
3957 ix86_gen_andsp = gen_anddi3;
3958 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3959 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3960 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3964 ix86_gen_leave = gen_leave;
3965 ix86_gen_add3 = gen_addsi3;
3966 ix86_gen_sub3 = gen_subsi3;
3967 ix86_gen_sub3_carry = gen_subsi3_carry;
3968 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3969 ix86_gen_monitor = gen_sse3_monitor;
3970 ix86_gen_andsp = gen_andsi3;
3971 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3972 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3973 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3977 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3979 target_flags |= MASK_CLD & ~target_flags_explicit;
3982 if (!TARGET_64BIT && flag_pic)
3984 if (flag_fentry > 0)
3985 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3988 else if (TARGET_SEH)
3990 if (flag_fentry == 0)
3991 sorry ("-mno-fentry isn't compatible with SEH");
3994 else if (flag_fentry < 0)
3996 #if defined(PROFILE_BEFORE_PROLOGUE)
4003 /* Save the initial options in case the user uses function-specific options. */
4005 target_option_default_node = target_option_current_node
4006 = build_target_option_node ();
4010 /* Enable vzeroupper pass by default for TARGET_AVX. */
4011 if (!(target_flags_explicit & MASK_VZEROUPPER))
4012 target_flags |= MASK_VZEROUPPER;
4016 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4017 target_flags &= ~MASK_VZEROUPPER;
4021 /* Return TRUE if type TYPE and mode MODE use 256bit AVX modes. */
4024 use_avx256_p (enum machine_mode mode, const_tree type)
4026 return (VALID_AVX256_REG_MODE (mode)
4027 || (type
4028 && TREE_CODE (type) == VECTOR_TYPE
4029 && int_size_in_bytes (type) == 32));
4032 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4035 function_pass_avx256_p (const_rtx val)
4040 if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
4043 if (GET_CODE (val) == PARALLEL)
4048 for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
4050 r = XVECEXP (val, 0, i);
4051 if (GET_CODE (r) == EXPR_LIST
4053 && REG_P (XEXP (r, 0))
4054 && (GET_MODE (XEXP (r, 0)) == OImode
4055 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
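/* Illustrative note (not part of the original source): a __m256 value
   is a 32-byte VECTOR_TYPE carried in a 256-bit AVX register, so a
   function returning or taking __m256 satisfies the checks above and
   is flagged as passing 256-bit AVX registers.  */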
4063 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4066 ix86_option_override (void)
4068 ix86_option_override_internal (true);
4071 /* Update register usage after having seen the compiler flags. */
4074 ix86_conditional_register_usage (void)
4079 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4081 if (fixed_regs[i] > 1)
4082 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
4083 if (call_used_regs[i] > 1)
4084 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
4087 /* The PIC register, if it exists, is fixed. */
4088 j = PIC_OFFSET_TABLE_REGNUM;
4089 if (j != INVALID_REGNUM)
4090 fixed_regs[j] = call_used_regs[j] = 1;
4092 /* The MS_ABI changes the set of call-used registers. */
4093 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
4095 call_used_regs[SI_REG] = 0;
4096 call_used_regs[DI_REG] = 0;
4097 call_used_regs[XMM6_REG] = 0;
4098 call_used_regs[XMM7_REG] = 0;
4099 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4100 call_used_regs[i] = 0;
4103 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
4104 other call-clobbered regs for 64-bit. */
4107 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4109 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4110 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4111 && call_used_regs[i])
4112 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4115 /* If MMX is disabled, squash the registers. */
4117 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4118 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4119 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4121 /* If SSE is disabled, squash the registers. */
4123 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4124 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4125 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4127 /* If the FPU is disabled, squash the registers. */
4128 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4129 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4130 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4131 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4133 /* If 32-bit, squash the 64-bit registers. */
4136 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4138 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4144 /* Save the current options */
4147 ix86_function_specific_save (struct cl_target_option *ptr)
4149 ptr->arch = ix86_arch;
4150 ptr->schedule = ix86_schedule;
4151 ptr->tune = ix86_tune;
4152 ptr->fpmath = ix86_fpmath;
4153 ptr->branch_cost = ix86_branch_cost;
4154 ptr->tune_defaulted = ix86_tune_defaulted;
4155 ptr->arch_specified = ix86_arch_specified;
4156 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4157 ptr->ix86_target_flags_explicit = target_flags_explicit;
4159 /* The fields are char but the variables are not; make sure the
4160 values fit in the fields. */
4161 gcc_assert (ptr->arch == ix86_arch);
4162 gcc_assert (ptr->schedule == ix86_schedule);
4163 gcc_assert (ptr->tune == ix86_tune);
4164 gcc_assert (ptr->fpmath == ix86_fpmath);
4165 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4168 /* Restore the current options */
4171 ix86_function_specific_restore (struct cl_target_option *ptr)
4173 enum processor_type old_tune = ix86_tune;
4174 enum processor_type old_arch = ix86_arch;
4175 unsigned int ix86_arch_mask, ix86_tune_mask;
4178 ix86_arch = (enum processor_type) ptr->arch;
4179 ix86_schedule = (enum attr_cpu) ptr->schedule;
4180 ix86_tune = (enum processor_type) ptr->tune;
4181 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
4182 ix86_branch_cost = ptr->branch_cost;
4183 ix86_tune_defaulted = ptr->tune_defaulted;
4184 ix86_arch_specified = ptr->arch_specified;
4185 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
4186 target_flags_explicit = ptr->ix86_target_flags_explicit;
4188 /* Recreate the arch feature tests if the arch changed */
4189 if (old_arch != ix86_arch)
4191 ix86_arch_mask = 1u << ix86_arch;
4192 for (i = 0; i < X86_ARCH_LAST; ++i)
4193 ix86_arch_features[i]
4194 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4197 /* Recreate the tune optimization tests */
4198 if (old_tune != ix86_tune)
4200 ix86_tune_mask = 1u << ix86_tune;
4201 for (i = 0; i < X86_TUNE_LAST; ++i)
4202 ix86_tune_features[i]
4203 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4207 /* Print the current options */
4210 ix86_function_specific_print (FILE *file, int indent,
4211 struct cl_target_option *ptr)
4214 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4215 NULL, NULL, NULL, false);
4217 fprintf (file, "%*sarch = %d (%s)\n",
4220 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4221 ? cpu_names[ptr->arch]
4224 fprintf (file, "%*stune = %d (%s)\n",
4227 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4228 ? cpu_names[ptr->tune]
4231 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
4232 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
4233 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
4234 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4238 fprintf (file, "%*s%s\n", indent, "", target_string);
4239 free (target_string);
4244 /* Inner function to process the attribute((target(...))), take an argument and
4245 set the current options from the argument. If we have a list, recursively go
4246 over the list. */
4249 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
4254 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4255 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4256 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4257 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4272 enum ix86_opt_type type;
4277 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4278 IX86_ATTR_ISA ("abm", OPT_mabm),
4279 IX86_ATTR_ISA ("aes", OPT_maes),
4280 IX86_ATTR_ISA ("avx", OPT_mavx),
4281 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4282 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4283 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4284 IX86_ATTR_ISA ("sse", OPT_msse),
4285 IX86_ATTR_ISA ("sse2", OPT_msse2),
4286 IX86_ATTR_ISA ("sse3", OPT_msse3),
4287 IX86_ATTR_ISA ("sse4", OPT_msse4),
4288 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4289 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4290 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4291 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4292 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4293 IX86_ATTR_ISA ("xop", OPT_mxop),
4294 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4295 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4296 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4297 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4299 /* string options */
4300 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4301 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
4302 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4305 IX86_ATTR_YES ("cld",
4309 IX86_ATTR_NO ("fancy-math-387",
4310 OPT_mfancy_math_387,
4311 MASK_NO_FANCY_MATH_387),
4313 IX86_ATTR_YES ("ieee-fp",
4317 IX86_ATTR_YES ("inline-all-stringops",
4318 OPT_minline_all_stringops,
4319 MASK_INLINE_ALL_STRINGOPS),
4321 IX86_ATTR_YES ("inline-stringops-dynamically",
4322 OPT_minline_stringops_dynamically,
4323 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4325 IX86_ATTR_NO ("align-stringops",
4326 OPT_mno_align_stringops,
4327 MASK_NO_ALIGN_STRINGOPS),
4329 IX86_ATTR_YES ("recip",
4335 /* If this is a list, recurse to get the options. */
4336 if (TREE_CODE (args) == TREE_LIST)
4340 for (; args; args = TREE_CHAIN (args))
4341 if (TREE_VALUE (args)
4342 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4348 else if (TREE_CODE (args) != STRING_CST)
4351 /* Handle multiple arguments separated by commas. */
4352 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4354 while (next_optstr && *next_optstr != '\0')
4356 char *p = next_optstr;
4358 char *comma = strchr (next_optstr, ',');
4359 const char *opt_string;
4360 size_t len, opt_len;
4365 enum ix86_opt_type type = ix86_opt_unknown;
4371 len = comma - next_optstr;
4372 next_optstr = comma + 1;
4380 /* Recognize no-xxx. */
4381 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4390 /* Find the option. */
4393 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4395 type = attrs[i].type;
4396 opt_len = attrs[i].len;
4397 if (ch == attrs[i].string[0]
4398 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4399 && memcmp (p, attrs[i].string, opt_len) == 0)
4402 mask = attrs[i].mask;
4403 opt_string = attrs[i].string;
4408 /* Process the option. */
4411 error ("attribute(target(\"%s\")) is unknown", orig_p);
4415 else if (type == ix86_opt_isa)
4416 ix86_handle_option (opt, p, opt_set_p);
4418 else if (type == ix86_opt_yes || type == ix86_opt_no)
4420 if (type == ix86_opt_no)
4421 opt_set_p = !opt_set_p;
4424 target_flags |= mask;
4426 target_flags &= ~mask;
4429 else if (type == ix86_opt_str)
4433 error ("option(\"%s\") was already specified", opt_string);
4437 p_strings[opt] = xstrdup (p + opt_len);
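/* Illustrative examples (not part of the original source) of attribute
   strings this parser accepts:

     int fast_path (void) __attribute__((target("sse4.2,popcnt")));
     int safe_path (void) __attribute__((target("no-avx,fpmath=387")));

   Comma-separated entries are matched against the attrs table above,
   and a leading "no-" negates an ISA or mask option.  */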
4447 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4450 ix86_valid_target_attribute_tree (tree args)
4452 const char *orig_arch_string = ix86_arch_string;
4453 const char *orig_tune_string = ix86_tune_string;
4454 const char *orig_fpmath_string = ix86_fpmath_string;
4455 int orig_tune_defaulted = ix86_tune_defaulted;
4456 int orig_arch_specified = ix86_arch_specified;
4457 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4460 struct cl_target_option *def
4461 = TREE_TARGET_OPTION (target_option_default_node);
4463 /* Process each of the options on the chain. */
4464 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4467 /* If the changed options are different from the default, rerun
4468 ix86_option_override_internal, and then save the options away.
4469 The string options are attribute options, and will be undone
4470 when we copy the save structure. */
4471 if (ix86_isa_flags != def->x_ix86_isa_flags
4472 || target_flags != def->x_target_flags
4473 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4474 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4475 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4477 /* If we are using the default tune= or arch=, undo the string assigned,
4478 and use the default. */
4479 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4480 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4481 else if (!orig_arch_specified)
4482 ix86_arch_string = NULL;
4484 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4485 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4486 else if (orig_tune_defaulted)
4487 ix86_tune_string = NULL;
4489 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4490 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4491 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4492 else if (!TARGET_64BIT && TARGET_SSE)
4493 ix86_fpmath_string = "sse,387";
4495 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4496 ix86_option_override_internal (false);
4498 /* Add any builtin functions with the new isa if any. */
4499 ix86_add_new_builtins (ix86_isa_flags);
4501 /* Save the current options unless we are validating options for
4502 #pragma. */
4503 t = build_target_option_node ();
4505 ix86_arch_string = orig_arch_string;
4506 ix86_tune_string = orig_tune_string;
4507 ix86_fpmath_string = orig_fpmath_string;
4509 /* Free up memory allocated to hold the strings */
4510 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4511 if (option_strings[i])
4512 free (option_strings[i]);
4518 /* Hook to validate attribute((target("string"))). */
4521 ix86_valid_target_attribute_p (tree fndecl,
4522 tree ARG_UNUSED (name),
4524 int ARG_UNUSED (flags))
4526 struct cl_target_option cur_target;
4528 tree old_optimize = build_optimization_node ();
4529 tree new_target, new_optimize;
4530 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4532 /* If the function changed the optimization levels as well as setting target
4533 options, start with the optimizations specified. */
4534 if (func_optimize && func_optimize != old_optimize)
4535 cl_optimization_restore (&global_options,
4536 TREE_OPTIMIZATION (func_optimize));
4538 /* The target attributes may also change some optimization flags, so update
4539 the optimization options if necessary. */
4540 cl_target_option_save (&cur_target, &global_options);
4541 new_target = ix86_valid_target_attribute_tree (args);
4542 new_optimize = build_optimization_node ();
4549 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4551 if (old_optimize != new_optimize)
4552 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4555 cl_target_option_restore (&global_options, &cur_target);
4557 if (old_optimize != new_optimize)
4558 cl_optimization_restore (&global_options,
4559 TREE_OPTIMIZATION (old_optimize));
4565 /* Hook to determine if one function can safely inline another. */
4568 ix86_can_inline_p (tree caller, tree callee)
4571 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4572 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4574 /* If callee has no option attributes, then it is ok to inline. */
4578 /* If the caller has no option attributes but the callee does, then it is
4579 not ok to inline. */
4580 else if (!caller_tree)
4585 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4586 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4588 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4589 function can inline an SSE2 function, but an SSE2 function can't inline
4590 an SSE4 function. */
4591 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4592 != callee_opts->x_ix86_isa_flags)
4595 /* See if we have the same non-isa options. */
4596 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4599 /* See if arch, tune, etc. are the same. */
4600 else if (caller_opts->arch != callee_opts->arch)
4603 else if (caller_opts->tune != callee_opts->tune)
4606 else if (caller_opts->fpmath != callee_opts->fpmath)
4609 else if (caller_opts->branch_cost != callee_opts->branch_cost)
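/* Illustrative consequence (not part of the original source) of the
   subset rule above: a caller built with -msse4 may inline a callee
   declared __attribute__((target("sse2"))), but an -msse2 caller may
   not inline an SSE4 callee, since the callee's ISA flags must be a
   subset of the caller's.  */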
4620 /* Remember the last target of ix86_set_current_function. */
4621 static GTY(()) tree ix86_previous_fndecl;
4623 /* Establish appropriate back-end context for processing the function
4624 FNDECL. The argument might be NULL to indicate processing at top
4625 level, outside of any function scope. */
4627 ix86_set_current_function (tree fndecl)
4629 /* Only change the context if the function changes. This hook is called
4630 several times in the course of compiling a function, and we don't want to
4631 slow things down too much or call target_reinit when it isn't safe. */
4632 if (fndecl && fndecl != ix86_previous_fndecl)
4634 tree old_tree = (ix86_previous_fndecl
4635 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4638 tree new_tree = (fndecl
4639 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4642 ix86_previous_fndecl = fndecl;
4643 if (old_tree == new_tree)
4648 cl_target_option_restore (&global_options,
4649 TREE_TARGET_OPTION (new_tree));
4655 struct cl_target_option *def
4656 = TREE_TARGET_OPTION (target_option_current_node);
4658 cl_target_option_restore (&global_options, def);
4665 /* Return true if this goes in large data/bss. */
4668 ix86_in_large_data_p (tree exp)
4670 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4673 /* Functions are never large data. */
4674 if (TREE_CODE (exp) == FUNCTION_DECL)
4677 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4679 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4680 if (strcmp (section, ".ldata") == 0
4681 || strcmp (section, ".lbss") == 0)
4687 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4689 /* If this is an incomplete type with size 0, then we can't put it
4690 in data because it might be too big when completed. */
4691 if (!size || size > ix86_section_threshold)
4698 /* Switch to the appropriate section for output of DECL.
4699 DECL is either a `VAR_DECL' node or a constant of some sort.
4700 RELOC indicates whether forming the initial value of DECL requires
4701 link-time relocations. */
4703 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4707 x86_64_elf_select_section (tree decl, int reloc,
4708 unsigned HOST_WIDE_INT align)
4710 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4711 && ix86_in_large_data_p (decl))
4713 const char *sname = NULL;
4714 unsigned int flags = SECTION_WRITE;
4715 switch (categorize_decl_for_section (decl, reloc))
4720 case SECCAT_DATA_REL:
4721 sname = ".ldata.rel";
4723 case SECCAT_DATA_REL_LOCAL:
4724 sname = ".ldata.rel.local";
4726 case SECCAT_DATA_REL_RO:
4727 sname = ".ldata.rel.ro";
4729 case SECCAT_DATA_REL_RO_LOCAL:
4730 sname = ".ldata.rel.ro.local";
4734 flags |= SECTION_BSS;
4737 case SECCAT_RODATA_MERGE_STR:
4738 case SECCAT_RODATA_MERGE_STR_INIT:
4739 case SECCAT_RODATA_MERGE_CONST:
4743 case SECCAT_SRODATA:
4750 /* We don't split these for the medium model. Place them into
4751 default sections and hope for the best. */
4756 /* We might get called with string constants, but get_named_section
4757 doesn't like them as they are not DECLs. Also, we need to set
4758 flags in that case. */
4760 return get_section (sname, flags, NULL);
4761 return get_named_section (decl, sname, reloc);
4764 return default_elf_select_section (decl, reloc, align);
4767 /* Build up a unique section name, expressed as a
4768 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4769 RELOC indicates whether the initial value of EXP requires
4770 link-time relocations. */
4772 static void ATTRIBUTE_UNUSED
4773 x86_64_elf_unique_section (tree decl, int reloc)
4775 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4776 && ix86_in_large_data_p (decl))
4778 const char *prefix = NULL;
4779 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4780 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4782 switch (categorize_decl_for_section (decl, reloc))
4785 case SECCAT_DATA_REL:
4786 case SECCAT_DATA_REL_LOCAL:
4787 case SECCAT_DATA_REL_RO:
4788 case SECCAT_DATA_REL_RO_LOCAL:
4789 prefix = one_only ? ".ld" : ".ldata";
4792 prefix = one_only ? ".lb" : ".lbss";
4795 case SECCAT_RODATA_MERGE_STR:
4796 case SECCAT_RODATA_MERGE_STR_INIT:
4797 case SECCAT_RODATA_MERGE_CONST:
4798 prefix = one_only ? ".lr" : ".lrodata";
4800 case SECCAT_SRODATA:
4807 /* We don't split these for the medium model. Place them into
4808 default sections and hope for the best. */
4813 const char *name, *linkonce;
4816 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4817 name = targetm.strip_name_encoding (name);
4819 /* If we're using one_only, then there needs to be a .gnu.linkonce
4820 prefix to the section name. */
4821 linkonce = one_only ? ".gnu.linkonce" : "";
4823 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4825 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4829 default_unique_section (decl, reloc);
4832 #ifdef COMMON_ASM_OP
4833 /* This says how to output assembler code to declare an
4834 uninitialized external linkage data object.
4836 For medium model x86-64 we need to use the .largecomm directive for
4837 large objects. */
4839 x86_elf_aligned_common (FILE *file,
4840 const char *name, unsigned HOST_WIDE_INT size,
4843 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4844 && size > (unsigned int)ix86_section_threshold)
4845 fputs (".largecomm\t", file);
4847 fputs (COMMON_ASM_OP, file);
4848 assemble_name (file, name);
4849 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4850 size, align / BITS_PER_UNIT);
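/* Illustrative output (not part of the original source): for a
   hypothetical 4096-byte common object "big_buf" above the large-data
   threshold with 32-byte alignment, the code above emits

     .largecomm big_buf,4096,32

   while smaller objects go through the regular COMMON_ASM_OP path.  */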
4854 /* Utility function for targets to use in implementing
4855 ASM_OUTPUT_ALIGNED_BSS. */
4858 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4859 const char *name, unsigned HOST_WIDE_INT size,
4862 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4863 && size > (unsigned int)ix86_section_threshold)
4864 switch_to_section (get_named_section (decl, ".lbss", 0));
4866 switch_to_section (bss_section);
4867 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4868 #ifdef ASM_DECLARE_OBJECT_NAME
4869 last_assemble_variable_decl = decl;
4870 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4872 /* The standard thing is just to output a label for the object. */
4873 ASM_OUTPUT_LABEL (file, name);
4874 #endif /* ASM_DECLARE_OBJECT_NAME */
4875 ASM_OUTPUT_SKIP (file, size ? size : 1);
4878 static const struct default_options ix86_option_optimization_table[] =
4880 /* Turn off -fschedule-insns by default. It tends to make the
4881 problem with not enough registers even worse. */
4882 #ifdef INSN_SCHEDULING
4883 { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
4886 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4887 SUBTARGET_OPTIMIZATION_OPTIONS,
4889 { OPT_LEVELS_NONE, 0, NULL, 0 }
4892 /* Implement TARGET_OPTION_INIT_STRUCT. */
4895 ix86_option_init_struct (struct gcc_options *opts)
4898 /* The Darwin libraries never set errno, so we might as well
4899 avoid calling them when that's the only reason we would. */
4900 opts->x_flag_errno_math = 0;
4902 opts->x_flag_pcc_struct_return = 2;
4903 opts->x_flag_asynchronous_unwind_tables = 2;
4904 opts->x_flag_vect_cost_model = 1;
4907 /* Decide whether we must probe the stack before any space allocation
4908 on this target. It's essentially TARGET_STACK_PROBE except when
4909 -fstack-check causes the stack to be already probed differently. */
4912 ix86_target_stack_probe (void)
4914 /* Do not probe the stack twice if static stack checking is enabled. */
4915 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4918 return TARGET_STACK_PROBE;
4921 /* Decide whether we can make a sibling call to a function. DECL is the
4922 declaration of the function being targeted by the call and EXP is the
4923 CALL_EXPR representing the call. */
4926 ix86_function_ok_for_sibcall (tree decl, tree exp)
4928 tree type, decl_or_type;
4931 /* If we are generating position-independent code, we cannot sibcall
4932 optimize any indirect call, or a direct call to a global function,
4933 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4937 && (!decl || !targetm.binds_local_p (decl)))
4940 /* If we need to align the outgoing stack, then sibcalling would
4941 unalign the stack, which may break the called function. */
4942 if (ix86_minimum_incoming_stack_boundary (true)
4943 < PREFERRED_STACK_BOUNDARY)
4948 decl_or_type = decl;
4949 type = TREE_TYPE (decl);
4953 /* We're looking at the CALL_EXPR, we need the type of the function. */
4954 type = CALL_EXPR_FN (exp); /* pointer expression */
4955 type = TREE_TYPE (type); /* pointer type */
4956 type = TREE_TYPE (type); /* function type */
4957 decl_or_type = type;
4960 /* Check that the return value locations are the same. Like
4961 if we are returning floats on the 80387 register stack, we cannot
4962 make a sibcall from a function that doesn't return a float to a
4963 function that does or, conversely, from a function that does return
4964 a float to a function that doesn't; the necessary stack adjustment
4965 would not be executed. This is also the place we notice
4966 differences in the return value ABI. Note that it is ok for one
4967 of the functions to have void return type as long as the return
4968 value of the other is passed in a register. */
4969 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4970 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4972 if (STACK_REG_P (a) || STACK_REG_P (b))
4974 if (!rtx_equal_p (a, b))
4977 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4979 /* Disable sibcall if we need to generate vzeroupper after
4980 callee returns. */
4981 if (TARGET_VZEROUPPER
4982 && cfun->machine->callee_return_avx256_p
4983 && !cfun->machine->caller_return_avx256_p)
4986 else if (!rtx_equal_p (a, b))
4991 /* The SYSV ABI has more call-clobbered registers;
4992 disallow sibcalls from MS to SYSV. */
4993 if (cfun->machine->call_abi == MS_ABI
4994 && ix86_function_type_abi (type) == SYSV_ABI)
4999 /* If this call is indirect, we'll need to be able to use a
5000 call-clobbered register for the address of the target function.
5001 Make sure that all such registers are not used for passing
5002 parameters. Note that DLLIMPORT functions are indirect. */
5004 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5006 if (ix86_function_regparm (type, NULL) >= 3)
5008 /* ??? Need to count the actual number of registers to be used,
5009 not the possible number of registers. Fix later. */
5015 /* Otherwise okay. That also includes certain types of indirect calls. */
5019 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5020 and "sseregparm" calling convention attributes;
5021 arguments as in struct attribute_spec.handler. */
5024 ix86_handle_cconv_attribute (tree *node, tree name,
5026 int flags ATTRIBUTE_UNUSED,
5029 if (TREE_CODE (*node) != FUNCTION_TYPE
5030 && TREE_CODE (*node) != METHOD_TYPE
5031 && TREE_CODE (*node) != FIELD_DECL
5032 && TREE_CODE (*node) != TYPE_DECL)
5034 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5036 *no_add_attrs = true;
5040 /* Can combine regparm with all attributes but fastcall and thiscall. */
5041 if (is_attribute_p ("regparm", name))
5045 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5047 error ("fastcall and regparm attributes are not compatible");
5050 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5052 error ("regparam and thiscall attributes are not compatible");
5055 cst = TREE_VALUE (args);
5056 if (TREE_CODE (cst) != INTEGER_CST)
5058 warning (OPT_Wattributes,
5059 "%qE attribute requires an integer constant argument",
5061 *no_add_attrs = true;
5063 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5065 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5067 *no_add_attrs = true;
5075 /* Do not warn when emulating the MS ABI. */
5076 if ((TREE_CODE (*node) != FUNCTION_TYPE
5077 && TREE_CODE (*node) != METHOD_TYPE)
5078 || ix86_function_type_abi (*node) != MS_ABI)
5079 warning (OPT_Wattributes, "%qE attribute ignored",
5081 *no_add_attrs = true;
5085 /* Can combine fastcall only with sseregparm. */
5086 if (is_attribute_p ("fastcall", name))
5088 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5090 error ("fastcall and cdecl attributes are not compatible");
5092 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5094 error ("fastcall and stdcall attributes are not compatible");
5096 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5098 error ("fastcall and regparm attributes are not compatible");
5100 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5102 error ("fastcall and thiscall attributes are not compatible");
5106 /* Can combine stdcall with regparm and sseregparm. */
5108 else if (is_attribute_p ("stdcall", name))
5110 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5112 error ("stdcall and cdecl attributes are not compatible");
5114 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5116 error ("stdcall and fastcall attributes are not compatible");
5118 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5120 error ("stdcall and thiscall attributes are not compatible");
5124 /* Can combine cdecl with regparm and sseregparm. */
5125 else if (is_attribute_p ("cdecl", name))
5127 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5129 error ("stdcall and cdecl attributes are not compatible");
5131 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5133 error ("fastcall and cdecl attributes are not compatible");
5135 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5137 error ("cdecl and thiscall attributes are not compatible");
5140 else if (is_attribute_p ("thiscall", name))
5142 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5143 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5145 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5147 error ("stdcall and thiscall attributes are not compatible");
5149 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5151 error ("fastcall and thiscall attributes are not compatible");
5153 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5155 error ("cdecl and thiscall attributes are not compatible");
5159 /* Can combine sseregparm with all attributes. */
5164 /* Return 0 if the attributes for two types are incompatible, 1 if they
5165 are compatible, and 2 if they are nearly compatible (which causes a
5166 warning to be generated). */
5169 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5171 /* Check for mismatch of non-default calling convention. */
5172 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
5174 if (TREE_CODE (type1) != FUNCTION_TYPE
5175 && TREE_CODE (type1) != METHOD_TYPE)
5178 /* Check for mismatched fastcall/regparm types. */
5179 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
5180 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
5181 || (ix86_function_regparm (type1, NULL)
5182 != ix86_function_regparm (type2, NULL)))
5185 /* Check for mismatched sseregparm types. */
5186 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
5187 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
5190 /* Check for mismatched thiscall types. */
5191 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
5192 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
5195 /* Check for mismatched return types (cdecl vs stdcall). */
5196 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
5197 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
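/* Illustrative example (not part of the original source): the checks
   above make these two function types incompatible, so assigning one
   to a pointer of the other type is diagnosed:

     void __attribute__((stdcall)) f (int);
     void (*p) (int) = f;    (cdecl pointer, stdcall target)  */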
5203 /* Return the regparm value for a function with the indicated TYPE and DECL.
5204 DECL may be NULL when calling function indirectly
5205 or considering a libcall. */
5208 ix86_function_regparm (const_tree type, const_tree decl)
5214 return (ix86_function_type_abi (type) == SYSV_ABI
5215 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5217 regparm = ix86_regparm;
5218 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5221 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5225 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
5228 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
5231 /* Use register calling convention for local functions when possible. */
5233 && TREE_CODE (decl) == FUNCTION_DECL
5235 && !(profile_flag && !flag_fentry))
5237 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5238 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5241 int local_regparm, globals = 0, regno;
5243 /* Make sure no regparm register is taken by a
5244 fixed register variable. */
5245 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5246 if (fixed_regs[local_regparm])
5249 /* We don't want to use regparm(3) for nested functions as
5250 these use a static chain pointer in the third argument. */
5251 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5254 /* In 32-bit mode save a register for the split stack. */
5255 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5258 /* Each fixed register usage increases register pressure,
5259 so fewer registers should be used for argument passing.
5260 This functionality can be overridden by an explicit
5261 regparm value. */
5262 for (regno = 0; regno <= DI_REG; regno++)
5263 if (fixed_regs[regno])
5267 = globals < local_regparm ? local_regparm - globals : 0;
5269 if (local_regparm > regparm)
5270 regparm = local_regparm;
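/* Illustrative example (not part of the original source) of the
   explicit override mentioned above:

     int __attribute__((regparm(2))) f (int a, int b);

   forces A into %eax and B into %edx regardless of the local-function
   heuristics computed here.  */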
5277 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5278 DFmode (2) arguments in SSE registers for a function with the
5279 indicated TYPE and DECL. DECL may be NULL when calling function
5280 indirectly or considering a libcall. Otherwise return 0. */
5283 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5285 gcc_assert (!TARGET_64BIT);
5287 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5288 by the sseregparm attribute. */
5289 if (TARGET_SSEREGPARM
5290 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5297 error ("Calling %qD with attribute sseregparm without "
5298 "SSE/SSE2 enabled", decl);
5300 error ("Calling %qT with attribute sseregparm without "
5301 "SSE/SSE2 enabled", type);
5309 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5310 (and DFmode for SSE2) arguments in SSE registers. */
5311 if (decl && TARGET_SSE_MATH && optimize
5312 && !(profile_flag && !flag_fentry))
5314 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5315 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5317 return TARGET_SSE2 ? 2 : 1;
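/* Illustrative example (not part of the original source): with -msse2
   on IA-32,

     double __attribute__((sseregparm)) f (double x);

   passes X in an SSE register (the 2 returned above covers DFmode);
   without SSE2 only SFmode arguments qualify (return value 1).  */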
5323 /* Return true if EAX is live at the start of the function. Used by
5324 ix86_expand_prologue to determine if we need special help before
5325 calling allocate_stack_worker. */
5328 ix86_eax_live_at_start_p (void)
5330 /* Cheat. Don't bother working forward from ix86_function_regparm
5331 to the function type to whether an actual argument is located in
5332 eax. Instead just look at cfg info, which is still close enough
5333 to correct at this point. This gives false positives for broken
5334 functions that might use uninitialized data that happens to be
5335 allocated in eax, but who cares? */
5336 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5339 /* Value is the number of bytes of arguments automatically
5340 popped when returning from a subroutine call.
5341 FUNDECL is the declaration node of the function (as a tree),
5342 FUNTYPE is the data type of the function (as a tree),
5343 or for a library call it is an identifier node for the subroutine name.
5344 SIZE is the number of bytes of arguments passed on the stack.
5346 On the 80386, the RTD insn may be used to pop them if the number
5347 of args is fixed, but if the number is variable then the caller
5348 must pop them all. RTD can't be used for library calls now
5349 because the library is compiled with the Unix compiler.
5350 Use of RTD is a selectable option, since it is incompatible with
5351 standard Unix calling sequences. If the option is not selected,
5352 the caller must always pop the args.
5354 The attribute stdcall is equivalent to RTD on a per module basis. */
5357 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5361 /* None of the 64-bit ABIs pop arguments. */
5365 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5367 /* Cdecl functions override -mrtd, and never pop the stack. */
5368 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5370 /* Stdcall and fastcall functions will pop the stack if not
5371 variable args. */
5372 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5373 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5374 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5377 if (rtd && ! stdarg_p (funtype))
5381 /* Lose any fake structure return argument if it is passed on the stack. */
5382 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5383 && !KEEP_AGGREGATE_RETURN_POINTER)
5385 int nregs = ix86_function_regparm (funtype, fundecl);
5387 return GET_MODE_SIZE (Pmode);
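/* Illustrative example (not part of the original source) of the stdcall
   rule described above:

     int __attribute__((stdcall)) f (int a, int b);

   makes the callee pop its 8 bytes of stack arguments (a "ret $8"),
   so this function reports 8 for it, while a plain cdecl callee
   reports 0.  */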
5393 /* Argument support functions. */
5395 /* Return true when register may be used to pass function parameters. */
5397 ix86_function_arg_regno_p (int regno)
5400 const int *parm_regs;
5405 return (regno < REGPARM_MAX
5406 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5408 return (regno < REGPARM_MAX
5409 || (TARGET_MMX && MMX_REGNO_P (regno)
5410 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5411 || (TARGET_SSE && SSE_REGNO_P (regno)
5412 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5417 if (SSE_REGNO_P (regno) && TARGET_SSE)
5422 if (TARGET_SSE && SSE_REGNO_P (regno)
5423 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5427 /* TODO: The function should depend on the current function's ABI, but
5428 builtins.c would need updating then. Therefore we use the
5429 default ABI. */
5431 /* RAX is used as hidden argument to va_arg functions. */
5432 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5435 if (ix86_abi == MS_ABI)
5436 parm_regs = x86_64_ms_abi_int_parameter_registers;
5438 parm_regs = x86_64_int_parameter_registers;
5439 for (i = 0; i < (ix86_abi == MS_ABI
5440 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5441 if (regno == parm_regs[i])
5446 /* Return true if we do not know how to pass TYPE solely in registers. */
5449 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5451 if (must_pass_in_stack_var_size_or_pad (mode, type))
5454 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5455 The layout_type routine is crafty and tries to trick us into passing
5456 currently unsupported vector types on the stack by using TImode. */
5457 return (!TARGET_64BIT && mode == TImode
5458 && type && TREE_CODE (type) != VECTOR_TYPE);
5461 /* Return the size, in bytes, of the area reserved for arguments passed
5462 in registers for the function represented by FNDECL, depending on the
5463 ABI used. */
5465 ix86_reg_parm_stack_space (const_tree fndecl)
5467 enum calling_abi call_abi = SYSV_ABI;
5468 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5469 call_abi = ix86_function_abi (fndecl);
5471 call_abi = ix86_function_type_abi (fndecl);
5472 if (call_abi == MS_ABI)
5477 /* Return SYSV_ABI or MS_ABI, dependent on FNTYPE, specifying the call
5478 ABI used. */
5480 ix86_function_type_abi (const_tree fntype)
5482 if (TARGET_64BIT && fntype != NULL)
5484 enum calling_abi abi = ix86_abi;
5485 if (abi == SYSV_ABI)
5487 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5490 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5498 ix86_function_ms_hook_prologue (const_tree fn)
5500 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5502 if (decl_function_context (fn) != NULL_TREE)
5503 error_at (DECL_SOURCE_LOCATION (fn),
5504 "ms_hook_prologue is not compatible with nested function");
5511 static enum calling_abi
5512 ix86_function_abi (const_tree fndecl)
5516 return ix86_function_type_abi (TREE_TYPE (fndecl));
5519 /* Return SYSV_ABI or MS_ABI, dependent on cfun, specifying the call
5520 ABI used. */
5522 ix86_cfun_abi (void)
5524 if (! cfun || ! TARGET_64BIT)
5526 return cfun->machine->call_abi;
5529 /* Write the extra assembler code needed to declare a function properly. */
5532 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5535 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5539 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5540 unsigned int filler_cc = 0xcccccccc;
5542 for (i = 0; i < filler_count; i += 4)
5543 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5546 #ifdef SUBTARGET_ASM_UNWIND_INIT
5547 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5550 ASM_OUTPUT_LABEL (asm_out_file, fname);
5552 /* Output magic byte marker, if hot-patch attribute is set. */
5557 /* leaq [%rsp + 0], %rsp */
5558 asm_fprintf (asm_out_file, ASM_BYTE
5559 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5563 /* movl.s %edi, %edi
5564 push %ebp
5565 movl.s %esp, %ebp */
5566 asm_fprintf (asm_out_file, ASM_BYTE
5567 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5573 extern void init_regs (void);
5575 /* Implementation of the call-ABI-switching target hook. The call register
5576 sets specific to FNDECL are selected. See also CONDITIONAL_REGISTER_USAGE
5577 for more details. */
5579 ix86_call_abi_override (const_tree fndecl)
5581 if (fndecl == NULL_TREE)
5582 cfun->machine->call_abi = ix86_abi;
5584 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5587 /* MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5588 re-initialization of init_regs each time we switch function context since
5589 this is needed only during RTL expansion. */
5591 ix86_maybe_switch_abi (void)
5594 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5598 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5599 for a call to a function whose data type is FNTYPE.
5600 For a library call, FNTYPE is 0. */
5603 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5604 tree fntype, /* tree ptr for function decl */
5605 rtx libname, /* SYMBOL_REF of library name or 0 */
5609 struct cgraph_local_info *i;
5612 memset (cum, 0, sizeof (*cum));
5614 /* Initialize for the current callee. */
5617 cfun->machine->callee_pass_avx256_p = false;
5618 cfun->machine->callee_return_avx256_p = false;
5623 i = cgraph_local_info (fndecl);
5624 cum->call_abi = ix86_function_abi (fndecl);
5625 fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
5630 cum->call_abi = ix86_function_type_abi (fntype);
5632 fnret_type = TREE_TYPE (fntype);
5637 if (TARGET_VZEROUPPER && fnret_type)
5639 rtx fnret_value = ix86_function_value (fnret_type, fntype,
5641 if (function_pass_avx256_p (fnret_value))
5643 /* The return value of this function uses 256bit AVX modes. */
5644 cfun->machine->use_avx256_p = true;
5646 cfun->machine->callee_return_avx256_p = true;
5648 cfun->machine->caller_return_avx256_p = true;
5652 cum->caller = caller;
5654 /* Set up the number of registers to use for passing arguments. */
5656 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5657 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5658 "or subtarget optimization implying it");
5659 cum->nregs = ix86_regparm;
5662 cum->nregs = (cum->call_abi == SYSV_ABI
5663 ? X86_64_REGPARM_MAX
5664 : X86_64_MS_REGPARM_MAX);
5668 cum->sse_nregs = SSE_REGPARM_MAX;
5671 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5672 ? X86_64_SSE_REGPARM_MAX
5673 : X86_64_MS_SSE_REGPARM_MAX);
5677 cum->mmx_nregs = MMX_REGPARM_MAX;
5678 cum->warn_avx = true;
5679 cum->warn_sse = true;
5680 cum->warn_mmx = true;
5682 /* Because types might mismatch between caller and callee, we need to
5683 use the actual type of the function for local calls.
5684 FIXME: cgraph_analyze can be told to record whether a function uses
5685 va_start, so maybe_vaarg can be made more aggressive for local functions.
5687 FIXME: once the type system is fixed, we won't need this code anymore. */
5689 fntype = TREE_TYPE (fndecl);
5690 cum->maybe_vaarg = (fntype
5691 ? (!prototype_p (fntype) || stdarg_p (fntype))
5696 /* If there are variable arguments, then we won't pass anything
5697 in registers in 32-bit mode. */
5698 if (stdarg_p (fntype))
5709 /* Use ecx and edx registers if function has fastcall attribute,
5710 else look for regparm information. */
5713 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5716 cum->fastcall = 1; /* Same first register as in fastcall. */
5718 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5724 cum->nregs = ix86_function_regparm (fntype, fndecl);
5727 /* Set up the number of SSE registers used for passing SFmode
5728 and DFmode arguments. Warn for mismatching ABI. */
5729 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
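/* Example of the 32-bit conventions configured above (usage sketch,
   not GCC code):
       int f (int, int, int) __attribute__ ((regparm (3)));
   passes its arguments in %eax, %edx and %ecx, while
       int g (int, int) __attribute__ ((fastcall));
   uses %ecx and %edx for the first two DWORD-sized arguments.  */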
5733 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5734 But in the case of vector types, it is some vector mode.
5736 When we have only some of our vector isa extensions enabled, then there
5737 are some modes for which vector_mode_supported_p is false. For these
5738 modes, the generic vector support in gcc will choose some non-vector mode
5739 in order to implement the type. By computing the natural mode, we'll
5740 select the proper ABI location for the operand and not depend on whatever
5741 the middle-end decides to do with these vector types.
5743 The middle-end can't deal with vector types larger than 16 bytes. In this
5744 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5747 static enum machine_mode
5748 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5750 enum machine_mode mode = TYPE_MODE (type);
5752 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5754 HOST_WIDE_INT size = int_size_in_bytes (type);
5755 if ((size == 8 || size == 16 || size == 32)
5756 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5757 && TYPE_VECTOR_SUBPARTS (type) > 1)
5759 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5761 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5762 mode = MIN_MODE_VECTOR_FLOAT;
5764 mode = MIN_MODE_VECTOR_INT;
5766 /* Get the mode which has this inner mode and number of units. */
5767 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5768 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5769 && GET_MODE_INNER (mode) == innermode)
5771 if (size == 32 && !TARGET_AVX)
5773 static bool warnedavx;
5780 warning (0, "AVX vector argument without AVX "
5781 "enabled changes the ABI");
5783 return TYPE_MODE (type);
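/* Example of the fallback above (illustration only): compiling
       typedef float v8sf __attribute__ ((vector_size (32)));
       void f (v8sf);
   without -mavx leaves V8SFmode unsupported, so the argument keeps
   TYPE_MODE (BLKmode here) and the psABI warning fires once.  */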
5796 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5797 this may not agree with the mode that the type system has chosen for the
5798 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5799 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5802 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5807 if (orig_mode != BLKmode)
5808 tmp = gen_rtx_REG (orig_mode, regno);
5811 tmp = gen_rtx_REG (mode, regno);
5812 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5813 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5819 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5820 of this code is to classify each 8bytes of incoming argument by the register
5821 class and assign registers accordingly. */
5823 /* Return the union class of CLASS1 and CLASS2.
5824 See the x86-64 PS ABI for details. */
5826 static enum x86_64_reg_class
5827 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5829 /* Rule #1: If both classes are equal, this is the resulting class. */
5830 if (class1 == class2)
5833 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5835 if (class1 == X86_64_NO_CLASS)
5837 if (class2 == X86_64_NO_CLASS)
5840 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5841 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5842 return X86_64_MEMORY_CLASS;
5844 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5845 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5846 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5847 return X86_64_INTEGERSI_CLASS;
5848 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5849 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5850 return X86_64_INTEGER_CLASS;
5852 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5854 if (class1 == X86_64_X87_CLASS
5855 || class1 == X86_64_X87UP_CLASS
5856 || class1 == X86_64_COMPLEX_X87_CLASS
5857 || class2 == X86_64_X87_CLASS
5858 || class2 == X86_64_X87UP_CLASS
5859 || class2 == X86_64_COMPLEX_X87_CLASS)
5860 return X86_64_MEMORY_CLASS;
5862 /* Rule #6: Otherwise class SSE is used. */
5863 return X86_64_SSE_CLASS;
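/* Worked example of the rules above (SysV x86-64 psABI, for
   illustration): for
       union u { int i; float f; };
   the single eightbyte merges X86_64_INTEGERSI_CLASS (from I) with
   X86_64_SSESF_CLASS (from F); the first case of rule #4 yields
   INTEGERSI, so the union travels in a general-purpose register.  */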
5866 /* Classify the argument of type TYPE and mode MODE.
5867 CLASSES will be filled by the register class used to pass each word
5868 of the operand. The number of words is returned. In case the parameter
5869 should be passed in memory, 0 is returned. As a special case for zero
5870 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5872 BIT_OFFSET is used internally for handling records; it specifies the
5873 offset in bits, modulo 256, to avoid overflow cases.
5875 See the x86-64 PS ABI for details.
5879 classify_argument (enum machine_mode mode, const_tree type,
5880 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5882 HOST_WIDE_INT bytes =
5883 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5884 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5886 /* Variable sized entities are always passed/returned in memory. */
5890 if (mode != VOIDmode
5891 && targetm.calls.must_pass_in_stack (mode, type))
5894 if (type && AGGREGATE_TYPE_P (type))
5898 enum x86_64_reg_class subclasses[MAX_CLASSES];
5900 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5904 for (i = 0; i < words; i++)
5905 classes[i] = X86_64_NO_CLASS;
5907 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5908 signal the memory class, so handle them as a special case. */
5911 classes[0] = X86_64_NO_CLASS;
5915 /* Classify each field of record and merge classes. */
5916 switch (TREE_CODE (type))
5919 /* And now merge the fields of structure. */
5920 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5922 if (TREE_CODE (field) == FIELD_DECL)
5926 if (TREE_TYPE (field) == error_mark_node)
5929 /* Bitfields are always classified as integer. Handle them
5930 early, since later code would consider them to be
5931 misaligned integers. */
5932 if (DECL_BIT_FIELD (field))
5934 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5935 i < ((int_bit_position (field) + (bit_offset % 64))
5936 + tree_low_cst (DECL_SIZE (field), 0)
5939 merge_classes (X86_64_INTEGER_CLASS,
5946 type = TREE_TYPE (field);
5948 /* Flexible array member is ignored. */
5949 if (TYPE_MODE (type) == BLKmode
5950 && TREE_CODE (type) == ARRAY_TYPE
5951 && TYPE_SIZE (type) == NULL_TREE
5952 && TYPE_DOMAIN (type) != NULL_TREE
5953 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5958 if (!warned && warn_psabi)
5961 inform (input_location,
5962 "The ABI of passing struct with"
5963 " a flexible array member has"
5964 " changed in GCC 4.4");
5968 num = classify_argument (TYPE_MODE (type), type,
5970 (int_bit_position (field)
5971 + bit_offset) % 256);
5974 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5975 for (i = 0; i < num && (i + pos) < words; i++)
5977 merge_classes (subclasses[i], classes[i + pos]);
5984 /* Arrays are handled as small records. */
5987 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5988 TREE_TYPE (type), subclasses, bit_offset);
5992 /* The partial classes are now full classes. */
5993 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5994 subclasses[0] = X86_64_SSE_CLASS;
5995 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5996 && !((bit_offset % 64) == 0 && bytes == 4))
5997 subclasses[0] = X86_64_INTEGER_CLASS;
5999 for (i = 0; i < words; i++)
6000 classes[i] = subclasses[i % num];
6005 case QUAL_UNION_TYPE:
6006 /* Unions are similar to RECORD_TYPE but offset is always 0.
6008 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6010 if (TREE_CODE (field) == FIELD_DECL)
6014 if (TREE_TYPE (field) == error_mark_node)
6017 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6018 TREE_TYPE (field), subclasses,
6022 for (i = 0; i < num; i++)
6023 classes[i] = merge_classes (subclasses[i], classes[i]);
6034 /* When size > 16 bytes, if the first eightbyte isn't
6035 X86_64_SSE_CLASS or any of the later ones aren't
6036 X86_64_SSEUP_CLASS, everything should be passed in memory. */
6038 if (classes[0] != X86_64_SSE_CLASS)
6041 for (i = 1; i < words; i++)
6042 if (classes[i] != X86_64_SSEUP_CLASS)
6046 /* Final merger cleanup. */
6047 for (i = 0; i < words; i++)
6049 /* If one class is MEMORY, everything should be passed in
6051 if (classes[i] == X86_64_MEMORY_CLASS)
6054 /* The X86_64_SSEUP_CLASS should be always preceded by
6055 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6056 if (classes[i] == X86_64_SSEUP_CLASS
6057 && classes[i - 1] != X86_64_SSE_CLASS
6058 && classes[i - 1] != X86_64_SSEUP_CLASS)
6060 /* The first one should never be X86_64_SSEUP_CLASS. */
6061 gcc_assert (i != 0);
6062 classes[i] = X86_64_SSE_CLASS;
6065 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6066 everything should be passed in memory. */
6067 if (classes[i] == X86_64_X87UP_CLASS
6068 && (classes[i - 1] != X86_64_X87_CLASS))
6072 /* The first one should never be X86_64_X87UP_CLASS. */
6073 gcc_assert (i != 0);
6074 if (!warned && warn_psabi)
6077 inform (input_location,
6078 "The ABI of passing union with long double"
6079 " has changed in GCC 4.4");
6087 /* Compute the alignment needed. We align all types to their natural
6088 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6089 if (mode != VOIDmode && mode != BLKmode)
6091 int mode_alignment = GET_MODE_BITSIZE (mode);
6094 mode_alignment = 128;
6095 else if (mode == XCmode)
6096 mode_alignment = 256;
6097 if (COMPLEX_MODE_P (mode))
6098 mode_alignment /= 2;
6099 /* Misaligned fields are always returned in memory. */
6100 if (bit_offset % mode_alignment)
6104 /* for V1xx modes, just use the base mode */
6105 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6106 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6107 mode = GET_MODE_INNER (mode);
6109 /* Classification of atomic types. */
6114 classes[0] = X86_64_SSE_CLASS;
6117 classes[0] = X86_64_SSE_CLASS;
6118 classes[1] = X86_64_SSEUP_CLASS;
6128 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
6132 classes[0] = X86_64_INTEGERSI_CLASS;
6135 else if (size <= 64)
6137 classes[0] = X86_64_INTEGER_CLASS;
6140 else if (size <= 64+32)
6142 classes[0] = X86_64_INTEGER_CLASS;
6143 classes[1] = X86_64_INTEGERSI_CLASS;
6146 else if (size <= 64+64)
6148 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6156 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6160 /* OImode shouldn't be used directly. */
6165 if (!(bit_offset % 64))
6166 classes[0] = X86_64_SSESF_CLASS;
6168 classes[0] = X86_64_SSE_CLASS;
6171 classes[0] = X86_64_SSEDF_CLASS;
6174 classes[0] = X86_64_X87_CLASS;
6175 classes[1] = X86_64_X87UP_CLASS;
6178 classes[0] = X86_64_SSE_CLASS;
6179 classes[1] = X86_64_SSEUP_CLASS;
6182 classes[0] = X86_64_SSE_CLASS;
6183 if (!(bit_offset % 64))
6189 if (!warned && warn_psabi)
6192 inform (input_location,
6193 "The ABI of passing structure with complex float"
6194 " member has changed in GCC 4.4");
6196 classes[1] = X86_64_SSESF_CLASS;
6200 classes[0] = X86_64_SSEDF_CLASS;
6201 classes[1] = X86_64_SSEDF_CLASS;
6204 classes[0] = X86_64_COMPLEX_X87_CLASS;
6207 /* These modes are larger than 16 bytes. */
6215 classes[0] = X86_64_SSE_CLASS;
6216 classes[1] = X86_64_SSEUP_CLASS;
6217 classes[2] = X86_64_SSEUP_CLASS;
6218 classes[3] = X86_64_SSEUP_CLASS;
6226 classes[0] = X86_64_SSE_CLASS;
6227 classes[1] = X86_64_SSEUP_CLASS;
6235 classes[0] = X86_64_SSE_CLASS;
6241 gcc_assert (VECTOR_MODE_P (mode));
6246 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6248 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6249 classes[0] = X86_64_INTEGERSI_CLASS;
6251 classes[0] = X86_64_INTEGER_CLASS;
6252 classes[1] = X86_64_INTEGER_CLASS;
6253 return 1 + (bytes > 8);
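/* Worked example (SysV x86-64 psABI, for illustration): classifying
       struct s { double d; long l; };
   yields two eightbytes, classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so D is passed in an SSE
   register and L in a general-purpose register.  */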
6257 /* Examine the argument and set the number of registers required in each
6258 class. Return 0 iff the parameter should be passed in memory. */
6260 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6261 int *int_nregs, int *sse_nregs)
6263 enum x86_64_reg_class regclass[MAX_CLASSES];
6264 int n = classify_argument (mode, type, regclass, 0);
6270 for (n--; n >= 0; n--)
6271 switch (regclass[n])
6273 case X86_64_INTEGER_CLASS:
6274 case X86_64_INTEGERSI_CLASS:
6277 case X86_64_SSE_CLASS:
6278 case X86_64_SSESF_CLASS:
6279 case X86_64_SSEDF_CLASS:
6282 case X86_64_NO_CLASS:
6283 case X86_64_SSEUP_CLASS:
6285 case X86_64_X87_CLASS:
6286 case X86_64_X87UP_CLASS:
6290 case X86_64_COMPLEX_X87_CLASS:
6291 return in_return ? 2 : 0;
6292 case X86_64_MEMORY_CLASS:
6298 /* Construct container for the argument used by GCC interface. See
6299 FUNCTION_ARG for the detailed description. */
6302 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6303 const_tree type, int in_return, int nintregs, int nsseregs,
6304 const int *intreg, int sse_regno)
6306 /* The following variables hold the static issued_error state. */
6307 static bool issued_sse_arg_error;
6308 static bool issued_sse_ret_error;
6309 static bool issued_x87_ret_error;
6311 enum machine_mode tmpmode;
6313 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6314 enum x86_64_reg_class regclass[MAX_CLASSES];
6318 int needed_sseregs, needed_intregs;
6319 rtx exp[MAX_CLASSES];
6322 n = classify_argument (mode, type, regclass, 0);
6325 if (!examine_argument (mode, type, in_return, &needed_intregs,
6328 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6331 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6332 some less clueful developer tries to use floating-point anyway. */
6333 if (needed_sseregs && !TARGET_SSE)
6337 if (!issued_sse_ret_error)
6339 error ("SSE register return with SSE disabled");
6340 issued_sse_ret_error = true;
6343 else if (!issued_sse_arg_error)
6345 error ("SSE register argument with SSE disabled");
6346 issued_sse_arg_error = true;
6351 /* Likewise, error if the ABI requires us to return values in the
6352 x87 registers and the user specified -mno-80387. */
6353 if (!TARGET_80387 && in_return)
6354 for (i = 0; i < n; i++)
6355 if (regclass[i] == X86_64_X87_CLASS
6356 || regclass[i] == X86_64_X87UP_CLASS
6357 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6359 if (!issued_x87_ret_error)
6361 error ("x87 register return with x87 disabled");
6362 issued_x87_ret_error = true;
6367 /* First construct the simple cases. Avoid SCmode, since we want to use
6368 a single register to pass this type. */
6369 if (n == 1 && mode != SCmode)
6370 switch (regclass[0])
6372 case X86_64_INTEGER_CLASS:
6373 case X86_64_INTEGERSI_CLASS:
6374 return gen_rtx_REG (mode, intreg[0]);
6375 case X86_64_SSE_CLASS:
6376 case X86_64_SSESF_CLASS:
6377 case X86_64_SSEDF_CLASS:
6378 if (mode != BLKmode)
6379 return gen_reg_or_parallel (mode, orig_mode,
6380 SSE_REGNO (sse_regno));
6382 case X86_64_X87_CLASS:
6383 case X86_64_COMPLEX_X87_CLASS:
6384 return gen_rtx_REG (mode, FIRST_STACK_REG);
6385 case X86_64_NO_CLASS:
6386 /* Zero sized array, struct or class. */
6391 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6392 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6393 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6395 && regclass[0] == X86_64_SSE_CLASS
6396 && regclass[1] == X86_64_SSEUP_CLASS
6397 && regclass[2] == X86_64_SSEUP_CLASS
6398 && regclass[3] == X86_64_SSEUP_CLASS
6400 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6403 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6404 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6405 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6406 && regclass[1] == X86_64_INTEGER_CLASS
6407 && (mode == CDImode || mode == TImode || mode == TFmode)
6408 && intreg[0] + 1 == intreg[1])
6409 return gen_rtx_REG (mode, intreg[0]);
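/* Example for the two-INTEGER case above (illustration only): an
   unsigned __int128 return value classifies as two INTEGER
   eightbytes; because the return registers %rax and %rdx have
   consecutive register numbers, a single TImode REG is produced
   here instead of a PARALLEL.  */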
6411 /* Otherwise figure out the entries of the PARALLEL. */
6412 for (i = 0; i < n; i++)
6416 switch (regclass[i])
6418 case X86_64_NO_CLASS:
6420 case X86_64_INTEGER_CLASS:
6421 case X86_64_INTEGERSI_CLASS:
6422 /* Merge TImodes on aligned occasions here too. */
6423 if (i * 8 + 8 > bytes)
6424 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6425 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6429 /* We've requested 24 bytes, for which we don't have a mode. Use DImode. */
6430 if (tmpmode == BLKmode)
6432 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6433 gen_rtx_REG (tmpmode, *intreg),
6437 case X86_64_SSESF_CLASS:
6438 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6439 gen_rtx_REG (SFmode,
6440 SSE_REGNO (sse_regno)),
6444 case X86_64_SSEDF_CLASS:
6445 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6446 gen_rtx_REG (DFmode,
6447 SSE_REGNO (sse_regno)),
6451 case X86_64_SSE_CLASS:
6459 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6469 && regclass[1] == X86_64_SSEUP_CLASS
6470 && regclass[2] == X86_64_SSEUP_CLASS
6471 && regclass[3] == X86_64_SSEUP_CLASS);
6478 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6479 gen_rtx_REG (tmpmode,
6480 SSE_REGNO (sse_regno)),
6489 /* Empty aligned struct, union or class. */
6493 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6494 for (i = 0; i < nexps; i++)
6495 XVECEXP (ret, 0, i) = exp [i];
6499 /* Update the data in CUM to advance over an argument of mode MODE
6500 and data type TYPE. (TYPE is null for libcalls where that information
6501 may not be available.) */
6504 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6505 const_tree type, HOST_WIDE_INT bytes,
6506 HOST_WIDE_INT words)
6522 cum->words += words;
6523 cum->nregs -= words;
6524 cum->regno += words;
6526 if (cum->nregs <= 0)
6534 /* OImode shouldn't be used directly. */
6538 if (cum->float_in_sse < 2)
6541 if (cum->float_in_sse < 1)
6558 if (!type || !AGGREGATE_TYPE_P (type))
6560 cum->sse_words += words;
6561 cum->sse_nregs -= 1;
6562 cum->sse_regno += 1;
6563 if (cum->sse_nregs <= 0)
6577 if (!type || !AGGREGATE_TYPE_P (type))
6579 cum->mmx_words += words;
6580 cum->mmx_nregs -= 1;
6581 cum->mmx_regno += 1;
6582 if (cum->mmx_nregs <= 0)
6593 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6594 const_tree type, HOST_WIDE_INT words, bool named)
6596 int int_nregs, sse_nregs;
6598 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6599 if (!named && VALID_AVX256_REG_MODE (mode))
6602 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6603 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6605 cum->nregs -= int_nregs;
6606 cum->sse_nregs -= sse_nregs;
6607 cum->regno += int_nregs;
6608 cum->sse_regno += sse_nregs;
6612 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6613 cum->words = (cum->words + align - 1) & ~(align - 1);
6614 cum->words += words;
6619 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6620 HOST_WIDE_INT words)
6622 /* Otherwise, this should be passed indirect. */
6623 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6625 cum->words += words;
6633 /* Update the data in CUM to advance over an argument of mode MODE and
6634 data type TYPE. (TYPE is null for libcalls where that information
6635 may not be available.) */
6638 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6639 const_tree type, bool named)
6641 HOST_WIDE_INT bytes, words;
6643 if (mode == BLKmode)
6644 bytes = int_size_in_bytes (type);
6646 bytes = GET_MODE_SIZE (mode);
6647 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6650 mode = type_natural_mode (type, NULL);
6652 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6653 function_arg_advance_ms_64 (cum, bytes, words);
6654 else if (TARGET_64BIT)
6655 function_arg_advance_64 (cum, mode, type, words, named);
6657 function_arg_advance_32 (cum, mode, type, bytes, words);
6660 /* Define where to put the arguments to a function.
6661 Value is zero to push the argument on the stack,
6662 or a hard register in which to store the argument.
6664 MODE is the argument's machine mode.
6665 TYPE is the data type of the argument (as a tree).
6666 This is null for libcalls where that information may
6668 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6669 the preceding args and about the function being called.
6670 NAMED is nonzero if this argument is a named parameter
6671 (otherwise it is an extra parameter matching an ellipsis). */
6674 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6675 enum machine_mode orig_mode, const_tree type,
6676 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6678 static bool warnedsse, warnedmmx;
6680 /* Avoid the AL settings for the Unix64 ABI. */
6681 if (mode == VOIDmode)
6697 if (words <= cum->nregs)
6699 int regno = cum->regno;
6701 /* Fastcall allocates the first two DWORD (SImode) or
6702 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6708 || (type && AGGREGATE_TYPE_P (type)))
6711 /* ECX not EAX is the first allocated register. */
6712 if (regno == AX_REG)
6715 return gen_rtx_REG (mode, regno);
6720 if (cum->float_in_sse < 2)
6723 if (cum->float_in_sse < 1)
6727 /* In 32bit, we pass TImode in xmm registers. */
6734 if (!type || !AGGREGATE_TYPE_P (type))
6736 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6739 warning (0, "SSE vector argument without SSE enabled "
6743 return gen_reg_or_parallel (mode, orig_mode,
6744 cum->sse_regno + FIRST_SSE_REG);
6749 /* OImode shouldn't be used directly. */
6758 if (!type || !AGGREGATE_TYPE_P (type))
6761 return gen_reg_or_parallel (mode, orig_mode,
6762 cum->sse_regno + FIRST_SSE_REG);
6772 if (!type || !AGGREGATE_TYPE_P (type))
6774 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6777 warning (0, "MMX vector argument without MMX enabled "
6781 return gen_reg_or_parallel (mode, orig_mode,
6782 cum->mmx_regno + FIRST_MMX_REG);
6791 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6792 enum machine_mode orig_mode, const_tree type, bool named)
6794 /* Handle a hidden AL argument containing number of registers
6795 for varargs x86-64 functions. */
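/* For example, a call such as printf ("%f", x) is preceded by
   "movl $1, %eax": %al carries the number of vector registers
   used, so the callee's prologue knows whether it must dump the
   SSE part of the register save area.  */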
6796 if (mode == VOIDmode)
6797 return GEN_INT (cum->maybe_vaarg
6798 ? (cum->sse_nregs < 0
6799 ? X86_64_SSE_REGPARM_MAX
6814 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6820 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6822 &x86_64_int_parameter_registers [cum->regno],
6827 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6828 enum machine_mode orig_mode, bool named,
6829 HOST_WIDE_INT bytes)
6833 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6834 We use the value -2 to specify that the current function call is MSABI. */
6835 if (mode == VOIDmode)
6836 return GEN_INT (-2);
6838 /* If we've run out of registers, it goes on the stack. */
6839 if (cum->nregs == 0)
6842 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6844 /* Only floating point modes are passed in anything but integer regs. */
6845 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6848 regno = cum->regno + FIRST_SSE_REG;
6853 /* Unnamed floating parameters are passed in both the
6854 SSE and integer registers. */
6855 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6856 t2 = gen_rtx_REG (mode, regno);
6857 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6858 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6859 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6862 /* Handle aggregate types passed in registers. */
6863 if (orig_mode == BLKmode)
6865 if (bytes > 0 && bytes <= 8)
6866 mode = (bytes > 4 ? DImode : SImode);
6867 if (mode == BLKmode)
6871 return gen_reg_or_parallel (mode, orig_mode, regno);
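/* Summary example of the MS x64 convention handled above
   (illustration only): the first four arguments go in %rcx, %rdx,
   %r8 and %r9, or in %xmm0-%xmm3 for SFmode/DFmode, and each
   argument consumes one slot regardless of its class, unlike the
   SysV scheme with separate GPR and SSE counters.  */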
6874 /* Return where to put the arguments to a function.
6875 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6877 MODE is the argument's machine mode. TYPE is the data type of the
6878 argument. It is null for libcalls where that information may not be
6879 available. CUM gives information about the preceding args and about
6880 the function being called. NAMED is nonzero if this argument is a
6881 named parameter (otherwise it is an extra parameter matching an
6885 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6886 const_tree type, bool named)
6888 enum machine_mode mode = omode;
6889 HOST_WIDE_INT bytes, words;
6892 if (mode == BLKmode)
6893 bytes = int_size_in_bytes (type);
6895 bytes = GET_MODE_SIZE (mode);
6896 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6898 /* To simplify the code below, represent vector types with a vector mode
6899 even if MMX/SSE are not active. */
6900 if (type && TREE_CODE (type) == VECTOR_TYPE)
6901 mode = type_natural_mode (type, cum);
6903 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6904 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
6905 else if (TARGET_64BIT)
6906 arg = function_arg_64 (cum, mode, omode, type, named);
6908 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
6910 if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
6912 /* This argument uses 256bit AVX modes. */
6913 cfun->machine->use_avx256_p = true;
6915 cfun->machine->callee_pass_avx256_p = true;
6917 cfun->machine->caller_pass_avx256_p = true;
6923 /* A C expression that indicates when an argument must be passed by
6924 reference. If nonzero for an argument, a copy of that argument is
6925 made in memory and a pointer to the argument is passed instead of
6926 the argument itself. The pointer is passed in whatever way is
6927 appropriate for passing a pointer to that type. */
6930 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6931 enum machine_mode mode ATTRIBUTE_UNUSED,
6932 const_tree type, bool named ATTRIBUTE_UNUSED)
6934 /* See Windows x64 Software Convention. */
6935 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6937 int msize = (int) GET_MODE_SIZE (mode);
6940 /* Arrays are passed by reference. */
6941 if (TREE_CODE (type) == ARRAY_TYPE)
6944 if (AGGREGATE_TYPE_P (type))
6946 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6947 are passed by reference. */
6948 msize = int_size_in_bytes (type);
6952 /* __m128 is passed by reference. */
6954 case 1: case 2: case 4: case 8:
6960 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6966 /* Return true when TYPE should be 128bit aligned for 32bit argument
6967 passing ABI. XXX: This function is obsolete and is only used for
6968 checking psABI compatibility with previous versions of GCC. */
6971 ix86_compat_aligned_value_p (const_tree type)
6973 enum machine_mode mode = TYPE_MODE (type);
6974 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6978 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6980 if (TYPE_ALIGN (type) < 128)
6983 if (AGGREGATE_TYPE_P (type))
6985 /* Walk the aggregates recursively. */
6986 switch (TREE_CODE (type))
6990 case QUAL_UNION_TYPE:
6994 /* Walk all the structure fields. */
6995 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6997 if (TREE_CODE (field) == FIELD_DECL
6998 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7005 /* Just for use if some languages pass arrays by value. */
7006 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7017 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7018 XXX: This function is obsolete and is only used for checking psABI
7019 compatibility with previous versions of GCC. */
7022 ix86_compat_function_arg_boundary (enum machine_mode mode,
7023 const_tree type, int align)
7025 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7026 natural boundaries. */
7027 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7029 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7030 make an exception for SSE modes since these require 128bit
7033 The handling here differs from field_alignment. ICC aligns MMX
7034 arguments to 4 byte boundaries, while structure fields are aligned
7035 to 8 byte boundaries. */
7038 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7039 align = PARM_BOUNDARY;
7043 if (!ix86_compat_aligned_value_p (type))
7044 align = PARM_BOUNDARY;
7047 if (align > BIGGEST_ALIGNMENT)
7048 align = BIGGEST_ALIGNMENT;
7052 /* Return true when TYPE should be 128bit aligned for 32bit argument
7056 ix86_contains_aligned_value_p (const_tree type)
7058 enum machine_mode mode = TYPE_MODE (type);
7060 if (mode == XFmode || mode == XCmode)
7063 if (TYPE_ALIGN (type) < 128)
7066 if (AGGREGATE_TYPE_P (type))
7068 /* Walk the aggregates recursively. */
7069 switch (TREE_CODE (type))
7073 case QUAL_UNION_TYPE:
7077 /* Walk all the structure fields. */
7078 for (field = TYPE_FIELDS (type);
7080 field = DECL_CHAIN (field))
7082 if (TREE_CODE (field) == FIELD_DECL
7083 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7090 /* Just for use if some languages pass arrays by value. */
7091 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7100 return TYPE_ALIGN (type) >= 128;
7105 /* Gives the alignment boundary, in bits, of an argument with the
7106 specified mode and type. */
7109 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7114 /* Since the main variant type is used for the call, we convert
7115 the given type to its main variant. */
7116 type = TYPE_MAIN_VARIANT (type);
7117 align = TYPE_ALIGN (type);
7120 align = GET_MODE_ALIGNMENT (mode);
7121 if (align < PARM_BOUNDARY)
7122 align = PARM_BOUNDARY;
7126 int saved_align = align;
7130 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7133 if (mode == XFmode || mode == XCmode)
7134 align = PARM_BOUNDARY;
7136 else if (!ix86_contains_aligned_value_p (type))
7137 align = PARM_BOUNDARY;
7140 align = PARM_BOUNDARY;
7145 && align != ix86_compat_function_arg_boundary (mode, type,
7149 inform (input_location,
7150 "The ABI of passing parameter with %dbyte"
7151 " alignment has changed in GCC 4.6",
7152 align / BITS_PER_UNIT);
7159 /* Return true if N is a possible register number of function value. */
7162 ix86_function_value_regno_p (const unsigned int regno)
7169 case FIRST_FLOAT_REG:
7170 /* TODO: The function should depend on the current function ABI, but
7171 builtins.c would need updating then. Therefore we use the default ABI. */
7173 if (TARGET_64BIT && ix86_abi == MS_ABI)
7175 return TARGET_FLOAT_RETURNS_IN_80387;
7181 if (TARGET_MACHO || TARGET_64BIT)
7189 /* Define how to find the value returned by a function.
7190 VALTYPE is the data type of the value (as a tree).
7191 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7192 otherwise, FUNC is 0. */
7195 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7196 const_tree fntype, const_tree fn)
7200 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7201 we normally prevent this case when mmx is not available. However
7202 some ABIs may require the result to be returned like DImode. */
7203 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7204 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
7206 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7207 we prevent this case when sse is not available. However some ABIs
7208 may require the result to be returned like integer TImode. */
7209 else if (mode == TImode
7210 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7211 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
7213 /* 32-byte vector modes in %ymm0. */
7214 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7215 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
7217 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7218 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7219 regno = FIRST_FLOAT_REG;
7221 /* Most things go in %eax. */
7224 /* Override FP return register with %xmm0 for local functions when
7225 SSE math is enabled or for functions with sseregparm attribute. */
7226 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7228 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7229 if ((sse_level >= 1 && mode == SFmode)
7230 || (sse_level == 2 && mode == DFmode))
7231 regno = FIRST_SSE_REG;
7234 /* OImode shouldn't be used directly. */
7235 gcc_assert (mode != OImode);
7237 return gen_rtx_REG (orig_mode, regno);
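/* Example (illustration only): by default a 32-bit
       float f (void);
   returns in %st(0); with the sseregparm attribute, or for local
   functions under SSE math, the override above retargets the
   return value to %xmm0.  */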
7241 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7246 /* Handle libcalls, which don't provide a type node. */
7247 if (valtype == NULL)
7259 return gen_rtx_REG (mode, FIRST_SSE_REG);
7262 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
7266 return gen_rtx_REG (mode, AX_REG);
7270 ret = construct_container (mode, orig_mode, valtype, 1,
7271 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7272 x86_64_int_return_registers, 0);
7274 /* For zero sized structures, construct_container returns NULL, but we
7275 need to keep the rest of the compiler happy by returning a meaningful value. */
7277 ret = gen_rtx_REG (orig_mode, AX_REG);
7283 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
7285 unsigned int regno = AX_REG;
7289 switch (GET_MODE_SIZE (mode))
7292 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7293 && !COMPLEX_MODE_P (mode))
7294 regno = FIRST_SSE_REG;
7298 if (mode == SFmode || mode == DFmode)
7299 regno = FIRST_SSE_REG;
7305 return gen_rtx_REG (orig_mode, regno);
7309 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7310 enum machine_mode orig_mode, enum machine_mode mode)
7312 const_tree fn, fntype;
7315 if (fntype_or_decl && DECL_P (fntype_or_decl))
7316 fn = fntype_or_decl;
7317 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7319 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7320 return function_value_ms_64 (orig_mode, mode);
7321 else if (TARGET_64BIT)
7322 return function_value_64 (orig_mode, mode, valtype);
7324 return function_value_32 (orig_mode, mode, fntype, fn);
7328 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7329 bool outgoing ATTRIBUTE_UNUSED)
7331 enum machine_mode mode, orig_mode;
7333 orig_mode = TYPE_MODE (valtype);
7334 mode = type_natural_mode (valtype, NULL);
7335 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7339 ix86_libcall_value (enum machine_mode mode)
7341 return ix86_function_value_1 (NULL, NULL, mode, mode);
7344 /* Return true iff type is returned in memory. */
7346 static bool ATTRIBUTE_UNUSED
7347 return_in_memory_32 (const_tree type, enum machine_mode mode)
7351 if (mode == BLKmode)
7354 size = int_size_in_bytes (type);
7356 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7359 if (VECTOR_MODE_P (mode) || mode == TImode)
7361 /* User-created vectors small enough to fit in EAX. */
7365 /* MMX/3dNow values are returned in MM0,
7366 except when it doesn't exist or the ABI prescribes otherwise. */
7368 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7370 /* SSE values are returned in XMM0, except when it doesn't exist. */
7374 /* AVX values are returned in YMM0, except when it doesn't exist. */
7385 /* OImode shouldn't be used directly. */
7386 gcc_assert (mode != OImode);
7391 static bool ATTRIBUTE_UNUSED
7392 return_in_memory_64 (const_tree type, enum machine_mode mode)
7394 int needed_intregs, needed_sseregs;
7395 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7398 static bool ATTRIBUTE_UNUSED
7399 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7401 HOST_WIDE_INT size = int_size_in_bytes (type);
7403 /* __m128 is returned in xmm0. */
7404 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7405 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7408 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7409 return size != 1 && size != 2 && size != 4 && size != 8;
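/* Example (illustration only): under the MS x64 ABI a 12-byte
   struct is returned in memory through a hidden pointer, while an
   8-byte struct comes back in %rax and __m128 in %xmm0.  */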
7413 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7415 #ifdef SUBTARGET_RETURN_IN_MEMORY
7416 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7418 const enum machine_mode mode = type_natural_mode (type, NULL);
7422 if (ix86_function_type_abi (fntype) == MS_ABI)
7423 return return_in_memory_ms_64 (type, mode);
7425 return return_in_memory_64 (type, mode);
7428 return return_in_memory_32 (type, mode);
7432 /* When returning SSE vector types, we have a choice of either
7433 (1) being abi incompatible with a -march switch, or
7434 (2) generating an error.
7435 Given no good solution, I think the safest thing is one warning.
7436 The user won't be able to use -Werror, but....
7438 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7439 called in response to actually generating a caller or callee that
7440 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7441 via aggregate_value_p for general type probing from tree-ssa. */
7444 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7446 static bool warnedsse, warnedmmx;
7448 if (!TARGET_64BIT && type)
7450 /* Look at the return type of the function, not the function type. */
7451 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7453 if (!TARGET_SSE && !warnedsse)
7456 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7459 warning (0, "SSE vector return without SSE enabled "
7464 if (!TARGET_MMX && !warnedmmx)
7466 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7469 warning (0, "MMX vector return without MMX enabled "
7479 /* Create the va_list data type. */
7481 /* Returns the calling-convention-specific va_list data type.
7482 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7485 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7487 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7489 /* For i386 we use plain pointer to argument area. */
7490 if (!TARGET_64BIT || abi == MS_ABI)
7491 return build_pointer_type (char_type_node);
7493 record = lang_hooks.types.make_type (RECORD_TYPE);
7494 type_decl = build_decl (BUILTINS_LOCATION,
7495 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7497 f_gpr = build_decl (BUILTINS_LOCATION,
7498 FIELD_DECL, get_identifier ("gp_offset"),
7499 unsigned_type_node);
7500 f_fpr = build_decl (BUILTINS_LOCATION,
7501 FIELD_DECL, get_identifier ("fp_offset"),
7502 unsigned_type_node);
7503 f_ovf = build_decl (BUILTINS_LOCATION,
7504 FIELD_DECL, get_identifier ("overflow_arg_area"),
7506 f_sav = build_decl (BUILTINS_LOCATION,
7507 FIELD_DECL, get_identifier ("reg_save_area"),
7510 va_list_gpr_counter_field = f_gpr;
7511 va_list_fpr_counter_field = f_fpr;
7513 DECL_FIELD_CONTEXT (f_gpr) = record;
7514 DECL_FIELD_CONTEXT (f_fpr) = record;
7515 DECL_FIELD_CONTEXT (f_ovf) = record;
7516 DECL_FIELD_CONTEXT (f_sav) = record;
7518 TYPE_STUB_DECL (record) = type_decl;
7519 TYPE_NAME (record) = type_decl;
7520 TYPE_FIELDS (record) = f_gpr;
7521 DECL_CHAIN (f_gpr) = f_fpr;
7522 DECL_CHAIN (f_fpr) = f_ovf;
7523 DECL_CHAIN (f_ovf) = f_sav;
7525 layout_type (record);
7527 /* The correct type is an array type of one element. */
7528 return build_array_type (record, build_index_type (size_zero_node));
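/* A sketch of the record built above, as it appears to C code
   (this matches the SysV x86-64 psABI va_list layout):
       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];  */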
7531 /* Set up the builtin va_list data type and, for 64-bit, the additional
7532 calling-convention-specific va_list data types. */
7535 ix86_build_builtin_va_list (void)
7537 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7539 /* Initialize abi specific va_list builtin types. */
7543 if (ix86_abi == MS_ABI)
7545 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7546 if (TREE_CODE (t) != RECORD_TYPE)
7547 t = build_variant_type_copy (t);
7548 sysv_va_list_type_node = t;
7553 if (TREE_CODE (t) != RECORD_TYPE)
7554 t = build_variant_type_copy (t);
7555 sysv_va_list_type_node = t;
7557 if (ix86_abi != MS_ABI)
7559 t = ix86_build_builtin_va_list_abi (MS_ABI);
7560 if (TREE_CODE (t) != RECORD_TYPE)
7561 t = build_variant_type_copy (t);
7562 ms_va_list_type_node = t;
7567 if (TREE_CODE (t) != RECORD_TYPE)
7568 t = build_variant_type_copy (t);
7569 ms_va_list_type_node = t;
7576 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7579 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7585 /* GPR size of varargs save area. */
7586 if (cfun->va_list_gpr_size)
7587 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7589 ix86_varargs_gpr_size = 0;
7591 /* FPR size of varargs save area. We don't need it if we don't pass
7592 anything in SSE registers. */
7593 if (TARGET_SSE && cfun->va_list_fpr_size)
7594 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7596 ix86_varargs_fpr_size = 0;
7598 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7601 save_area = frame_pointer_rtx;
7602 set = get_varargs_alias_set ();
7604 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7605 if (max > X86_64_REGPARM_MAX)
7606 max = X86_64_REGPARM_MAX;
7608 for (i = cum->regno; i < max; i++)
7610 mem = gen_rtx_MEM (Pmode,
7611 plus_constant (save_area, i * UNITS_PER_WORD));
7612 MEM_NOTRAP_P (mem) = 1;
7613 set_mem_alias_set (mem, set);
7614 emit_move_insn (mem, gen_rtx_REG (Pmode,
7615 x86_64_int_parameter_registers[i]));
7618 if (ix86_varargs_fpr_size)
7620 enum machine_mode smode;
7623 /* Now emit code to save SSE registers. The AX parameter contains number
7624 of SSE parameter registers used to call this function, though all we
7625 actually check here is the zero/non-zero status. */
7627 label = gen_label_rtx ();
7628 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7629 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7632 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7633 we used movdqa (i.e. TImode) instead? Perhaps even better would
7634 be if we could determine the real mode of the data, via a hook
7635 into pass_stdarg. Ignore all that for now. */
7637 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7638 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7640 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7641 if (max > X86_64_SSE_REGPARM_MAX)
7642 max = X86_64_SSE_REGPARM_MAX;
7644 for (i = cum->sse_regno; i < max; ++i)
7646 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7647 mem = gen_rtx_MEM (smode, mem);
7648 MEM_NOTRAP_P (mem) = 1;
7649 set_mem_alias_set (mem, set);
7650 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7652 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
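/* Resulting save-area layout when everything is saved (SysV):
   48 bytes of GPRs (%rdi ... %r9 in 8-byte slots) followed by
   8 * 16 bytes of SSE registers, 176 bytes in total; the va_list
   fields gp_offset and fp_offset index into this block.  */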
7660 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7662 alias_set_type set = get_varargs_alias_set ();
7665 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7669 mem = gen_rtx_MEM (Pmode,
7670 plus_constant (virtual_incoming_args_rtx,
7671 i * UNITS_PER_WORD));
7672 MEM_NOTRAP_P (mem) = 1;
7673 set_mem_alias_set (mem, set);
7675 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7676 emit_move_insn (mem, reg);
7681 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7682 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7685 CUMULATIVE_ARGS next_cum;
7688 /* This argument doesn't appear to be used anymore. Which is good,
7689 because the old code here didn't suppress rtl generation. */
7690 gcc_assert (!no_rtl);
7695 fntype = TREE_TYPE (current_function_decl);
7697 /* For varargs, we do not want to skip the dummy va_dcl argument.
7698 For stdargs, we do want to skip the last named argument. */
7700 if (stdarg_p (fntype))
7701 ix86_function_arg_advance (&next_cum, mode, type, true);
7703 if (cum->call_abi == MS_ABI)
7704 setup_incoming_varargs_ms_64 (&next_cum);
7706 setup_incoming_varargs_64 (&next_cum);
7709 /* Check whether TYPE is a va_list of the plain char * kind. */
7712 is_va_list_char_pointer (tree type)
7716 /* For 32-bit it is always true. */
7719 canonic = ix86_canonical_va_list_type (type);
7720 return (canonic == ms_va_list_type_node
7721 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7724 /* Implement va_start. */
7727 ix86_va_start (tree valist, rtx nextarg)
7729 HOST_WIDE_INT words, n_gpr, n_fpr;
7730 tree f_gpr, f_fpr, f_ovf, f_sav;
7731 tree gpr, fpr, ovf, sav, t;
7735 if (flag_split_stack
7736 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7738 unsigned int scratch_regno;
7740 /* When we are splitting the stack, we can't refer to the stack
7741 arguments using internal_arg_pointer, because they may be on
7742 the old stack. The split stack prologue will arrange to
7743 leave a pointer to the old stack arguments in a scratch
7744 register, which we here copy to a pseudo-register. The split
7745 stack prologue can't set the pseudo-register directly because
7746 it (the prologue) runs before any registers have been saved. */
7748 scratch_regno = split_stack_prologue_scratch_regno ();
7749 if (scratch_regno != INVALID_REGNUM)
7753 reg = gen_reg_rtx (Pmode);
7754 cfun->machine->split_stack_varargs_pointer = reg;
7757 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7761 push_topmost_sequence ();
7762 emit_insn_after (seq, entry_of_function ());
7763 pop_topmost_sequence ();
7767 /* Only a 64bit target needs something special. */
7768 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7770 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7771 std_expand_builtin_va_start (valist, nextarg);
7776 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7777 next = expand_binop (ptr_mode, add_optab,
7778 cfun->machine->split_stack_varargs_pointer,
7779 crtl->args.arg_offset_rtx,
7780 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7781 convert_move (va_r, next, 0);
7786 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7787 f_fpr = DECL_CHAIN (f_gpr);
7788 f_ovf = DECL_CHAIN (f_fpr);
7789 f_sav = DECL_CHAIN (f_ovf);
7791 valist = build_simple_mem_ref (valist);
7792 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7793 /* The following should be folded into the MEM_REF offset. */
7794 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7796 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7798 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7800 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7803 /* Count number of gp and fp argument registers used. */
7804 words = crtl->args.info.words;
7805 n_gpr = crtl->args.info.regno;
7806 n_fpr = crtl->args.info.sse_regno;
7808 if (cfun->va_list_gpr_size)
7810 type = TREE_TYPE (gpr);
7811 t = build2 (MODIFY_EXPR, type,
7812 gpr, build_int_cst (type, n_gpr * 8));
7813 TREE_SIDE_EFFECTS (t) = 1;
7814 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7817 if (TARGET_SSE && cfun->va_list_fpr_size)
7819 type = TREE_TYPE (fpr);
7820 t = build2 (MODIFY_EXPR, type, fpr,
7821 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7822 TREE_SIDE_EFFECTS (t) = 1;
7823 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7826 /* Find the overflow area. */
7827 type = TREE_TYPE (ovf);
7828 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7829 ovf_rtx = crtl->args.internal_arg_pointer;
7831 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7832 t = make_tree (type, ovf_rtx);
7834 t = build2 (POINTER_PLUS_EXPR, type, t,
7835 size_int (words * UNITS_PER_WORD));
7836 t = build2 (MODIFY_EXPR, type, ovf, t);
7837 TREE_SIDE_EFFECTS (t) = 1;
7838 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7840 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7842 /* Find the register save area.
7843 The function prologue saves it right above the stack frame. */
7844 type = TREE_TYPE (sav);
7845 t = make_tree (type, frame_pointer_rtx);
7846 if (!ix86_varargs_gpr_size)
7847 t = build2 (POINTER_PLUS_EXPR, type, t,
7848 size_int (-8 * X86_64_REGPARM_MAX));
7849 t = build2 (MODIFY_EXPR, type, sav, t);
7850 TREE_SIDE_EFFECTS (t) = 1;
7851 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7855 /* Implement va_arg. */
7858 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7861 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7862 tree f_gpr, f_fpr, f_ovf, f_sav;
7863 tree gpr, fpr, ovf, sav, t;
7865 tree lab_false, lab_over = NULL_TREE;
7870 enum machine_mode nat_mode;
7871 unsigned int arg_boundary;
7873 /* Only a 64bit target needs something special. */
7874 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7875 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7877 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7878 f_fpr = DECL_CHAIN (f_gpr);
7879 f_ovf = DECL_CHAIN (f_fpr);
7880 f_sav = DECL_CHAIN (f_ovf);
7882 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7883 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7884 valist = build_va_arg_indirect_ref (valist);
7885 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7886 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7887 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7889 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7891 type = build_pointer_type (type);
7892 size = int_size_in_bytes (type);
7893 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7895 nat_mode = type_natural_mode (type, NULL);
7904 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7905 if (ix86_cfun_abi () == SYSV_ABI)
7912 container = construct_container (nat_mode, TYPE_MODE (type),
7913 type, 0, X86_64_REGPARM_MAX,
7914 X86_64_SSE_REGPARM_MAX, intreg,
7919 /* Pull the value out of the saved registers. */
7921 addr = create_tmp_var (ptr_type_node, "addr");
7925 int needed_intregs, needed_sseregs;
7927 tree int_addr, sse_addr;
7929 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7930 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7932 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7934 need_temp = (!REG_P (container)
7935 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7936 || TYPE_ALIGN (type) > 128));
7938 /* In case we are passing a structure, verify that it is a consecutive
7939 block in the register save area. If not, we need to do moves. */
7940 if (!need_temp && !REG_P (container))
7942 /* Verify that all registers are strictly consecutive */
7943 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7947 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7949 rtx slot = XVECEXP (container, 0, i);
7950 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7951 || INTVAL (XEXP (slot, 1)) != i * 16)
7959 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7961 rtx slot = XVECEXP (container, 0, i);
7962 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7963 || INTVAL (XEXP (slot, 1)) != i * 8)
7975 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7976 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7979 /* First ensure that we fit completely in registers. */
7982 t = build_int_cst (TREE_TYPE (gpr),
7983 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7984 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7985 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7986 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7987 gimplify_and_add (t, pre_p);
7991 t = build_int_cst (TREE_TYPE (fpr),
7992 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7993 + X86_64_REGPARM_MAX * 8);
7994 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7995 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7996 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7997 gimplify_and_add (t, pre_p);
8000 /* Compute index to start of area used for integer regs. */
8003 /* int_addr = gpr + sav; */
8004 t = fold_convert (sizetype, gpr);
8005 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8006 gimplify_assign (int_addr, t, pre_p);
8010 /* sse_addr = fpr + sav; */
8011 t = fold_convert (sizetype, fpr);
8012 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
8013 gimplify_assign (sse_addr, t, pre_p);
8017 int i, prev_size = 0;
8018 tree temp = create_tmp_var (type, "va_arg_tmp");
8021 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8022 gimplify_assign (addr, t, pre_p);
8024 for (i = 0; i < XVECLEN (container, 0); i++)
8026 rtx slot = XVECEXP (container, 0, i);
8027 rtx reg = XEXP (slot, 0);
8028 enum machine_mode mode = GET_MODE (reg);
8034 tree dest_addr, dest;
8035 int cur_size = GET_MODE_SIZE (mode);
8037 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8038 prev_size = INTVAL (XEXP (slot, 1));
8039 if (prev_size + cur_size > size)
8041 cur_size = size - prev_size;
8042 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8043 if (mode == BLKmode)
8046 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8047 if (mode == GET_MODE (reg))
8048 addr_type = build_pointer_type (piece_type);
8050 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8052 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8055 if (SSE_REGNO_P (REGNO (reg)))
8057 src_addr = sse_addr;
8058 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8062 src_addr = int_addr;
8063 src_offset = REGNO (reg) * 8;
8065 src_addr = fold_convert (addr_type, src_addr);
8066 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
8067 size_int (src_offset));
8069 dest_addr = fold_convert (daddr_type, addr);
8070 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
8071 size_int (prev_size));
8072 if (cur_size == GET_MODE_SIZE (mode))
8074 src = build_va_arg_indirect_ref (src_addr);
8075 dest = build_va_arg_indirect_ref (dest_addr);
8077 gimplify_assign (dest, src, pre_p);
8082 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
8083 3, dest_addr, src_addr,
8084 size_int (cur_size));
8085 gimplify_and_add (copy, pre_p);
8087 prev_size += cur_size;
8093 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8094 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8095 gimplify_assign (gpr, t, pre_p);
8100 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8101 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8102 gimplify_assign (fpr, t, pre_p);
8105 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8107 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8110 /* ... otherwise out of the overflow area. */
8112 /* When we align a parameter on the stack for the caller, if the parameter
8113 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8114 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8115 here with the caller. */
8116 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
8117 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8118 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8120 /* Care for on-stack alignment if needed. */
8121 if (arg_boundary <= 64 || size == 0)
8125 HOST_WIDE_INT align = arg_boundary / 8;
8126 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
8127 size_int (align - 1));
8128 t = fold_convert (sizetype, t);
8129 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8131 t = fold_convert (TREE_TYPE (ovf), t);
8134 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8135 gimplify_assign (addr, t, pre_p);
8137 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
8138 size_int (rsize * UNITS_PER_WORD));
8139 gimplify_assign (unshare_expr (ovf), t, pre_p);
8142 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8144 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8145 addr = fold_convert (ptrtype, addr);
8148 addr = build_va_arg_indirect_ref (addr);
8149 return build_va_arg_indirect_ref (addr);
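/* For orientation, the sequence gimplified above corresponds roughly to
   the following pseudocode (an illustrative sketch, not emitted verbatim;
   gpr_limit and fpr_limit stand for the register-save bounds computed
   above):

       if (gpr >= gpr_limit) goto lab_false;
       if (fpr >= fpr_limit) goto lab_false;
       int_addr = sav + gpr;  sse_addr = sav + fpr;
       ... copy register pieces into va_arg_tmp when needed ...
       gpr += needed_intregs * 8;  fpr += needed_sseregs * 16;
       goto lab_over;
     lab_false:
       addr = align (ovf, arg_boundary);
       ovf = addr + rsize * UNITS_PER_WORD;
     lab_over:
       return *(type *) addr;  */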
8152 /* Return true if OPNUM's MEM should be matched
8153 in movabs* patterns. */
8156 ix86_check_movabs (rtx insn, int opnum)
8160 set = PATTERN (insn);
8161 if (GET_CODE (set) == PARALLEL)
8162 set = XVECEXP (set, 0, 0);
8163 gcc_assert (GET_CODE (set) == SET);
8164 mem = XEXP (set, opnum);
8165 while (GET_CODE (mem) == SUBREG)
8166 mem = SUBREG_REG (mem);
8167 gcc_assert (MEM_P (mem));
8168 return volatile_ok || !MEM_VOLATILE_P (mem);
8171 /* Initialize the table of extra 80387 mathematical constants. */
8174 init_ext_80387_constants (void)
8176 static const char * cst[5] =
8178 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8179 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8180 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8181 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8182 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8186 for (i = 0; i < 5; i++)
8188 real_from_string (&ext_80387_constants_table[i], cst[i]);
8189 /* Ensure each constant is rounded to XFmode precision. */
8190 real_convert (&ext_80387_constants_table[i],
8191 XFmode, &ext_80387_constants_table[i]);
8194 ext_80387_constants_init = 1;
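/* An illustrative summary of how the table above is consumed by the
   predicates below: standard_80387_constant_p is assumed to return 1
   for +0.0 (fldz), 2 for +1.0 (fld1), 3..7 for the five table entries
   (fldlg2, fldln2, fldl2e, fldl2t, fldpi), and higher values for the
   negated constants that split into a load-and-fchs pair.  */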
8197 /* Return non-zero if the constant is something that
8198 can be loaded with a special instruction. */
8201 standard_80387_constant_p (rtx x)
8203 enum machine_mode mode = GET_MODE (x);
8207 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8210 if (x == CONST0_RTX (mode))
8212 if (x == CONST1_RTX (mode))
8215 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8217 /* For XFmode constants, try to find a special 80387 instruction when
8218 optimizing for size or on those CPUs that benefit from them. */
8220 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8224 if (! ext_80387_constants_init)
8225 init_ext_80387_constants ();
8227 for (i = 0; i < 5; i++)
8228 if (real_identical (&r, &ext_80387_constants_table[i]))
8232 /* A load of the constant -0.0 or -1.0 will be split into an
8233 fldz;fchs or fld1;fchs sequence. */
8234 if (real_isnegzero (&r))
8236 if (real_identical (&r, &dconstm1))
8242 /* Return the opcode of the special instruction to be used to load
8246 standard_80387_constant_opcode (rtx x)
8248 switch (standard_80387_constant_p (x))
8272 /* Return the CONST_DOUBLE representing the 80387 constant that is
8273 loaded by the specified special instruction. The argument IDX
8274 matches the return value from standard_80387_constant_p. */
8277 standard_80387_constant_rtx (int idx)
8281 if (! ext_80387_constants_init)
8282 init_ext_80387_constants ();
8298 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8302 /* Return 1 if X is all 0s and 2 if X is all 1s
8303 in a supported SSE vector mode. */
8306 standard_sse_constant_p (rtx x)
8308 enum machine_mode mode = GET_MODE (x);
8310 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8312 if (vector_all_ones_operand (x, mode))
8328 /* Return the opcode of the special instruction to be used to load
8332 standard_sse_constant_opcode (rtx insn, rtx x)
8334 switch (standard_sse_constant_p (x))
8337 switch (get_attr_mode (insn))
8340 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8342 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8343 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8345 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
8347 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8348 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
8350 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
8352 return "vxorps\t%x0, %x0, %x0";
8354 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8355 return "vxorps\t%x0, %x0, %x0";
8357 return "vxorpd\t%x0, %x0, %x0";
8359 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
8360 return "vxorps\t%x0, %x0, %x0";
8362 return "vpxor\t%x0, %x0, %x0";
8367 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
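/* Note: neither standard SSE constant requires a memory load;
   xorps/pxor of a register with itself yields all zeros, while
   pcmpeqd of a register with itself yields all ones.  */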
8374 /* Return true if OP contains a symbol reference. */
8377 symbolic_reference_mentioned_p (rtx op)
8382 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8385 fmt = GET_RTX_FORMAT (GET_CODE (op));
8386 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8392 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8393 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8397 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8404 /* Return true if it is appropriate to emit `ret' instructions in the
8405 body of a function. Do this only if the epilogue is simple, needing a
8406 couple of insns. Prior to reloading, we can't tell how many registers
8407 must be saved, so return false then. Return false if there is no frame
8408 marker to de-allocate. */
8411 ix86_can_use_return_insn_p (void)
8413 struct ix86_frame frame;
8415 if (! reload_completed || frame_pointer_needed)
8418 /* Don't allow more than 32k pop, since that's all we can do
8419 with one instruction. */
8420 if (crtl->args.pops_args && crtl->args.size >= 32768)
8423 ix86_compute_frame_layout (&frame);
8424 return (frame.stack_pointer_offset == UNITS_PER_WORD
8425 && (frame.nregs + frame.nsseregs) == 0);
8428 /* Value should be nonzero if functions must have frame pointers.
8429 Zero means the frame pointer need not be set up (and parms may
8430 be accessed via the stack pointer) in functions that seem suitable. */
8433 ix86_frame_pointer_required (void)
8435 /* If we accessed previous frames, then the generated code expects
8436 to be able to access the saved ebp value in our frame. */
8437 if (cfun->machine->accesses_prev_frame)
8440 /* Several x86 OSes need a frame pointer for other reasons,
8441 usually pertaining to setjmp. */
8442 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8445 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8446 turns off the frame pointer by default. Turn it back on now if
8447 we've not got a leaf function. */
8448 if (TARGET_OMIT_LEAF_FRAME_POINTER
8449 && (!current_function_is_leaf
8450 || ix86_current_function_calls_tls_descriptor))
8453 if (crtl->profile && !flag_fentry)
8459 /* Record that the current function accesses previous call frames. */
8462 ix86_setup_frame_addresses (void)
8464 cfun->machine->accesses_prev_frame = 1;
8467 #ifndef USE_HIDDEN_LINKONCE
8468 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
8469 # define USE_HIDDEN_LINKONCE 1
8471 # define USE_HIDDEN_LINKONCE 0
8475 static int pic_labels_used;
8477 /* Fills in the label name that should be used for a pc thunk for
8478 the given register. */
8481 get_pc_thunk_name (char name[32], unsigned int regno)
8483 gcc_assert (!TARGET_64BIT);
8485 if (USE_HIDDEN_LINKONCE)
8486 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
8488 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8492 /* This function generates the pc thunks used for -fpic: each loads its
8493 register with the return address of the caller and then returns. */
8496 ix86_code_end (void)
8501 for (regno = AX_REG; regno <= SP_REG; regno++)
8506 if (!(pic_labels_used & (1 << regno)))
8509 get_pc_thunk_name (name, regno);
8511 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8512 get_identifier (name),
8513 build_function_type (void_type_node, void_list_node));
8514 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8515 NULL_TREE, void_type_node);
8516 TREE_PUBLIC (decl) = 1;
8517 TREE_STATIC (decl) = 1;
8522 switch_to_section (darwin_sections[text_coal_section]);
8523 fputs ("\t.weak_definition\t", asm_out_file);
8524 assemble_name (asm_out_file, name);
8525 fputs ("\n\t.private_extern\t", asm_out_file);
8526 assemble_name (asm_out_file, name);
8527 putc ('\n', asm_out_file);
8528 ASM_OUTPUT_LABEL (asm_out_file, name);
8529 DECL_WEAK (decl) = 1;
8533 if (USE_HIDDEN_LINKONCE)
8535 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8537 targetm.asm_out.unique_section (decl, 0);
8538 switch_to_section (get_named_section (decl, NULL, 0));
8540 targetm.asm_out.globalize_label (asm_out_file, name);
8541 fputs ("\t.hidden\t", asm_out_file);
8542 assemble_name (asm_out_file, name);
8543 putc ('\n', asm_out_file);
8544 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8548 switch_to_section (text_section);
8549 ASM_OUTPUT_LABEL (asm_out_file, name);
8552 DECL_INITIAL (decl) = make_node (BLOCK);
8553 current_function_decl = decl;
8554 init_function_start (decl);
8555 first_function_block_is_cold = false;
8556 /* Make sure unwind info is emitted for the thunk if needed. */
8557 final_start_function (emit_barrier (), asm_out_file, 1);
8559 /* Pad stack IP move with 4 instructions (two NOPs count
8560 as one instruction). */
8561 if (TARGET_PAD_SHORT_FUNCTION)
8566 fputs ("\tnop\n", asm_out_file);
8569 xops[0] = gen_rtx_REG (Pmode, regno);
8570 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8571 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8572 fputs ("\tret\n", asm_out_file);
8573 final_end_function ();
8574 init_insn_lengths ();
8575 free_after_compilation (cfun);
8577 current_function_decl = NULL;
8580 if (flag_split_stack)
8581 file_end_indicate_split_stack ();
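/* For reference, a thunk emitted above for, say, %ebx looks roughly
   like this (an illustrative sketch; leading nop padding depends on
   TARGET_PAD_SHORT_FUNCTION):

       __i686.get_pc_thunk.bx:
	   movl	(%esp), %ebx	# return address == the caller's PC
	   ret  */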
8584 /* Emit code for the SET_GOT patterns. */
8587 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8593 if (TARGET_VXWORKS_RTP && flag_pic)
8595 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8596 xops[2] = gen_rtx_MEM (Pmode,
8597 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8598 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8600 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8601 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8602 an unadorned address. */
8603 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8604 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8605 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8609 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8611 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8613 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8616 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8619 output_asm_insn ("call\t%a2", xops);
8620 #ifdef DWARF2_UNWIND_INFO
8621 /* The call to the next label acts as a push. */
8622 if (dwarf2out_do_frame ())
8626 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8627 gen_rtx_PLUS (Pmode,
8630 RTX_FRAME_RELATED_P (insn) = 1;
8631 dwarf2out_frame_debug (insn, true);
8638 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8639 is what will be referenced by the Mach-O PIC subsystem. */
8641 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8644 targetm.asm_out.internal_label (asm_out_file, "L",
8645 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8649 output_asm_insn ("pop%z0\t%0", xops);
8650 #ifdef DWARF2_UNWIND_INFO
8651 /* The pop is a pop and clobbers dest, but doesn't restore it
8652 for unwind info purposes. */
8653 if (dwarf2out_do_frame ())
8657 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8658 dwarf2out_frame_debug (insn, true);
8659 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8660 gen_rtx_PLUS (Pmode,
8663 RTX_FRAME_RELATED_P (insn) = 1;
8664 dwarf2out_frame_debug (insn, true);
8673 get_pc_thunk_name (name, REGNO (dest));
8674 pic_labels_used |= 1 << REGNO (dest);
8676 #ifdef DWARF2_UNWIND_INFO
8677 /* Ensure all queued register saves are flushed before the
8679 if (dwarf2out_do_frame ())
8680 dwarf2out_flush_queued_reg_saves ();
8682 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8683 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8684 output_asm_insn ("call\t%X2", xops);
8685 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8686 is what will be referenced by the Mach-O PIC subsystem. */
8689 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8691 targetm.asm_out.internal_label (asm_out_file, "L",
8692 CODE_LABEL_NUMBER (label));
8699 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8700 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8702 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
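/* Without the get_pc thunk, the classic sequence emitted above looks
   roughly like this (an illustrative sketch, AT&T syntax):

       call	1f
   1:  popl	%ebx
       addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call targets the matching
   get_pc thunk instead and the GOT symbol is added directly.  */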
8707 /* Generate a "push" pattern for input ARG. */
8712 struct machine_function *m = cfun->machine;
8714 if (m->fs.cfa_reg == stack_pointer_rtx)
8715 m->fs.cfa_offset += UNITS_PER_WORD;
8716 m->fs.sp_offset += UNITS_PER_WORD;
8718 return gen_rtx_SET (VOIDmode,
8720 gen_rtx_PRE_DEC (Pmode,
8721 stack_pointer_rtx)),
8725 /* Generate a "pop" pattern for input ARG. */
8730 return gen_rtx_SET (VOIDmode,
8733 gen_rtx_POST_INC (Pmode,
8734 stack_pointer_rtx)));
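/* The push and pop patterns built above correspond to RTL of roughly
   this shape (an illustrative sketch; the memory mode is assumed to
   be word_mode):

     push:  (set (mem (pre_dec sp)) arg)
     pop:   (set arg (mem (post_inc sp)))  */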
8737 /* Return >= 0 if there is an unused call-clobbered register available
8738 for the entire function. */
8741 ix86_select_alt_pic_regnum (void)
8743 if (current_function_is_leaf
8745 && !ix86_current_function_calls_tls_descriptor)
8748 /* Can't use the same register for both PIC and DRAP. */
8750 drap = REGNO (crtl->drap_reg);
8753 for (i = 2; i >= 0; --i)
8754 if (i != drap && !df_regs_ever_live_p (i))
8758 return INVALID_REGNUM;
8761 /* Return 1 if we need to save REGNO. */
8763 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8765 if (pic_offset_table_rtx
8766 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8767 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8769 || crtl->calls_eh_return
8770 || crtl->uses_const_pool))
8772 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8777 if (crtl->calls_eh_return && maybe_eh_return)
8782 unsigned test = EH_RETURN_DATA_REGNO (i);
8783 if (test == INVALID_REGNUM)
8790 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8793 return (df_regs_ever_live_p (regno)
8794 && !call_used_regs[regno]
8795 && !fixed_regs[regno]
8796 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8799 /* Return the number of saved general purpose registers. */
8802 ix86_nsaved_regs (void)
8807 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8808 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8813 /* Return the number of saved SSE registers. */
8816 ix86_nsaved_sseregs (void)
8821 if (ix86_cfun_abi () != MS_ABI)
8823 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8824 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8829 /* Given FROM and TO register numbers, say whether this elimination is
8830 allowed. If stack alignment is needed, we can only replace argument
8831 pointer with hard frame pointer, or replace frame pointer with stack
8832 pointer. Otherwise, frame pointer elimination is automatically
8833 handled and all other eliminations are valid. */
8836 ix86_can_eliminate (const int from, const int to)
8838 if (stack_realign_fp)
8839 return ((from == ARG_POINTER_REGNUM
8840 && to == HARD_FRAME_POINTER_REGNUM)
8841 || (from == FRAME_POINTER_REGNUM
8842 && to == STACK_POINTER_REGNUM));
8844 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8847 /* Return the offset between two registers, one to be eliminated, and the other
8848 its replacement, at the start of a routine. */
8851 ix86_initial_elimination_offset (int from, int to)
8853 struct ix86_frame frame;
8854 ix86_compute_frame_layout (&frame);
8856 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8857 return frame.hard_frame_pointer_offset;
8858 else if (from == FRAME_POINTER_REGNUM
8859 && to == HARD_FRAME_POINTER_REGNUM)
8860 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8863 gcc_assert (to == STACK_POINTER_REGNUM);
8865 if (from == ARG_POINTER_REGNUM)
8866 return frame.stack_pointer_offset;
8868 gcc_assert (from == FRAME_POINTER_REGNUM);
8869 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8873 /* In a dynamically-aligned function, we can't know the offset from
8874 stack pointer to frame pointer, so we must ensure that setjmp
8875 eliminates fp against the hard fp (%ebp) rather than trying to
8876 index from %esp up to the top of the frame across a gap that is
8877 of unknown (at compile-time) size. */
8879 ix86_builtin_setjmp_frame_value (void)
8881 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8884 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8885 field in the TCB, so they cannot be used together. */
8888 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8892 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8894 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8897 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8900 error ("%<-fsplit-stack%> requires "
8901 "assembler support for CFI directives");
8909 /* When using -fsplit-stack, the allocation routines set a field in
8910 the TCB to the bottom of the stack plus this much space, measured
8913 #define SPLIT_STACK_AVAILABLE 256
8915 /* Fill structure ix86_frame about frame of currently computed function. */
8918 ix86_compute_frame_layout (struct ix86_frame *frame)
8920 unsigned int stack_alignment_needed;
8921 HOST_WIDE_INT offset;
8922 unsigned int preferred_alignment;
8923 HOST_WIDE_INT size = get_frame_size ();
8924 HOST_WIDE_INT to_allocate;
8926 frame->nregs = ix86_nsaved_regs ();
8927 frame->nsseregs = ix86_nsaved_sseregs ();
8929 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8930 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8932 /* The MS ABI seems to require the stack alignment to always be 16,
8933 except for function prologues and leaf functions. */
8934 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8935 && (!current_function_is_leaf || cfun->calls_alloca != 0
8936 || ix86_current_function_calls_tls_descriptor))
8938 preferred_alignment = 16;
8939 stack_alignment_needed = 16;
8940 crtl->preferred_stack_boundary = 128;
8941 crtl->stack_alignment_needed = 128;
8944 gcc_assert (!size || stack_alignment_needed);
8945 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8946 gcc_assert (preferred_alignment <= stack_alignment_needed);
8948 /* For SEH we have to limit the amount of code movement into the prologue.
8949 At present we do this via a BLOCKAGE, at which point there's very little
8950 scheduling that can be done, which means that there's very little point
8951 in doing anything except PUSHs. */
8953 cfun->machine->use_fast_prologue_epilogue = false;
8955 /* During the reload iteration the number of registers saved can change.
8956 Recompute the value as needed. Do not recompute when the number of
8957 registers didn't change, as reload does multiple calls to the function
8958 and does not expect the decision to change within a single iteration. */
8959 else if (!optimize_function_for_size_p (cfun)
8960 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8962 int count = frame->nregs;
8963 struct cgraph_node *node = cgraph_node (current_function_decl);
8965 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8967 /* The fast prologue uses move instead of push to save registers. This
8968 is significantly longer, but also executes faster as modern hardware
8969 can execute the moves in parallel, but can't do that for push/pop.
8971 Be careful about choosing which prologue to emit: when the function
8972 takes many instructions to execute, we may use the slow version, as
8973 well as when the function is known to be outside a hot spot (this is
8974 known with feedback only). Weight the size of the function by the
8975 number of registers to save, as it is cheap to use one or two push
8976 instructions but very slow to use many of them. */
8978 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8979 if (node->frequency < NODE_FREQUENCY_NORMAL
8980 || (flag_branch_probabilities
8981 && node->frequency < NODE_FREQUENCY_HOT))
8982 cfun->machine->use_fast_prologue_epilogue = false;
8984 cfun->machine->use_fast_prologue_epilogue
8985 = !expensive_function_p (count);
8987 if (TARGET_PROLOGUE_USING_MOVE
8988 && cfun->machine->use_fast_prologue_epilogue)
8989 frame->save_regs_using_mov = true;
8991 frame->save_regs_using_mov = false;
8993 /* If static stack checking is enabled and done with probes, the registers
8994 need to be saved before allocating the frame. */
8995 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8996 frame->save_regs_using_mov = false;
8998 /* Skip return address. */
8999 offset = UNITS_PER_WORD;
9001 /* Skip pushed static chain. */
9002 if (ix86_static_chain_on_stack)
9003 offset += UNITS_PER_WORD;
9005 /* Skip saved base pointer. */
9006 if (frame_pointer_needed)
9007 offset += UNITS_PER_WORD;
9008 frame->hfp_save_offset = offset;
9010 /* The traditional frame pointer location is at the top of the frame. */
9011 frame->hard_frame_pointer_offset = offset;
9013 /* Register save area */
9014 offset += frame->nregs * UNITS_PER_WORD;
9015 frame->reg_save_offset = offset;
9017 /* Align and set SSE register save area. */
9018 if (frame->nsseregs)
9020 /* The only ABI that has saved SSE registers (Win64) also has a
9021 16-byte aligned default stack, and thus we don't need to be
9022 within the re-aligned local stack frame to save them. */
9023 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9024 offset = (offset + 16 - 1) & -16;
9025 offset += frame->nsseregs * 16;
9027 frame->sse_reg_save_offset = offset;
9029 /* The re-aligned stack starts here. Values before this point are not
9030 directly comparable with values below this point. In order to make
9031 sure that no value happens to be the same before and after, force
9032 the alignment computation below to add a non-zero value. */
9033 if (stack_realign_fp)
9034 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9037 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9038 offset += frame->va_arg_size;
9040 /* Align start of frame for local function. */
9041 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9043 /* Frame pointer points here. */
9044 frame->frame_pointer_offset = offset;
9048 /* Add the outgoing arguments area. It can be skipped if we eliminated
9049 all the function calls as dead code.
9050 Skipping is however impossible when the function calls alloca. The
9051 alloca expander assumes that the last crtl->outgoing_args_size bytes
9052 of the stack frame are unused. */
9053 if (ACCUMULATE_OUTGOING_ARGS
9054 && (!current_function_is_leaf || cfun->calls_alloca
9055 || ix86_current_function_calls_tls_descriptor))
9057 offset += crtl->outgoing_args_size;
9058 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9061 frame->outgoing_arguments_size = 0;
9063 /* Align stack boundary. Only needed if we're calling another function
9065 if (!current_function_is_leaf || cfun->calls_alloca
9066 || ix86_current_function_calls_tls_descriptor)
9067 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9069 /* We've reached end of stack frame. */
9070 frame->stack_pointer_offset = offset;
9072 /* Size prologue needs to allocate. */
9073 to_allocate = offset - frame->sse_reg_save_offset;
9075 if ((!to_allocate && frame->nregs <= 1)
9076 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9077 frame->save_regs_using_mov = false;
9079 if (ix86_using_red_zone ()
9080 && current_function_sp_is_unchanging
9081 && current_function_is_leaf
9082 && !ix86_current_function_calls_tls_descriptor)
9084 frame->red_zone_size = to_allocate;
9085 if (frame->save_regs_using_mov)
9086 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9087 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9088 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9091 frame->red_zone_size = 0;
9092 frame->stack_pointer_offset -= frame->red_zone_size;
9094 /* The SEH frame pointer location is near the bottom of the frame.
9095 This is enforced by the fact that the difference between the
9096 stack pointer and the frame pointer is limited to 240 bytes in
9097 the unwind data structure. */
9102 /* If we can leave the frame pointer where it is, do so. */
9103 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9104 if (diff > 240 || (diff & 15) != 0)
9106 /* Ideally we'd determine what portion of the local stack frame
9107 (within the constraint of the lowest 240) is most heavily used.
9108 But without that complication, simply bias the frame pointer
9109 by 128 bytes so as to maximize the amount of the local stack
9110 frame that is addressable with 8-bit offsets. */
9111 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
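/* To summarize the layout computed above (an illustrative sketch;
   offsets grow away from the CFA, bracketed slots are optional, and
   SEH may bias hard_frame_pointer_offset as done just above):

       return address
       [pushed static chain]
       [saved frame pointer]	<- hard_frame_pointer_offset
       saved integer registers	<- reg_save_offset
       [SSE register save area, 16-byte aligned]  <- sse_reg_save_offset
       [realignment gap]
       va_arg register save area
       local variables		<- frame_pointer_offset
       [outgoing arguments]
				<- stack_pointer_offset (minus red zone)  */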
9116 /* This is semi-inlined memory_address_length, but simplified
9117 since we know that we're always dealing with reg+offset, and
9118 to avoid having to create and discard all that rtl. */
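/* Illustrative encoding lengths (assuming the usual modrm rules that
   the code below mirrors): (%eax) costs 0 extra bytes; (%ebp) and
   (%esp) each cost 1 (a zero disp8 resp. a SIB byte); 127(%ebp)
   costs 1; 128(%ebp) costs 4 (disp32); 128(%esp) costs 5
   (SIB + disp32).  */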
9121 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9127 /* EBP and R13 cannot be encoded without an offset. */
9128 len = (regno == BP_REG || regno == R13_REG);
9130 else if (IN_RANGE (offset, -128, 127))
9133 /* ESP and R12 must be encoded with a SIB byte. */
9134 if (regno == SP_REG || regno == R12_REG)
9140 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9141 The valid base registers are taken from CFUN->MACHINE->FS. */
9144 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9146 const struct machine_function *m = cfun->machine;
9147 rtx base_reg = NULL;
9148 HOST_WIDE_INT base_offset = 0;
9150 if (m->use_fast_prologue_epilogue)
9152 /* Choose the base register most likely to allow the most scheduling
9153 opportunities. Generally FP is valid througout the function,
9154 while DRAP must be reloaded within the epilogue. But choose either
9155 over the SP due to increased encoding size. */
9159 base_reg = hard_frame_pointer_rtx;
9160 base_offset = m->fs.fp_offset - cfa_offset;
9162 else if (m->fs.drap_valid)
9164 base_reg = crtl->drap_reg;
9165 base_offset = 0 - cfa_offset;
9167 else if (m->fs.sp_valid)
9169 base_reg = stack_pointer_rtx;
9170 base_offset = m->fs.sp_offset - cfa_offset;
9175 HOST_WIDE_INT toffset;
9178 /* Choose the base register with the smallest address encoding.
9179 With a tie, choose FP > DRAP > SP. */
9182 base_reg = stack_pointer_rtx;
9183 base_offset = m->fs.sp_offset - cfa_offset;
9184 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9186 if (m->fs.drap_valid)
9188 toffset = 0 - cfa_offset;
9189 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9192 base_reg = crtl->drap_reg;
9193 base_offset = toffset;
9199 toffset = m->fs.fp_offset - cfa_offset;
9200 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9203 base_reg = hard_frame_pointer_rtx;
9204 base_offset = toffset;
9209 gcc_assert (base_reg != NULL);
9211 return plus_constant (base_reg, base_offset);
9214 /* Emit code to save registers in the prologue. */
9217 ix86_emit_save_regs (void)
9222 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9223 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9225 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
9226 RTX_FRAME_RELATED_P (insn) = 1;
9230 /* Emit a single register save at CFA - CFA_OFFSET. */
9233 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9234 HOST_WIDE_INT cfa_offset)
9236 struct machine_function *m = cfun->machine;
9237 rtx reg = gen_rtx_REG (mode, regno);
9238 rtx mem, addr, base, insn;
9240 addr = choose_baseaddr (cfa_offset);
9241 mem = gen_frame_mem (mode, addr);
9243 /* For SSE saves, we need to indicate the 128-bit alignment. */
9244 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9246 insn = emit_move_insn (mem, reg);
9247 RTX_FRAME_RELATED_P (insn) = 1;
9250 if (GET_CODE (base) == PLUS)
9251 base = XEXP (base, 0);
9252 gcc_checking_assert (REG_P (base));
9254 /* When saving registers into a re-aligned local stack frame, avoid
9255 any tricky guessing by dwarf2out. */
9256 if (m->fs.realigned)
9258 gcc_checking_assert (stack_realign_drap);
9260 if (regno == REGNO (crtl->drap_reg))
9262 /* A bit of a hack. We force the DRAP register to be saved in
9263 the re-aligned stack frame, which provides us with a copy
9264 of the CFA that will last past the prologue. Install it. */
9265 gcc_checking_assert (cfun->machine->fs.fp_valid);
9266 addr = plus_constant (hard_frame_pointer_rtx,
9267 cfun->machine->fs.fp_offset - cfa_offset);
9268 mem = gen_rtx_MEM (mode, addr);
9269 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9273 /* The frame pointer is a stable reference within the
9274 aligned frame. Use it. */
9275 gcc_checking_assert (cfun->machine->fs.fp_valid);
9276 addr = plus_constant (hard_frame_pointer_rtx,
9277 cfun->machine->fs.fp_offset - cfa_offset);
9278 mem = gen_rtx_MEM (mode, addr);
9279 add_reg_note (insn, REG_CFA_EXPRESSION,
9280 gen_rtx_SET (VOIDmode, mem, reg));
9284 /* The memory may not be relative to the current CFA register,
9285 which means that we may need to generate a new pattern for
9286 use by the unwind info. */
9287 else if (base != m->fs.cfa_reg)
9289 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
9290 mem = gen_rtx_MEM (mode, addr);
9291 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9295 /* Emit code to save registers using MOV insns.
9296 First register is stored at CFA - CFA_OFFSET. */
9298 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9302 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9303 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9305 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
9306 cfa_offset -= UNITS_PER_WORD;
9310 /* Emit code to save SSE registers using MOV insns.
9311 First register is stored at CFA - CFA_OFFSET. */
9313 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9317 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9318 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9320 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9325 static GTY(()) rtx queued_cfa_restores;
9327 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
9328 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9329 Don't add the note if the previously saved value will be left untouched
9330 within the stack red zone till return, as unwinders can find the same
9331 value in the register and on the stack. */
9334 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9336 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
9341 add_reg_note (insn, REG_CFA_RESTORE, reg);
9342 RTX_FRAME_RELATED_P (insn) = 1;
9346 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9349 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9352 ix86_add_queued_cfa_restore_notes (rtx insn)
9355 if (!queued_cfa_restores)
9357 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9359 XEXP (last, 1) = REG_NOTES (insn);
9360 REG_NOTES (insn) = queued_cfa_restores;
9361 queued_cfa_restores = NULL_RTX;
9362 RTX_FRAME_RELATED_P (insn) = 1;
9365 /* Expand prologue or epilogue stack adjustment.
9366 The pattern exists to put a dependency on all ebp-based memory accesses.
9367 STYLE should be negative if instructions should be marked as frame related,
9368 zero if the %r11 register is live and cannot be freely used, and positive
9372 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9373 int style, bool set_cfa)
9375 struct machine_function *m = cfun->machine;
9379 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9380 else if (x86_64_immediate_operand (offset, DImode))
9381 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9385 /* r11 is used by indirect sibcall return as well, set before the
9386 epilogue and used after the epilogue. */
9388 tmp = gen_rtx_REG (DImode, R11_REG);
9391 gcc_assert (src != hard_frame_pointer_rtx
9392 && dest != hard_frame_pointer_rtx);
9393 tmp = hard_frame_pointer_rtx;
9395 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9397 RTX_FRAME_RELATED_P (insn) = 1;
9399 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9402 insn = emit_insn (insn);
9404 ix86_add_queued_cfa_restore_notes (insn);
9410 gcc_assert (m->fs.cfa_reg == src);
9411 m->fs.cfa_offset += INTVAL (offset);
9412 m->fs.cfa_reg = dest;
9414 r = gen_rtx_PLUS (Pmode, src, offset);
9415 r = gen_rtx_SET (VOIDmode, dest, r);
9416 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9417 RTX_FRAME_RELATED_P (insn) = 1;
9420 RTX_FRAME_RELATED_P (insn) = 1;
9422 if (dest == stack_pointer_rtx)
9424 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9425 bool valid = m->fs.sp_valid;
9427 if (src == hard_frame_pointer_rtx)
9429 valid = m->fs.fp_valid;
9430 ooffset = m->fs.fp_offset;
9432 else if (src == crtl->drap_reg)
9434 valid = m->fs.drap_valid;
9439 /* Otherwise there are two possibilities: SP itself, which we set
9440 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
9441 taken care of by hand along the eh_return path. */
9442 gcc_checking_assert (src == stack_pointer_rtx
9443 || offset == const0_rtx);
9446 m->fs.sp_offset = ooffset - INTVAL (offset);
9447 m->fs.sp_valid = valid;
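/* Example usage (mirroring the prologue below): allocating SIZE bytes
   of frame is

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-SIZE), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   which emits a single sub from the stack pointer and, when SET_CFA,
   records the adjustment in a REG_CFA_ADJUST_CFA note.  */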
9451 /* Find an available register to be used as a dynamic realign argument
9452 pointer register. Such a register will be written in the prologue and
9453 used at the beginning of the body, so it must not be
9454 1. a parameter passing register.
9456 We reuse the static chain register if it is available. Otherwise, we
9457 use DI for i386 and R13 for x86-64. We chose R13 since it has
9460 Return: the regno of the chosen register. */
9463 find_drap_reg (void)
9465 tree decl = cfun->decl;
9469 /* Use R13 for a nested function or a function that needs a static
9470 chain. Since a function with a tail call may use any caller-saved
9471 register in the epilogue, DRAP must not use a caller-saved
9472 register in that case. */
9473 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9480 /* Use DI for a nested function or a function that needs a static
9481 chain. Since a function with a tail call may use any caller-saved
9482 register in the epilogue, DRAP must not use a caller-saved
9483 register in that case. */
9484 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9487 /* Reuse the static chain register if it isn't used for parameter
9489 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
9490 && !lookup_attribute ("fastcall",
9491 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
9492 && !lookup_attribute ("thiscall",
9493 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
9500 /* Return minimum incoming stack alignment. */
9503 ix86_minimum_incoming_stack_boundary (bool sibcall)
9505 unsigned int incoming_stack_boundary;
9507 /* Prefer the one specified on the command line. */
9508 if (ix86_user_incoming_stack_boundary)
9509 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9510 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9511 if -mstackrealign is used, this isn't a sibcall check, and the
9512 estimated stack alignment is 128 bits. */
9515 && ix86_force_align_arg_pointer
9516 && crtl->stack_alignment_estimated == 128)
9517 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9519 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9521 /* The incoming stack alignment can be changed on individual functions
9522 via the force_align_arg_pointer attribute. We use the smallest
9523 incoming stack boundary. */
9524 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9525 && lookup_attribute (ix86_force_align_arg_pointer_string,
9526 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9527 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9529 /* The incoming stack frame has to be aligned at least at
9530 parm_stack_boundary. */
9531 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9532 incoming_stack_boundary = crtl->parm_stack_boundary;
9534 /* The stack at the entry of main is aligned by the runtime. We use
9535 the smallest incoming stack boundary. */
9536 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9537 && DECL_NAME (current_function_decl)
9538 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9539 && DECL_FILE_SCOPE_P (current_function_decl))
9540 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9542 return incoming_stack_boundary;
9545 /* Update incoming stack boundary and estimated stack alignment. */
9548 ix86_update_stack_boundary (void)
9550 ix86_incoming_stack_boundary
9551 = ix86_minimum_incoming_stack_boundary (false);
9553 /* x86_64 varargs need 16-byte stack alignment for the register save
9557 && crtl->stack_alignment_estimated < 128)
9558 crtl->stack_alignment_estimated = 128;
9561 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9562 needed or an rtx for DRAP otherwise. */
9565 ix86_get_drap_rtx (void)
9567 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9568 crtl->need_drap = true;
9570 if (stack_realign_drap)
9572 /* Assign DRAP to vDRAP and return vDRAP. */
9573 unsigned int regno = find_drap_reg ();
9578 arg_ptr = gen_rtx_REG (Pmode, regno);
9579 crtl->drap_reg = arg_ptr;
9582 drap_vreg = copy_to_reg (arg_ptr);
9586 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9589 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9590 RTX_FRAME_RELATED_P (insn) = 1;
9598 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9601 ix86_internal_arg_pointer (void)
9603 return virtual_incoming_args_rtx;
9606 struct scratch_reg {
9611 /* Return a short-lived scratch register for use on function entry.
9612 In 32-bit mode, it is valid only after the registers are saved
9613 in the prologue. This register must be released by means of
9614 release_scratch_register_on_entry once it is dead. */
9617 get_scratch_register_on_entry (struct scratch_reg *sr)
9625 /* We always use R11 in 64-bit mode. */
9630 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9632 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9633 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9634 int regparm = ix86_function_regparm (fntype, decl);
9636 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9638 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9639 for the static chain register. */
9640 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9641 && drap_regno != AX_REG)
9643 else if (regparm < 2 && drap_regno != DX_REG)
9645 /* ecx is the static chain register. */
9646 else if (regparm < 3 && !fastcall_p && !static_chain_p
9647 && drap_regno != CX_REG)
9649 else if (ix86_save_reg (BX_REG, true))
9651 /* esi is the static chain register. */
9652 else if (!(regparm == 3 && static_chain_p)
9653 && ix86_save_reg (SI_REG, true))
9655 else if (ix86_save_reg (DI_REG, true))
9659 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9664 sr->reg = gen_rtx_REG (Pmode, regno);
9667 rtx insn = emit_insn (gen_push (sr->reg));
9668 RTX_FRAME_RELATED_P (insn) = 1;
9672 /* Release a scratch register obtained from the preceding function. */
9675 release_scratch_register_on_entry (struct scratch_reg *sr)
9679 rtx x, insn = emit_insn (gen_pop (sr->reg));
9681 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9682 RTX_FRAME_RELATED_P (insn) = 1;
9683 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9684 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9685 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9689 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
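/* STACK_CHECK_PROBE_INTERVAL_EXP defaults to 12, so PROBE_INTERVAL is
   normally 4096 bytes, i.e. one page (an assumption; target headers
   may override the exponent).  */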
9691 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9694 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9696 /* We skip the probe for the first interval + a small dope of 4 words and
9697 probe that many bytes past the specified size to maintain a protection
9698 area at the bottom of the stack. */
9699 const int dope = 4 * UNITS_PER_WORD;
9700 rtx size_rtx = GEN_INT (size);
9702 /* See if we have a constant small number of probes to generate. If so,
9703 that's the easy case. The run-time loop is made up of 11 insns in the
9704 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9705 for n # of intervals. */
9706 if (size <= 5 * PROBE_INTERVAL)
9708 HOST_WIDE_INT i, adjust;
9709 bool first_probe = true;
9711 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9712 values of N from 1 until it exceeds SIZE. If only one probe is
9713 needed, this will not generate any code. Then adjust and probe
9714 to PROBE_INTERVAL + SIZE. */
9715 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9719 adjust = 2 * PROBE_INTERVAL + dope;
9720 first_probe = false;
9723 adjust = PROBE_INTERVAL;
9725 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9726 plus_constant (stack_pointer_rtx, -adjust)));
9727 emit_stack_probe (stack_pointer_rtx);
9731 adjust = size + PROBE_INTERVAL + dope;
9733 adjust = size + PROBE_INTERVAL - i;
9735 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9736 plus_constant (stack_pointer_rtx, -adjust)));
9737 emit_stack_probe (stack_pointer_rtx);
9739 /* Adjust back to account for the additional first interval. */
9740 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9741 plus_constant (stack_pointer_rtx,
9742 PROBE_INTERVAL + dope)));
9745 /* Otherwise, do the same as above, but in a loop. Note that we must be
9746 extra careful with variables wrapping around because we might be at
9747 the very top (or the very bottom) of the address space and we have
9748 to be able to handle this case properly; in particular, we use an
9749 equality test for the loop condition. */
9752 HOST_WIDE_INT rounded_size;
9753 struct scratch_reg sr;
9755 get_scratch_register_on_entry (&sr);
9758 /* Step 1: round SIZE to the previous multiple of the interval. */
9760 rounded_size = size & -PROBE_INTERVAL;
9763 /* Step 2: compute initial and final value of the loop counter. */
9765 /* SP = SP_0 + PROBE_INTERVAL. */
9766 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9767 plus_constant (stack_pointer_rtx,
9768 - (PROBE_INTERVAL + dope))));
9770 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9771 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9772 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9773 gen_rtx_PLUS (Pmode, sr.reg,
9774 stack_pointer_rtx)));
9779 while (SP != LAST_ADDR)
9781 SP = SP + PROBE_INTERVAL
9785 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9786 values of N from 1 until it is equal to ROUNDED_SIZE. */
9788 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9791 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9792 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9794 if (size != rounded_size)
9796 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9797 plus_constant (stack_pointer_rtx,
9798 rounded_size - size)));
9799 emit_stack_probe (stack_pointer_rtx);
9802 /* Adjust back to account for the additional first interval. */
9803 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9804 plus_constant (stack_pointer_rtx,
9805 PROBE_INTERVAL + dope)));
9807 release_scratch_register_on_entry (&sr);
9810 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9811 cfun->machine->fs.sp_offset += size;
9813 /* Make sure nothing is scheduled before we are done. */
9814 emit_insn (gen_blockage ());
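/* As a concrete example (illustrative; PROBE_INTERVAL assumed to be
   4096 and dope = 4 words), a constant SIZE of two intervals takes
   the small path above and expands to roughly:

       sub	$(2*4096 + dope), %sp
       or	$0, (%sp)		# probe
       sub	$4096, %sp
       or	$0, (%sp)		# probe
       add	$(4096 + dope), %sp	# give the dope back  */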
9817 /* Adjust the stack pointer up to REG while probing it. */
9820 output_adjust_stack_and_probe (rtx reg)
9822 static int labelno = 0;
9823 char loop_lab[32], end_lab[32];
9826 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9827 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9829 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9831 /* Jump to END_LAB if SP == LAST_ADDR. */
9832 xops[0] = stack_pointer_rtx;
9834 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9835 fputs ("\tje\t", asm_out_file);
9836 assemble_name_raw (asm_out_file, end_lab);
9837 fputc ('\n', asm_out_file);
9839 /* SP = SP + PROBE_INTERVAL. */
9840 xops[1] = GEN_INT (PROBE_INTERVAL);
9841 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9844 xops[1] = const0_rtx;
9845 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9847 fprintf (asm_out_file, "\tjmp\t");
9848 assemble_name_raw (asm_out_file, loop_lab);
9849 fputc ('\n', asm_out_file);
9851 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
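/* The loop printed above comes out roughly as (an illustrative
   sketch, AT&T syntax, %scratch holding LAST_ADDR):

     .LPSRL0:
	cmp	%scratch, %esp
	je	.LPSRE0
	sub	$PROBE_INTERVAL, %esp
	or	$0, (%esp)		# probe the freshly exposed word
	jmp	.LPSRL0
     .LPSRE0:  */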
9856 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9857 inclusive. These are offsets from the current stack pointer. */
9860 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9862 /* See if we have a constant small number of probes to generate. If so,
9863 that's the easy case. The run-time loop is made up of 7 insns in the
9864 generic case while the compile-time loop is made up of n insns for n #
9866 if (size <= 7 * PROBE_INTERVAL)
9870 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9871 it exceeds SIZE. If only one probe is needed, this will not
9872 generate any code. Then probe at FIRST + SIZE. */
9873 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9874 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9876 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9879 /* Otherwise, do the same as above, but in a loop. Note that we must be
9880 extra careful with variables wrapping around because we might be at
9881 the very top (or the very bottom) of the address space and we have
9882 to be able to handle this case properly; in particular, we use an
9883 equality test for the loop condition. */
9886 HOST_WIDE_INT rounded_size, last;
9887 struct scratch_reg sr;
9889 get_scratch_register_on_entry (&sr);
9892 /* Step 1: round SIZE to the previous multiple of the interval. */
9894 rounded_size = size & -PROBE_INTERVAL;
9897 /* Step 2: compute initial and final value of the loop counter. */
9899 /* TEST_OFFSET = FIRST. */
9900 emit_move_insn (sr.reg, GEN_INT (-first));
9902 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9903 last = first + rounded_size;
9908 while (TEST_ADDR != LAST_ADDR)
9910 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9914 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9915 until it is equal to ROUNDED_SIZE. */
9917 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9920 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9921 that SIZE is equal to ROUNDED_SIZE. */
9923 if (size != rounded_size)
9924 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9927 rounded_size - size));
9929 release_scratch_register_on_entry (&sr);
9932 /* Make sure nothing is scheduled before we are done. */
9933 emit_insn (gen_blockage ());
9936 /* Probe a range of stack addresses from REG to END, inclusive. These are
9937 offsets from the current stack pointer. */
9940 output_probe_stack_range (rtx reg, rtx end)
9942 static int labelno = 0;
9943 char loop_lab[32], end_lab[32];
9946 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9947 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9949 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9951 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9954 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9955 fputs ("\tje\t", asm_out_file);
9956 assemble_name_raw (asm_out_file, end_lab);
9957 fputc ('\n', asm_out_file);
9959 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9960 xops[1] = GEN_INT (PROBE_INTERVAL);
9961 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9963 /* Probe at TEST_ADDR. */
9964 xops[0] = stack_pointer_rtx;
9966 xops[2] = const0_rtx;
9967 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9969 fprintf (asm_out_file, "\tjmp\t");
9970 assemble_name_raw (asm_out_file, loop_lab);
9971 fputc ('\n', asm_out_file);
9973 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9978 /* Finalize the stack_realign_needed flag, which guides the prologue
9979 and epilogue to be generated in the correct form. */
9981 ix86_finalize_stack_realign_flags (void)
9983 /* Check if stack realignment is really needed after reload, and
9984 store the result in cfun. */
9985 unsigned int incoming_stack_boundary
9986 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9987 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9988 unsigned int stack_realign = (incoming_stack_boundary
9989 < (current_function_is_leaf
9990 ? crtl->max_used_stack_slot_alignment
9991 : crtl->stack_alignment_needed));
9993 if (crtl->stack_realign_finalized)
9995 /* After stack_realign_needed is finalized, we can no longer
9997 gcc_assert (crtl->stack_realign_needed == stack_realign);
10001 crtl->stack_realign_needed = stack_realign;
10002 crtl->stack_realign_finalized = true;
10006 /* Expand the prologue into a bunch of separate insns. */
10009 ix86_expand_prologue (void)
10011 struct machine_function *m = cfun->machine;
10014 struct ix86_frame frame;
10015 HOST_WIDE_INT allocate;
10016 bool int_registers_saved;
10018 ix86_finalize_stack_realign_flags ();
10020 /* DRAP should not coexist with stack_realign_fp. */
10021 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10023 memset (&m->fs, 0, sizeof (m->fs));
10025 /* Initialize CFA state for before the prologue. */
10026 m->fs.cfa_reg = stack_pointer_rtx;
10027 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10029 /* Track SP offset to the CFA. We continue tracking this after we've
10030 swapped the CFA register away from SP. In the case of re-alignment
10031 this is fudged; we're interested in offsets within the local frame. */
10032 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10033 m->fs.sp_valid = true;
10035 ix86_compute_frame_layout (&frame);
10037 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10039 /* We should have already generated an error for any use of
10040 ms_hook on a nested function. */
10041 gcc_checking_assert (!ix86_static_chain_on_stack);
10043 /* Check if profiling is active and whether we shall use the
10044 profiling-before-prologue variant. If so, sorry. */
10045 if (crtl->profile && flag_fentry != 0)
10046 sorry ("ms_hook_prologue attribute isn't compatible "
10047 "with -mfentry for 32-bit");
10049 /* In ix86_asm_output_function_label we emitted:
10050 8b ff movl.s %edi,%edi
10052 8b ec movl.s %esp,%ebp
10054 This matches the hookable function prologue in Win32 API
10055 functions in Microsoft Windows XP Service Pack 2 and newer.
10056 Wine uses this to enable Windows apps to hook the Win32 API
10057 functions provided by Wine.
10059 What that means is that we've already set up the frame pointer. */
10061 if (frame_pointer_needed
10062 && !(crtl->drap_reg && crtl->stack_realign_needed))
10066 /* We've decided to use the frame pointer already set up.
10067 Describe this to the unwinder by pretending that both
10068 push and mov insns happen right here.
10070 Putting the unwind info here at the end of the ms_hook
10071 is done so that we can make absolutely certain we get
10072 the required byte sequence at the start of the function,
10073 rather than relying on an assembler that can produce
10074 the exact encoding required.
10076 However it does mean (in the unpatched case) that we have
10077 a 1 insn window where the asynchronous unwind info is
10078 incorrect. However, if we placed the unwind info at
10079 its correct location we would have incorrect unwind info
10080 in the patched case. Which is probably all moot since
10081 I don't expect Wine generates dwarf2 unwind info for the
10082 system libraries that use this feature. */
10084 insn = emit_insn (gen_blockage ());
10086 push = gen_push (hard_frame_pointer_rtx);
10087 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10088 stack_pointer_rtx);
10089 RTX_FRAME_RELATED_P (push) = 1;
10090 RTX_FRAME_RELATED_P (mov) = 1;
10092 RTX_FRAME_RELATED_P (insn) = 1;
10093 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10094 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10096 /* Note that gen_push incremented m->fs.cfa_offset, even
10097 though we didn't emit the push insn here. */
10098 m->fs.cfa_reg = hard_frame_pointer_rtx;
10099 m->fs.fp_offset = m->fs.cfa_offset;
10100 m->fs.fp_valid = true;
10104 /* The frame pointer is not needed so pop %ebp again.
10105 This leaves us with a pristine state. */
10106 emit_insn (gen_pop (hard_frame_pointer_rtx));
10110 /* The first insn of a function that accepts its static chain on the
10111 stack is to push the register that would be filled in by a direct
10112 call. This insn will be skipped by the trampoline. */
10113 else if (ix86_static_chain_on_stack)
10115 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10116 emit_insn (gen_blockage ());
10118 /* We don't want to interpret this push insn as a register save,
10119 only as a stack adjustment. The real copy of the register as
10120 a save will be done later, if needed. */
10121 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
10122 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10123 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10124 RTX_FRAME_RELATED_P (insn) = 1;
10127 /* Emit prologue code to adjust stack alignment and set up DRAP,
10128 in case DRAP is needed and stack realignment is really needed after reload. */
10129 if (stack_realign_drap)
10131 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10133 /* Only need to push parameter pointer reg if it is caller saved. */
10134 if (!call_used_regs[REGNO (crtl->drap_reg)])
10136 /* Push the arg pointer reg. */
10137 insn = emit_insn (gen_push (crtl->drap_reg));
10138 RTX_FRAME_RELATED_P (insn) = 1;
10141 /* Grab the argument pointer. */
10142 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
10143 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10144 RTX_FRAME_RELATED_P (insn) = 1;
10145 m->fs.cfa_reg = crtl->drap_reg;
10146 m->fs.cfa_offset = 0;
10148 /* Align the stack. */
10149 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10151 GEN_INT (-align_bytes)));
10152 RTX_FRAME_RELATED_P (insn) = 1;
10154 /* Replicate the return address on the stack so that the return
10155 address can be reached via the (argp - 1) slot. This is needed
10156 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10157 expand_builtin_return_addr, etc. */
10158 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
10159 t = gen_frame_mem (Pmode, t);
10160 insn = emit_insn (gen_push (t));
10161 RTX_FRAME_RELATED_P (insn) = 1;
10163 /* For the purposes of frame and register save area addressing,
10164 we've started over with a new frame. */
10165 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10166 m->fs.realigned = true;
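/* Assuming, for illustration, that %ecx was chosen as the DRAP
   register and nothing was pushed beforehand, the block above
   produces roughly:

       lea	4(%esp), %ecx	# point at the incoming arguments
       and	$-ALIGN, %esp	# realign the stack
       push	-4(%ecx)	# replicate the return address  */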
10169 if (frame_pointer_needed && !m->fs.fp_valid)
10171 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10172 slower on all targets. Also sdb doesn't like it. */
10173 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10174 RTX_FRAME_RELATED_P (insn) = 1;
10176 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10178 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10179 RTX_FRAME_RELATED_P (insn) = 1;
10181 if (m->fs.cfa_reg == stack_pointer_rtx)
10182 m->fs.cfa_reg = hard_frame_pointer_rtx;
10183 m->fs.fp_offset = m->fs.sp_offset;
10184 m->fs.fp_valid = true;
10188 int_registers_saved = (frame.nregs == 0);
10190 if (!int_registers_saved)
10192 /* If saving registers via PUSH, do so now. */
10193 if (!frame.save_regs_using_mov)
10195 ix86_emit_save_regs ();
10196 int_registers_saved = true;
10197 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10200 /* When using the red zone we may start register saving before allocating
10201 the stack frame, saving one cycle of the prologue. However, avoid
10202 doing this if we have to probe the stack; at least on x86_64 the
10203 stack probe can turn into a call that clobbers a red zone location. */
10204 else if (ix86_using_red_zone ()
10205 && (! TARGET_STACK_PROBE
10206 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10208 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10209 int_registers_saved = true;
10213 if (stack_realign_fp)
10215 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10216 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10218 /* The computation of the size of the re-aligned stack frame means
10219 that we must allocate the size of the register save area before
10220 performing the actual alignment. Otherwise we cannot guarantee
10221 that there's enough storage above the realignment point. */
10222 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10223 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10224 GEN_INT (m->fs.sp_offset
10225 - frame.sse_reg_save_offset),
10228 /* Align the stack. */
10229 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10231 GEN_INT (-align_bytes)));
10233 /* For the purposes of register save area addressing, the stack
10234 pointer is no longer valid. As for the value of sp_offset,
10235 see ix86_compute_frame_layout, which we need to match in order
10236 to pass verification of stack_pointer_offset at the end. */
10237 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10238 m->fs.sp_valid = false;
10241 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10243 if (flag_stack_usage)
/* We start counting from ARG_POINTER.  */
10246 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10248 /* If it was realigned, take into account the fake frame. */
10249 if (stack_realign_drap)
10251 if (ix86_static_chain_on_stack)
10252 stack_size += UNITS_PER_WORD;
10254 if (!call_used_regs[REGNO (crtl->drap_reg)])
10255 stack_size += UNITS_PER_WORD;
10257 /* This over-estimates by 1 minimal-stack-alignment-unit but
10258 mitigates that by counting in the new return address slot. */
10259 current_function_dynamic_stack_size
10260 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10263 current_function_static_stack_size = stack_size;
10266 /* The stack has already been decremented by the instruction calling us
10267 so we need to probe unconditionally to preserve the protection area. */
10268 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10270 /* We expect the registers to be saved when probes are used. */
10271 gcc_assert (int_registers_saved);
10273 if (STACK_CHECK_MOVING_SP)
10275 ix86_adjust_stack_and_probe (allocate);
10280 HOST_WIDE_INT size = allocate;
10282 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10283 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10285 if (TARGET_STACK_PROBE)
10286 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10288 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10294 else if (!ix86_target_stack_probe ()
10295 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10297 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10298 GEN_INT (-allocate), -1,
10299 m->fs.cfa_reg == stack_pointer_rtx);
10303 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10305 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10307 bool eax_live = false;
10308 bool r10_live = false;
10311 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10312 if (!TARGET_64BIT_MS_ABI)
10313 eax_live = ix86_eax_live_at_start_p ();
10317 emit_insn (gen_push (eax));
10318 allocate -= UNITS_PER_WORD;
10322 r10 = gen_rtx_REG (Pmode, R10_REG);
10323 emit_insn (gen_push (r10));
10324 allocate -= UNITS_PER_WORD;
10327 emit_move_insn (eax, GEN_INT (allocate));
10328 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10330 /* Use the fact that AX still contains ALLOCATE. */
10331 adjust_stack_insn = (TARGET_64BIT
10332 ? gen_pro_epilogue_adjust_stack_di_sub
10333 : gen_pro_epilogue_adjust_stack_si_sub);
10335 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10336 stack_pointer_rtx, eax));
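	  /* Illustratively, the allocation emitted above resembles this
	     64-bit sequence (a sketch; the worker routine name is
	     target-dependent and the worker also probes the new pages):

		  movq  $ALLOCATE, %rax
		  call  <allocate_stack_worker>
		  subq  %rax, %rsp	# %rax still holds ALLOCATE  */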
10338 /* Note that SEH directives need to continue tracking the stack
10339 pointer even after the frame pointer has been set up. */
10340 if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
10342 if (m->fs.cfa_reg == stack_pointer_rtx)
10343 m->fs.cfa_offset += allocate;
10345 RTX_FRAME_RELATED_P (insn) = 1;
10346 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10347 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10348 plus_constant (stack_pointer_rtx,
10351 m->fs.sp_offset += allocate;
10353 if (r10_live && eax_live)
10355 t = choose_baseaddr (m->fs.sp_offset - allocate);
10356 emit_move_insn (r10, gen_frame_mem (Pmode, t));
10357 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10358 emit_move_insn (eax, gen_frame_mem (Pmode, t));
10360 else if (eax_live || r10_live)
10362 t = choose_baseaddr (m->fs.sp_offset - allocate);
10363 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
10366 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
/* If we haven't already set up the frame pointer, do so now.  */
10369 if (frame_pointer_needed && !m->fs.fp_valid)
10371 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10372 GEN_INT (frame.stack_pointer_offset
10373 - frame.hard_frame_pointer_offset));
10374 insn = emit_insn (insn);
10375 RTX_FRAME_RELATED_P (insn) = 1;
10376 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10378 if (m->fs.cfa_reg == stack_pointer_rtx)
10379 m->fs.cfa_reg = hard_frame_pointer_rtx;
10380 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10381 m->fs.fp_valid = true;
10384 if (!int_registers_saved)
10385 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10386 if (frame.nsseregs)
10387 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10389 pic_reg_used = false;
10390 if (pic_offset_table_rtx
10391 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10394 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10396 if (alt_pic_reg_used != INVALID_REGNUM)
10397 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10399 pic_reg_used = true;
10406 if (ix86_cmodel == CM_LARGE_PIC)
10408 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
10409 rtx label = gen_label_rtx ();
10410 emit_label (label);
10411 LABEL_PRESERVE_P (label) = 1;
10412 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10413 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
10414 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10415 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
10416 pic_offset_table_rtx, tmp_reg));
10419 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10422 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
/* In the pic_reg_used case, make sure that the got load isn't deleted
   when mcount needs it.  Blockage to avoid call movement across mcount
   call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
   note.  */
10429 if (crtl->profile && !flag_fentry && pic_reg_used)
10430 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10432 if (crtl->drap_reg && !crtl->stack_realign_needed)
/* vDRAP was set up, but after reload it turns out that stack realignment
   isn't necessary; here we emit prologue code to set up DRAP without
   the stack realignment adjustment.  */
10437 t = choose_baseaddr (0);
10438 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10441 /* Prevent instructions from being scheduled into register save push
10442 sequence when access to the redzone area is done through frame pointer.
10443 The offset between the frame pointer and the stack pointer is calculated
10444 relative to the value of the stack pointer at the end of the function
10445 prologue, and moving instructions that access redzone area via frame
10446 pointer inside push sequence violates this assumption. */
10447 if (frame_pointer_needed && frame.red_zone_size)
10448 emit_insn (gen_memory_blockage ());
10450 /* Emit cld instruction if stringops are used in the function. */
10451 if (TARGET_CLD && ix86_current_function_needs_cld)
10452 emit_insn (gen_cld ());
10454 /* SEH requires that the prologue end within 256 bytes of the start of
10455 the function. Prevent instruction schedules that would extend that. */
10457 emit_insn (gen_blockage ());
10460 /* Emit code to restore REG using a POP insn. */
10463 ix86_emit_restore_reg_using_pop (rtx reg)
10465 struct machine_function *m = cfun->machine;
10466 rtx insn = emit_insn (gen_pop (reg));
10468 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10469 m->fs.sp_offset -= UNITS_PER_WORD;
10471 if (m->fs.cfa_reg == crtl->drap_reg
10472 && REGNO (reg) == REGNO (crtl->drap_reg))
10474 /* Previously we'd represented the CFA as an expression
10475 like *(%ebp - 8). We've just popped that value from
10476 the stack, which means we need to reset the CFA to
10477 the drap register. This will remain until we restore
10478 the stack pointer. */
10479 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10480 RTX_FRAME_RELATED_P (insn) = 1;
10482 /* This means that the DRAP register is valid for addressing too. */
10483 m->fs.drap_valid = true;
10487 if (m->fs.cfa_reg == stack_pointer_rtx)
10489 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10490 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10491 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10492 RTX_FRAME_RELATED_P (insn) = 1;
10494 m->fs.cfa_offset -= UNITS_PER_WORD;
10497 /* When the frame pointer is the CFA, and we pop it, we are
10498 swapping back to the stack pointer as the CFA. This happens
10499 for stack frames that don't allocate other data, so we assume
10500 the stack pointer is now pointing at the return address, i.e.
10501 the function entry state, which makes the offset be 1 word. */
10502 if (reg == hard_frame_pointer_rtx)
10504 m->fs.fp_valid = false;
10505 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10507 m->fs.cfa_reg = stack_pointer_rtx;
10508 m->fs.cfa_offset -= UNITS_PER_WORD;
10510 add_reg_note (insn, REG_CFA_DEF_CFA,
10511 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10512 GEN_INT (m->fs.cfa_offset)));
10513 RTX_FRAME_RELATED_P (insn) = 1;
10518 /* Emit code to restore saved registers using POP insns. */
10521 ix86_emit_restore_regs_using_pop (void)
10523 unsigned int regno;
10525 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10526 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10527 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
10530 /* Emit code and notes for the LEAVE instruction. */
10533 ix86_emit_leave (void)
10535 struct machine_function *m = cfun->machine;
10536 rtx insn = emit_insn (ix86_gen_leave ());
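  /* "leave" is equivalent to "mov %ebp, %esp; pop %ebp" (or the 64-bit
     analogue), which is why the stack pointer becomes valid again at
     fp_offset - UNITS_PER_WORD while the frame pointer becomes invalid.  */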
10538 ix86_add_queued_cfa_restore_notes (insn);
10540 gcc_assert (m->fs.fp_valid);
10541 m->fs.sp_valid = true;
10542 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10543 m->fs.fp_valid = false;
10545 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10547 m->fs.cfa_reg = stack_pointer_rtx;
10548 m->fs.cfa_offset = m->fs.sp_offset;
10550 add_reg_note (insn, REG_CFA_DEF_CFA,
10551 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
10552 RTX_FRAME_RELATED_P (insn) = 1;
10553 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10558 /* Emit code to restore saved registers using MOV insns.
10559 First register is restored from CFA - CFA_OFFSET. */
10561 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10562 int maybe_eh_return)
10564 struct machine_function *m = cfun->machine;
10565 unsigned int regno;
10567 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10568 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10570 rtx reg = gen_rtx_REG (Pmode, regno);
10573 mem = choose_baseaddr (cfa_offset);
10574 mem = gen_frame_mem (Pmode, mem);
10575 insn = emit_move_insn (reg, mem);
10577 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
/* Previously we'd represented the CFA as an expression
   like *(%ebp - 8).  We've just restored that value from
   the stack, which means we need to reset the CFA to
   the drap register.  This will remain until we restore
   the stack pointer.  */
10584 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10585 RTX_FRAME_RELATED_P (insn) = 1;
10587 /* This means that the DRAP register is valid for addressing. */
10588 m->fs.drap_valid = true;
10591 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10593 cfa_offset -= UNITS_PER_WORD;
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
10600 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10601 int maybe_eh_return)
10603 unsigned int regno;
10605 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10606 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10608 rtx reg = gen_rtx_REG (V4SFmode, regno);
10611 mem = choose_baseaddr (cfa_offset);
10612 mem = gen_rtx_MEM (V4SFmode, mem);
10613 set_mem_align (mem, 128);
10614 emit_move_insn (reg, mem);
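      /* The save slot is 16-byte aligned, so the move above can use an
	 aligned vector load (e.g. movaps) rather than an unaligned one.  */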
10616 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10622 /* Restore function stack, frame, and registers. */
10625 ix86_expand_epilogue (int style)
10627 struct machine_function *m = cfun->machine;
10628 struct machine_frame_state frame_state_save = m->fs;
10629 struct ix86_frame frame;
10630 bool restore_regs_via_mov;
10633 ix86_finalize_stack_realign_flags ();
10634 ix86_compute_frame_layout (&frame);
10636 m->fs.sp_valid = (!frame_pointer_needed
10637 || (current_function_sp_is_unchanging
10638 && !stack_realign_fp));
10639 gcc_assert (!m->fs.sp_valid
10640 || m->fs.sp_offset == frame.stack_pointer_offset);
10642 /* The FP must be valid if the frame pointer is present. */
10643 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10644 gcc_assert (!m->fs.fp_valid
10645 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10647 /* We must have *some* valid pointer to the stack frame. */
10648 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10650 /* The DRAP is never valid at this point. */
10651 gcc_assert (!m->fs.drap_valid);
10653 /* See the comment about red zone and frame
10654 pointer usage in ix86_expand_prologue. */
10655 if (frame_pointer_needed && frame.red_zone_size)
10656 emit_insn (gen_memory_blockage ());
10658 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10659 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10661 /* Determine the CFA offset of the end of the red-zone. */
10662 m->fs.red_zone_offset = 0;
10663 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10665 /* The red-zone begins below the return address. */
10666 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10668 /* When the register save area is in the aligned portion of
10669 the stack, determine the maximum runtime displacement that
10670 matches up with the aligned frame. */
10671 if (stack_realign_drap)
10672 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10676 /* Special care must be taken for the normal return case of a function
10677 using eh_return: the eax and edx registers are marked as saved, but
10678 not restored along this path. Adjust the save location to match. */
10679 if (crtl->calls_eh_return && style != 2)
10680 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10682 /* EH_RETURN requires the use of moves to function properly. */
10683 if (crtl->calls_eh_return)
10684 restore_regs_via_mov = true;
10685 /* SEH requires the use of pops to identify the epilogue. */
10686 else if (TARGET_SEH)
10687 restore_regs_via_mov = false;
/* If we're only restoring one register and sp is not valid, then
   use a move instruction to restore the register, since it's less
   work than reloading sp and popping the register.  */
10691 else if (!m->fs.sp_valid && frame.nregs <= 1)
10692 restore_regs_via_mov = true;
10693 else if (TARGET_EPILOGUE_USING_MOVE
10694 && cfun->machine->use_fast_prologue_epilogue
10695 && (frame.nregs > 1
10696 || m->fs.sp_offset != frame.reg_save_offset))
10697 restore_regs_via_mov = true;
10698 else if (frame_pointer_needed
10700 && m->fs.sp_offset != frame.reg_save_offset)
10701 restore_regs_via_mov = true;
10702 else if (frame_pointer_needed
10703 && TARGET_USE_LEAVE
10704 && cfun->machine->use_fast_prologue_epilogue
10705 && frame.nregs == 1)
10706 restore_regs_via_mov = true;
10708 restore_regs_via_mov = false;
10710 if (restore_regs_via_mov || frame.nsseregs)
10712 /* Ensure that the entire register save area is addressable via
10713 the stack pointer, if we will restore via sp. */
10715 && m->fs.sp_offset > 0x7fffffff
10716 && !(m->fs.fp_valid || m->fs.drap_valid)
10717 && (frame.nsseregs + frame.nregs) != 0)
10719 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10720 GEN_INT (m->fs.sp_offset
10721 - frame.sse_reg_save_offset),
10723 m->fs.cfa_reg == stack_pointer_rtx);
10727 /* If there are any SSE registers to restore, then we have to do it
10728 via moves, since there's obviously no pop for SSE regs. */
10729 if (frame.nsseregs)
10730 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10733 if (restore_regs_via_mov)
10738 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10740 /* eh_return epilogues need %ecx added to the stack pointer. */
10743 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10745 /* Stack align doesn't work with eh_return. */
10746 gcc_assert (!stack_realign_drap);
/* Neither do regparm nested functions.  */
10748 gcc_assert (!ix86_static_chain_on_stack);
10750 if (frame_pointer_needed)
10752 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10753 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10754 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10756 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10757 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10759 /* Note that we use SA as a temporary CFA, as the return
10760 address is at the proper place relative to it. We
10761 pretend this happens at the FP restore insn because
10762 prior to this insn the FP would be stored at the wrong
10763 offset relative to SA, and after this insn we have no
10764 other reasonable register to use for the CFA. We don't
10765 bother resetting the CFA to the SP for the duration of
10766 the return insn. */
10767 add_reg_note (insn, REG_CFA_DEF_CFA,
10768 plus_constant (sa, UNITS_PER_WORD));
10769 ix86_add_queued_cfa_restore_notes (insn);
10770 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10771 RTX_FRAME_RELATED_P (insn) = 1;
10773 m->fs.cfa_reg = sa;
10774 m->fs.cfa_offset = UNITS_PER_WORD;
10775 m->fs.fp_valid = false;
10777 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10778 const0_rtx, style, false);
10782 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10783 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10784 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10785 ix86_add_queued_cfa_restore_notes (insn);
10787 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10788 if (m->fs.cfa_offset != UNITS_PER_WORD)
10790 m->fs.cfa_offset = UNITS_PER_WORD;
10791 add_reg_note (insn, REG_CFA_DEF_CFA,
10792 plus_constant (stack_pointer_rtx,
10794 RTX_FRAME_RELATED_P (insn) = 1;
10797 m->fs.sp_offset = UNITS_PER_WORD;
10798 m->fs.sp_valid = true;
10803 /* SEH requires that the function end with (1) a stack adjustment
10804 if necessary, (2) a sequence of pops, and (3) a return or
10805 jump instruction. Prevent insns from the function body from
10806 being scheduled into this sequence. */
/* Prevent a catch region from being adjacent to the standard
   epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
   several other flags that would be interesting to test are
   set up yet.  */
10813 if (flag_non_call_exceptions)
10814 emit_insn (gen_nops (const1_rtx));
10816 emit_insn (gen_blockage ());
10819 /* First step is to deallocate the stack frame so that we can
10820 pop the registers. */
10821 if (!m->fs.sp_valid)
10823 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10824 GEN_INT (m->fs.fp_offset
10825 - frame.reg_save_offset),
10828 else if (m->fs.sp_offset != frame.reg_save_offset)
10830 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10831 GEN_INT (m->fs.sp_offset
10832 - frame.reg_save_offset),
10834 m->fs.cfa_reg == stack_pointer_rtx);
10837 ix86_emit_restore_regs_using_pop ();
/* If we used a frame pointer and haven't already got rid of it,
   do so now.  */
10842 if (m->fs.fp_valid)
10844 /* If the stack pointer is valid and pointing at the frame
10845 pointer store address, then we only need a pop. */
10846 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
10847 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10848 /* Leave results in shorter dependency chains on CPUs that are
10849 able to grok it fast. */
10850 else if (TARGET_USE_LEAVE
10851 || optimize_function_for_size_p (cfun)
10852 || !cfun->machine->use_fast_prologue_epilogue)
10853 ix86_emit_leave ();
10856 pro_epilogue_adjust_stack (stack_pointer_rtx,
10857 hard_frame_pointer_rtx,
10858 const0_rtx, style, !using_drap);
10859 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10865 int param_ptr_offset = UNITS_PER_WORD;
10868 gcc_assert (stack_realign_drap);
10870 if (ix86_static_chain_on_stack)
10871 param_ptr_offset += UNITS_PER_WORD;
10872 if (!call_used_regs[REGNO (crtl->drap_reg)])
10873 param_ptr_offset += UNITS_PER_WORD;
10875 insn = emit_insn (gen_rtx_SET
10876 (VOIDmode, stack_pointer_rtx,
10877 gen_rtx_PLUS (Pmode,
10879 GEN_INT (-param_ptr_offset))));
10880 m->fs.cfa_reg = stack_pointer_rtx;
10881 m->fs.cfa_offset = param_ptr_offset;
10882 m->fs.sp_offset = param_ptr_offset;
10883 m->fs.realigned = false;
10885 add_reg_note (insn, REG_CFA_DEF_CFA,
10886 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10887 GEN_INT (param_ptr_offset)));
10888 RTX_FRAME_RELATED_P (insn) = 1;
10890 if (!call_used_regs[REGNO (crtl->drap_reg)])
10891 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10894 /* At this point the stack pointer must be valid, and we must have
10895 restored all of the registers. We may not have deallocated the
10896 entire stack frame. We've delayed this until now because it may
10897 be possible to merge the local stack deallocation with the
10898 deallocation forced by ix86_static_chain_on_stack. */
10899 gcc_assert (m->fs.sp_valid);
10900 gcc_assert (!m->fs.fp_valid);
10901 gcc_assert (!m->fs.realigned);
10902 if (m->fs.sp_offset != UNITS_PER_WORD)
10904 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10905 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10909 /* Sibcall epilogues don't want a return instruction. */
10912 m->fs = frame_state_save;
10916 /* Emit vzeroupper if needed. */
10917 if (TARGET_VZEROUPPER
10918 && cfun->machine->use_avx256_p
10919 && !cfun->machine->caller_return_avx256_p)
10921 cfun->machine->use_vzeroupper_p = 1;
10922 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
10925 if (crtl->args.pops_args && crtl->args.size)
10927 rtx popc = GEN_INT (crtl->args.pops_args);
/* i386 can only pop 64K bytes.  If asked to pop more, pop the return
   address, do an explicit add, and jump indirectly to the caller,
   as sketched below.  */
10932 if (crtl->args.pops_args >= 65536)
10934 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10937 /* There is no "pascal" calling convention in any 64bit ABI. */
10938 gcc_assert (!TARGET_64BIT);
10940 insn = emit_insn (gen_pop (ecx));
10941 m->fs.cfa_offset -= UNITS_PER_WORD;
10942 m->fs.sp_offset -= UNITS_PER_WORD;
10944 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10945 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10946 add_reg_note (insn, REG_CFA_REGISTER,
10947 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10948 RTX_FRAME_RELATED_P (insn) = 1;
10950 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10952 emit_jump_insn (gen_return_indirect_internal (ecx));
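	  /* The sequence emitted above is, illustratively:

		  popl  %ecx		# pop the return address
		  addl  $N, %esp	# N = crtl->args.pops_args
		  jmp   *%ecx		# return to the caller

	     (a sketch; "ret $N" below handles the common N < 64K case).  */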
10955 emit_jump_insn (gen_return_pop_internal (popc));
10958 emit_jump_insn (gen_return_internal ());
10960 /* Restore the state back to the state from the prologue,
10961 so that it's correct for the next epilogue. */
10962 m->fs = frame_state_save;
10965 /* Reset from the function's potential modifications. */
10968 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10969 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10971 if (pic_offset_table_rtx)
10972 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10974 /* Mach-O doesn't support labels at the end of objects, so if
10975 it looks like we might want one, insert a NOP. */
10977 rtx insn = get_last_insn ();
10980 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10981 insn = PREV_INSN (insn);
10985 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10986 fputs ("\tnop\n", file);
10992 /* Return a scratch register to use in the split stack prologue. The
split stack prologue is used for -fsplit-stack.  These are the first
instructions in the function, even before the regular prologue.
10995 The scratch register can be any caller-saved register which is not
10996 used for parameters or for the static chain. */
10998 static unsigned int
10999 split_stack_prologue_scratch_regno (void)
is_fastcall = (lookup_attribute ("fastcall",
				 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
	       != NULL);
11011 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11015 if (DECL_STATIC_CHAIN (cfun->decl))
11017 sorry ("-fsplit-stack does not support fastcall with "
11018 "nested function");
11019 return INVALID_REGNUM;
11023 else if (regparm < 3)
11025 if (!DECL_STATIC_CHAIN (cfun->decl))
sorry ("-fsplit-stack does not support 2 register "
       "parameters for a nested function");
11033 return INVALID_REGNUM;
11040 /* FIXME: We could make this work by pushing a register
11041 around the addition and comparison. */
11042 sorry ("-fsplit-stack does not support 3 register parameters");
11043 return INVALID_REGNUM;
/* A SYMBOL_REF for the function which allocates new stack space for
   split stack.  */
11051 static GTY(()) rtx split_stack_fn;
11053 /* Handle -fsplit-stack. These are the first instructions in the
11054 function, even before the regular prologue. */
11057 ix86_expand_split_stack_prologue (void)
11059 struct ix86_frame frame;
11060 HOST_WIDE_INT allocate;
11062 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11063 rtx scratch_reg = NULL_RTX;
11064 rtx varargs_label = NULL_RTX;
11066 gcc_assert (flag_split_stack && reload_completed);
11068 ix86_finalize_stack_realign_flags ();
11069 ix86_compute_frame_layout (&frame);
11070 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11072 /* This is the label we will branch to if we have enough stack
11073 space. We expect the basic block reordering pass to reverse this
11074 branch if optimizing, so that we branch in the unlikely case. */
11075 label = gen_label_rtx ();
11077 /* We need to compare the stack pointer minus the frame size with
11078 the stack boundary in the TCB. The stack boundary always gives
11079 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11080 can compare directly. Otherwise we need to do an addition. */
11082 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11083 UNSPEC_STACK_CHECK);
11084 limit = gen_rtx_CONST (Pmode, limit);
11085 limit = gen_rtx_MEM (Pmode, limit);
11086 if (allocate < SPLIT_STACK_AVAILABLE)
11087 current = stack_pointer_rtx;
11090 unsigned int scratch_regno;
11093 /* We need a scratch register to hold the stack pointer minus
11094 the required frame size. Since this is the very start of the
11095 function, the scratch register can be any caller-saved
11096 register which is not used for parameters. */
11097 offset = GEN_INT (- allocate);
11098 scratch_regno = split_stack_prologue_scratch_regno ();
11099 if (scratch_regno == INVALID_REGNUM)
11101 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11102 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11104 /* We don't use ix86_gen_add3 in this case because it will
11105 want to split to lea, but when not optimizing the insn
11106 will not be split after this point. */
11107 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11108 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11113 emit_move_insn (scratch_reg, offset);
11114 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
11115 stack_pointer_rtx));
11117 current = scratch_reg;
11120 ix86_expand_branch (GEU, current, limit, label);
11121 jump_insn = get_last_insn ();
11122 JUMP_LABEL (jump_insn) = label;
11124 /* Mark the jump as very likely to be taken. */
11125 add_reg_note (jump_insn, REG_BR_PROB,
11126 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11128 /* Get more stack space. We pass in the desired stack space and the
11129 size of the arguments to copy to the new stack. In 32-bit mode
11130 we push the parameters; __morestack will return on a new stack
anyhow.  In 64-bit mode we pass the parameters in r10 and r11.  */
11133 allocate_rtx = GEN_INT (allocate);
11134 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11135 call_fusage = NULL_RTX;
11140 reg = gen_rtx_REG (Pmode, R10_REG);
11142 /* If this function uses a static chain, it will be in %r10.
11143 Preserve it across the call to __morestack. */
11144 if (DECL_STATIC_CHAIN (cfun->decl))
11148 rax = gen_rtx_REG (Pmode, AX_REG);
11149 emit_move_insn (rax, reg);
11150 use_reg (&call_fusage, rax);
11153 emit_move_insn (reg, allocate_rtx);
11154 use_reg (&call_fusage, reg);
11155 reg = gen_rtx_REG (Pmode, R11_REG);
11156 emit_move_insn (reg, GEN_INT (args_size));
11157 use_reg (&call_fusage, reg);
11161 emit_insn (gen_push (GEN_INT (args_size)));
11162 emit_insn (gen_push (allocate_rtx));
11164 if (split_stack_fn == NULL_RTX)
11165 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11166 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
11167 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11169 add_function_usage_to (call_insn, call_fusage);
11171 /* In order to make call/return prediction work right, we now need
11172 to execute a return instruction. See
11173 libgcc/config/i386/morestack.S for the details on how this works.
11175 For flow purposes gcc must not see this as a return
11176 instruction--we need control flow to continue at the subsequent
11177 label. Therefore, we use an unspec. */
11178 gcc_assert (crtl->args.pops_args < 65536);
11179 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11181 /* If we are in 64-bit mode and this function uses a static chain,
we saved %r10 in %rax before calling __morestack.  */
11183 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11184 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
11185 gen_rtx_REG (Pmode, AX_REG));
11187 /* If this function calls va_start, we need to store a pointer to
11188 the arguments on the old stack, because they may not have been
11189 all copied to the new stack. At this point the old stack can be
11190 found at the frame pointer value used by __morestack, because
11191 __morestack has set that up before calling back to us. Here we
11192 store that pointer in a scratch register, and in
11193 ix86_expand_prologue we store the scratch register in a stack
11195 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11197 unsigned int scratch_regno;
11201 scratch_regno = split_stack_prologue_scratch_regno ();
11202 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11203 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* The __morestack function has set up a frame pointer, so in
   64-bit mode the stack above it looks like:

       saved frame pointer
       return address within this function
       return address of caller of this function
       stack arguments

   So we add three words to get to the stack arguments.

   In 32-bit mode the two __morestack arguments are also on the stack:

       saved frame pointer
       return address within this function
       first argument to __morestack
       second argument to __morestack
       return address of caller of this function
       stack arguments

   So we add five words to get to the stack arguments.  */
11221 words = TARGET_64BIT ? 3 : 5;
11222 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11223 gen_rtx_PLUS (Pmode, frame_reg,
11224 GEN_INT (words * UNITS_PER_WORD))));
11226 varargs_label = gen_label_rtx ();
11227 emit_jump_insn (gen_jump (varargs_label));
11228 JUMP_LABEL (get_last_insn ()) = varargs_label;
11233 emit_label (label);
11234 LABEL_NUSES (label) = 1;
11236 /* If this function calls va_start, we now have to set the scratch
11237 register for the case where we do not call __morestack. In this
11238 case we need to set it based on the stack pointer. */
11239 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11241 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11242 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11243 GEN_INT (UNITS_PER_WORD))));
11245 emit_label (varargs_label);
11246 LABEL_NUSES (varargs_label) = 1;
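  /* Putting it together, the split-stack check emitted by this function
     resembles the following 64-bit sketch (illustrative only, assuming
     the glibc/gold TCB layout where the stack limit lives at %fs:0x70;
     for large frames %rsp minus the frame size is compared instead):

	  cmpq   %fs:0x70, %rsp		# enough stack space?
	  jae    .Lenough
	  movq   $FRAME_SIZE, %r10	# bytes to allocate
	  movq   $ARGS_SIZE, %r11	# bytes of arguments to copy
	  call   __morestack
	  ret				# see morestack.S for why
     .Lenough:
	  ...function body...  */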
11250 /* We may have to tell the dataflow pass that the split stack prologue
11251 is initializing a scratch register. */
11254 ix86_live_on_entry (bitmap regs)
11256 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11258 gcc_assert (flag_split_stack);
11259 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11263 /* Extract the parts of an RTL expression that is a valid memory address
11264 for an instruction. Return 0 if the structure of the address is
11265 grossly off. Return -1 if the address contains ASHIFT, so it is not
11266 strictly valid, but still used for computing length of lea instruction. */
11269 ix86_decompose_address (rtx addr, struct ix86_address *out)
11271 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11272 rtx base_reg, index_reg;
11273 HOST_WIDE_INT scale = 1;
11274 rtx scale_rtx = NULL_RTX;
11277 enum ix86_address_seg seg = SEG_DEFAULT;
11279 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
11281 else if (GET_CODE (addr) == PLUS)
11283 rtx addends[4], op;
11291 addends[n++] = XEXP (op, 1);
11294 while (GET_CODE (op) == PLUS);
11299 for (i = n; i >= 0; --i)
11302 switch (GET_CODE (op))
11307 index = XEXP (op, 0);
11308 scale_rtx = XEXP (op, 1);
11314 index = XEXP (op, 0);
11315 tmp = XEXP (op, 1);
11316 if (!CONST_INT_P (tmp))
11318 scale = INTVAL (tmp);
11319 if ((unsigned HOST_WIDE_INT) scale > 3)
11321 scale = 1 << scale;
11325 if (XINT (op, 1) == UNSPEC_TP
11326 && TARGET_TLS_DIRECT_SEG_REFS
11327 && seg == SEG_DEFAULT)
11328 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
11357 else if (GET_CODE (addr) == MULT)
11359 index = XEXP (addr, 0); /* index*scale */
11360 scale_rtx = XEXP (addr, 1);
11362 else if (GET_CODE (addr) == ASHIFT)
11364 /* We're called for lea too, which implements ashift on occasion. */
11365 index = XEXP (addr, 0);
11366 tmp = XEXP (addr, 1);
11367 if (!CONST_INT_P (tmp))
11369 scale = INTVAL (tmp);
11370 if ((unsigned HOST_WIDE_INT) scale > 3)
11372 scale = 1 << scale;
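      /* E.g. (ashift %reg 3) is the non-canonical form of
	 (mult %reg 8) and addresses reg*8.  */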
11376 disp = addr; /* displacement */
11378 /* Extract the integral value of scale. */
11381 if (!CONST_INT_P (scale_rtx))
11383 scale = INTVAL (scale_rtx);
11386 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11387 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11389 /* Avoid useless 0 displacement. */
11390 if (disp == const0_rtx && (base || index))
/* Allow arg pointer and stack pointer as index if there is no scaling.  */
11394 if (base_reg && index_reg && scale == 1
11395 && (index_reg == arg_pointer_rtx
11396 || index_reg == frame_pointer_rtx
11397 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11400 tmp = base, base = index, index = tmp;
11401 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11404 /* Special case: %ebp cannot be encoded as a base without a displacement.
11408 && (base_reg == hard_frame_pointer_rtx
11409 || base_reg == frame_pointer_rtx
11410 || base_reg == arg_pointer_rtx
11411 || (REG_P (base_reg)
11412 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11413 || REGNO (base_reg) == R13_REG))))
11416 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11417 Avoid this by transforming to [%esi+0].
11418 Reload calls address legitimization without cfun defined, so we need
11419 to test cfun for being non-NULL. */
11420 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11421 && base_reg && !index_reg && !disp
11422 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11425 /* Special case: encode reg+reg instead of reg*2. */
11426 if (!base && index && scale == 2)
11427 base = index, base_reg = index_reg, scale = 1;
11429 /* Special case: scaling cannot be encoded without base or displacement. */
11430 if (!base && !disp && index && scale != 1)
11434 out->index = index;
11436 out->scale = scale;
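/* For example (illustrative), the canonical RTL for 12(%eax,%ebx,4),
   i.e. (plus (plus (mult (reg ebx) (const_int 4)) (reg eax))
	      (const_int 12)),
   decomposes into base = %eax, index = %ebx, scale = 4, disp = 12.  */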
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
11448 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
11450 struct ix86_address parts;
11452 int ok = ix86_decompose_address (x, &parts);
11456 if (parts.base && GET_CODE (parts.base) == SUBREG)
11457 parts.base = SUBREG_REG (parts.base);
11458 if (parts.index && GET_CODE (parts.index) == SUBREG)
11459 parts.index = SUBREG_REG (parts.index);
11461 /* Attempt to minimize number of registers in the address. */
11463 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11465 && (!REG_P (parts.index)
11466 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11470 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11472 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11473 && parts.base != parts.index)
/* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
   since its predecode logic can't detect the length of instructions
   and it degenerates to vector decoded.  Increase cost of such
   addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
   to split such addresses or even refuse such addresses at all.

   The following addressing modes are affected:
       [base+scale*index]
       [scale*index+disp]
       [base+index]

   The first and last case may be avoidable by explicitly coding the zero
   in the memory address, but I don't have an AMD-K6 machine handy to
   check this theory.  */

if (TARGET_K6
11492 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11493 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11494 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */
11505 darwin_local_data_pic (rtx disp)
11507 return (GET_CODE (disp) == UNSPEC
11508 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
11511 /* Determine if a given RTX is a valid constant. We already know this
11512 satisfies CONSTANT_P. */
11515 legitimate_constant_p (rtx x)
11517 switch (GET_CODE (x))
11522 if (GET_CODE (x) == PLUS)
11524 if (!CONST_INT_P (XEXP (x, 1)))
11529 if (TARGET_MACHO && darwin_local_data_pic (x))
11532 /* Only some unspecs are valid as "constants". */
11533 if (GET_CODE (x) == UNSPEC)
11534 switch (XINT (x, 1))
11537 case UNSPEC_GOTOFF:
11538 case UNSPEC_PLTOFF:
11539 return TARGET_64BIT;
11541 case UNSPEC_NTPOFF:
11542 x = XVECEXP (x, 0, 0);
11543 return (GET_CODE (x) == SYMBOL_REF
11544 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11545 case UNSPEC_DTPOFF:
11546 x = XVECEXP (x, 0, 0);
11547 return (GET_CODE (x) == SYMBOL_REF
11548 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
11553 /* We must have drilled down to a symbol. */
11554 if (GET_CODE (x) == LABEL_REF)
11556 if (GET_CODE (x) != SYMBOL_REF)
11561 /* TLS symbols are never valid. */
11562 if (SYMBOL_REF_TLS_MODEL (x))
11565 /* DLLIMPORT symbols are never valid. */
11566 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11567 && SYMBOL_REF_DLLIMPORT_P (x))
11571 /* mdynamic-no-pic */
11572 if (MACHO_DYNAMIC_NO_PIC_P)
11573 return machopic_symbol_defined_p (x);
11578 if (GET_MODE (x) == TImode
11579 && x != CONST0_RTX (TImode)
11585 if (!standard_sse_constant_p (x))
11592 /* Otherwise we handle everything else in the move patterns. */
11596 /* Determine if it's legal to put X into the constant pool. This
11597 is not possible for the address of thread-local symbols, which
11598 is checked above. */
11601 ix86_cannot_force_const_mem (rtx x)
11603 /* We can always put integral constants and vectors in memory. */
11604 switch (GET_CODE (x))
11614 return !legitimate_constant_p (x);
11618 /* Nonzero if the constant value X is a legitimate general operand
11619 when generating PIC code. It is given that flag_pic is on and
11620 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11623 legitimate_pic_operand_p (rtx x)
11627 switch (GET_CODE (x))
11630 inner = XEXP (x, 0);
11631 if (GET_CODE (inner) == PLUS
11632 && CONST_INT_P (XEXP (inner, 1)))
11633 inner = XEXP (inner, 0);
11635 /* Only some unspecs are valid as "constants". */
11636 if (GET_CODE (inner) == UNSPEC)
11637 switch (XINT (inner, 1))
11640 case UNSPEC_GOTOFF:
11641 case UNSPEC_PLTOFF:
11642 return TARGET_64BIT;
11644 x = XVECEXP (inner, 0, 0);
11645 return (GET_CODE (x) == SYMBOL_REF
11646 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11647 case UNSPEC_MACHOPIC_OFFSET:
11648 return legitimate_pic_address_disp_p (x);
11656 return legitimate_pic_address_disp_p (x);
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
11667 legitimate_pic_address_disp_p (rtx disp)
11671 /* In 64bit mode we can allow direct addresses of symbols and labels
11672 when they are not dynamic symbols. */
11675 rtx op0 = disp, op1;
11677 switch (GET_CODE (disp))
11683 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11685 op0 = XEXP (XEXP (disp, 0), 0);
11686 op1 = XEXP (XEXP (disp, 0), 1);
11687 if (!CONST_INT_P (op1)
11688 || INTVAL (op1) >= 16*1024*1024
11689 || INTVAL (op1) < -16*1024*1024)
11691 if (GET_CODE (op0) == LABEL_REF)
11693 if (GET_CODE (op0) != SYMBOL_REF)
11698 /* TLS references should always be enclosed in UNSPEC. */
11699 if (SYMBOL_REF_TLS_MODEL (op0))
11701 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11702 && ix86_cmodel != CM_LARGE_PIC)
11710 if (GET_CODE (disp) != CONST)
11712 disp = XEXP (disp, 0);
/* It is unsafe to allow PLUS expressions; this limits the allowed
   distance of GOT tables.  We should not need these anyway.  */
11718 if (GET_CODE (disp) != UNSPEC
11719 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11720 && XINT (disp, 1) != UNSPEC_GOTOFF
11721 && XINT (disp, 1) != UNSPEC_PLTOFF))
11724 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11725 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11731 if (GET_CODE (disp) == PLUS)
11733 if (!CONST_INT_P (XEXP (disp, 1)))
11735 disp = XEXP (disp, 0);
11739 if (TARGET_MACHO && darwin_local_data_pic (disp))
11742 if (GET_CODE (disp) != UNSPEC)
11745 switch (XINT (disp, 1))
/* We need to check for both symbols and labels because VxWorks loads
   text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   details.  */
11753 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11754 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11755 case UNSPEC_GOTOFF:
/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   While the ABI also specifies a 32bit relocation, we don't produce
   it in the small PIC model at all.  */
11759 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11760 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11762 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11764 case UNSPEC_GOTTPOFF:
11765 case UNSPEC_GOTNTPOFF:
11766 case UNSPEC_INDNTPOFF:
11769 disp = XVECEXP (disp, 0, 0);
11770 return (GET_CODE (disp) == SYMBOL_REF
11771 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11772 case UNSPEC_NTPOFF:
11773 disp = XVECEXP (disp, 0, 0);
11774 return (GET_CODE (disp) == SYMBOL_REF
11775 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11776 case UNSPEC_DTPOFF:
11777 disp = XVECEXP (disp, 0, 0);
11778 return (GET_CODE (disp) == SYMBOL_REF
11779 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11785 /* Recognizes RTL expressions that are valid memory addresses for an
11786 instruction. The MODE argument is the machine mode for the MEM
11787 expression that wants to use this address.
It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
convert common non-canonical forms to canonical form so that they will
be recognized.  */
11794 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11795 rtx addr, bool strict)
11797 struct ix86_address parts;
11798 rtx base, index, disp;
11799 HOST_WIDE_INT scale;
11801 if (ix86_decompose_address (addr, &parts) <= 0)
11802 /* Decomposition failed. */
11806 index = parts.index;
11808 scale = parts.scale;
11810 /* Validate base register.
11812 Don't allow SUBREG's that span more than a word here. It can lead to spill
11813 failures when the base is one word out of a two word structure, which is
11814 represented internally as a DImode int. */
11822 else if (GET_CODE (base) == SUBREG
11823 && REG_P (SUBREG_REG (base))
11824 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11826 reg = SUBREG_REG (base);
11828 /* Base is not a register. */
11831 if (GET_MODE (base) != Pmode)
11832 /* Base is not in Pmode. */
11835 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11836 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11837 /* Base is not valid. */
11841 /* Validate index register.
11843 Don't allow SUBREG's that span more than a word here -- same as above. */
11851 else if (GET_CODE (index) == SUBREG
11852 && REG_P (SUBREG_REG (index))
11853 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11855 reg = SUBREG_REG (index);
11857 /* Index is not a register. */
11860 if (GET_MODE (index) != Pmode)
11861 /* Index is not in Pmode. */
11864 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11865 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11866 /* Index is not valid. */
11870 /* Validate scale factor. */
11874 /* Scale without index. */
11877 if (scale != 2 && scale != 4 && scale != 8)
11878 /* Scale is not a valid multiplier. */
11882 /* Validate displacement. */
11885 if (GET_CODE (disp) == CONST
11886 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11887 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11888 switch (XINT (XEXP (disp, 0), 1))
/* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
   used.  While the ABI also specifies 32bit relocations, we don't
   produce them at all and use IP-relative addressing instead.  */
11894 case UNSPEC_GOTOFF:
11895 gcc_assert (flag_pic);
11897 goto is_legitimate_pic;
11899 /* 64bit address unspec. */
11902 case UNSPEC_GOTPCREL:
11903 gcc_assert (flag_pic);
11904 goto is_legitimate_pic;
11906 case UNSPEC_GOTTPOFF:
11907 case UNSPEC_GOTNTPOFF:
11908 case UNSPEC_INDNTPOFF:
11909 case UNSPEC_NTPOFF:
11910 case UNSPEC_DTPOFF:
11913 case UNSPEC_STACK_CHECK:
11914 gcc_assert (flag_split_stack);
11918 /* Invalid address unspec. */
11922 else if (SYMBOLIC_CONST (disp)
11926 && MACHOPIC_INDIRECT
11927 && !machopic_operand_p (disp)
11933 if (TARGET_64BIT && (index || base))
11935 /* foo@dtpoff(%rX) is ok. */
11936 if (GET_CODE (disp) != CONST
11937 || GET_CODE (XEXP (disp, 0)) != PLUS
11938 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11939 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11940 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11941 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11942 /* Non-constant pic memory reference. */
11945 else if ((!TARGET_MACHO || flag_pic)
11946 && ! legitimate_pic_address_disp_p (disp))
11947 /* Displacement is an invalid pic construct. */
11950 else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
/* Displacement must be referenced via a non-lazy pointer.  */
11955 /* This code used to verify that a symbolic pic displacement
11956 includes the pic_offset_table_rtx register.
While this is a good idea, unfortunately these constructs may
be created by the "adds using lea" optimization for incorrect
code.  Such code is nonsensical, but results in addressing the
GOT table with a pic_offset_table_rtx base.  We can't just
refuse it easily, since it gets matched by the "addsi3" pattern,
which later gets split to lea in the case the output register
differs from the input.  While this can be handled by a separate
addsi pattern for this case that never results in lea, disabling
this test seems to be the easier and correct fix for the crash.  */
11977 else if (GET_CODE (disp) != LABEL_REF
11978 && !CONST_INT_P (disp)
11979 && (GET_CODE (disp) != CONST
11980 || !legitimate_constant_p (disp))
11981 && (GET_CODE (disp) != SYMBOL_REF
11982 || !legitimate_constant_p (disp)))
11983 /* Displacement is not constant. */
11985 else if (TARGET_64BIT
11986 && !x86_64_immediate_operand (disp, VOIDmode))
11987 /* Displacement is out of range. */
11991 /* Everything looks valid. */
11995 /* Determine if a given RTX is a valid constant address. */
11998 constant_address_p (rtx x)
12000 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12003 /* Return a unique alias set for the GOT. */
12005 static alias_set_type
12006 ix86_GOT_alias_set (void)
12008 static alias_set_type set = -1;
12010 set = new_alias_set ();
12014 /* Return a legitimate reference for ORIG (an address) using the
12015 register REG. If REG is 0, a new pseudo is generated.
12017 There are two types of references that must be handled:
12019 1. Global data references must load the address from the GOT, via
12020 the PIC reg. An insn is emitted to do this load, and the reg is
12023 2. Static data references, constant pool addresses, and code labels
12024 compute the address as an offset from the GOT, whose base is in
12025 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12026 differentiate them from global data objects. The returned
12027 address is the PIC reg + an unspec constant.
12029 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12030 reg also appears in the address. */
12033 legitimize_pic_address (rtx orig, rtx reg)
12036 rtx new_rtx = orig;
12040 if (TARGET_MACHO && !TARGET_64BIT)
12043 reg = gen_reg_rtx (Pmode);
12044 /* Use the generic Mach-O PIC machinery. */
12045 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12049 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12051 else if (TARGET_64BIT
12052 && ix86_cmodel != CM_SMALL_PIC
12053 && gotoff_operand (addr, Pmode))
12056 /* This symbol may be referenced via a displacement from the PIC
12057 base address (@GOTOFF). */
12059 if (reload_in_progress)
12060 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12061 if (GET_CODE (addr) == CONST)
12062 addr = XEXP (addr, 0);
12063 if (GET_CODE (addr) == PLUS)
12065 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12067 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12070 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12071 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12073 tmpreg = gen_reg_rtx (Pmode);
12076 emit_move_insn (tmpreg, new_rtx);
12080 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12081 tmpreg, 1, OPTAB_DIRECT);
12084 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12086 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12088 /* This symbol may be referenced via a displacement from the PIC
12089 base address (@GOTOFF). */
12091 if (reload_in_progress)
12092 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12093 if (GET_CODE (addr) == CONST)
12094 addr = XEXP (addr, 0);
12095 if (GET_CODE (addr) == PLUS)
12097 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12099 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12102 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12103 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12104 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12108 emit_move_insn (reg, new_rtx);
12112 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12113 /* We can't use @GOTOFF for text labels on VxWorks;
12114 see gotoff_operand. */
12115 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12117 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12119 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12120 return legitimize_dllimport_symbol (addr, true);
12121 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12122 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12123 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12125 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12126 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12130 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12132 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12133 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12134 new_rtx = gen_const_mem (Pmode, new_rtx);
12135 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12138 reg = gen_reg_rtx (Pmode);
/* Use gen_movsi directly, otherwise the address is loaded into a
   register for CSE.  We don't want to CSE these addresses; instead
   we CSE addresses from the GOT table, so skip this.  */
12142 emit_insn (gen_movsi (reg, new_rtx));
12147 /* This symbol must be referenced via a load from the
12148 Global Offset Table (@GOT). */
12150 if (reload_in_progress)
12151 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12152 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12153 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12155 new_rtx = force_reg (Pmode, new_rtx);
12156 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12157 new_rtx = gen_const_mem (Pmode, new_rtx);
12158 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12161 reg = gen_reg_rtx (Pmode);
12162 emit_move_insn (reg, new_rtx);
12168 if (CONST_INT_P (addr)
12169 && !x86_64_immediate_operand (addr, VOIDmode))
12173 emit_move_insn (reg, addr);
12177 new_rtx = force_reg (Pmode, addr);
12179 else if (GET_CODE (addr) == CONST)
12181 addr = XEXP (addr, 0);
12183 /* We must match stuff we generate before. Assume the only
12184 unspecs that can get here are ours. Not that we could do
12185 anything with them anyway.... */
12186 if (GET_CODE (addr) == UNSPEC
12187 || (GET_CODE (addr) == PLUS
12188 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12190 gcc_assert (GET_CODE (addr) == PLUS);
12192 if (GET_CODE (addr) == PLUS)
12194 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12196 /* Check first to see if this is a constant offset from a @GOTOFF
12197 symbol reference. */
12198 if (gotoff_operand (op0, Pmode)
12199 && CONST_INT_P (op1))
12203 if (reload_in_progress)
12204 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12205 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12207 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12208 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12209 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12213 emit_move_insn (reg, new_rtx);
12219 if (INTVAL (op1) < -16*1024*1024
12220 || INTVAL (op1) >= 16*1024*1024)
12222 if (!x86_64_immediate_operand (op1, Pmode))
12223 op1 = force_reg (Pmode, op1);
12224 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12230 base = legitimize_pic_address (XEXP (addr, 0), reg);
12231 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12232 base == reg ? NULL_RTX : reg);
12234 if (CONST_INT_P (new_rtx))
12235 new_rtx = plus_constant (base, INTVAL (new_rtx));
12238 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12240 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12241 new_rtx = XEXP (new_rtx, 1);
12243 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
12251 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12254 get_thread_pointer (int to_reg)
12258 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12262 reg = gen_reg_rtx (Pmode);
12263 insn = gen_rtx_SET (VOIDmode, reg, tp);
12264 insn = emit_insn (insn);
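  /* On GNU/Linux the thread pointer read above becomes a segment-base
     load, e.g. "movq %fs:0, %reg" in 64-bit mode or "movl %gs:0, %reg"
     in 32-bit mode (illustrative; the segment register is
     target-dependent).  */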
12269 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12270 false if we expect this to be used for a memory address and true if
12271 we expect to load the address into a register. */
12274 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
12276 rtx dest, base, off, pic, tp;
12281 case TLS_MODEL_GLOBAL_DYNAMIC:
12282 dest = gen_reg_rtx (Pmode);
12283 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12285 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12287 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12290 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
12291 insns = get_insns ();
12294 RTL_CONST_CALL_P (insns) = 1;
12295 emit_libcall_block (insns, dest, rax, x);
12297 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12298 emit_insn (gen_tls_global_dynamic_64 (dest, x));
12300 emit_insn (gen_tls_global_dynamic_32 (dest, x));
12302 if (TARGET_GNU2_TLS)
12304 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12306 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12310 case TLS_MODEL_LOCAL_DYNAMIC:
12311 base = gen_reg_rtx (Pmode);
12312 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
12314 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
12316 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
12319 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
12320 insns = get_insns ();
12323 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
12324 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
12325 RTL_CONST_CALL_P (insns) = 1;
12326 emit_libcall_block (insns, base, rax, note);
12328 else if (TARGET_64BIT && TARGET_GNU2_TLS)
12329 emit_insn (gen_tls_local_dynamic_base_64 (base));
12331 emit_insn (gen_tls_local_dynamic_base_32 (base));
12333 if (TARGET_GNU2_TLS)
12335 rtx x = ix86_tls_module_base ();
12337 set_unique_reg_note (get_last_insn (), REG_EQUIV,
12338 gen_rtx_MINUS (Pmode, x, tp));
12341 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12342 off = gen_rtx_CONST (Pmode, off);
12344 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12346 if (TARGET_GNU2_TLS)
12348 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12350 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
12355 case TLS_MODEL_INITIAL_EXEC:
12359 type = UNSPEC_GOTNTPOFF;
12363 if (reload_in_progress)
12364 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12365 pic = pic_offset_table_rtx;
12366 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12368 else if (!TARGET_ANY_GNU_TLS)
12370 pic = gen_reg_rtx (Pmode);
12371 emit_insn (gen_set_got (pic));
12372 type = UNSPEC_GOTTPOFF;
12377 type = UNSPEC_INDNTPOFF;
12380 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12381 off = gen_rtx_CONST (Pmode, off);
12383 off = gen_rtx_PLUS (Pmode, pic, off);
12384 off = gen_const_mem (Pmode, off);
12385 set_mem_alias_set (off, ix86_GOT_alias_set ());
12387 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12389 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12390 off = force_reg (Pmode, off);
12391 return gen_rtx_PLUS (Pmode, base, off);
12395 base = get_thread_pointer (true);
12396 dest = gen_reg_rtx (Pmode);
12397 emit_insn (gen_subsi3 (dest, base, off));
12401 case TLS_MODEL_LOCAL_EXEC:
12402 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12403 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12404 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12405 off = gen_rtx_CONST (Pmode, off);
12407 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12409 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12410 return gen_rtx_PLUS (Pmode, base, off);
12414 base = get_thread_pointer (true);
12415 dest = gen_reg_rtx (Pmode);
12416 emit_insn (gen_subsi3 (dest, base, off));
12421 gcc_unreachable ();
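
/* Illustration only: what the four models above boil down to at the
   assembly level (x86-64, AT&T syntax; exact padding and relocations
   vary with target and assembler):

     global-dynamic:  lea x@tlsgd(%rip),%rdi; call __tls_get_addr@PLT
     local-dynamic:   lea x@tlsld(%rip),%rdi; call __tls_get_addr@PLT
                      then lea x@dtpoff(%rax),%reg per variable
     initial-exec:    movq x@gottpoff(%rip),%reg; movq %fs:(%reg),%val
     local-exec:      movq %fs:x@tpoff,%val

   A __thread variable is what drives the choice of model, e.g.:  */
#ifdef GCC_ILLUSTRATION_ONLY
__thread int sketch_tls_var;
int sketch_read_tls (void) { return sketch_tls_var; }
#endif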
12427 /* Create or return the unique __imp_DECL dllimport symbol corresponding to symbol DECL. */
12430 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12431 htab_t dllimport_map;
12434 get_dllimport_decl (tree decl)
12436 struct tree_map *h, in;
12439 const char *prefix;
12440 size_t namelen, prefixlen;
12445 if (!dllimport_map)
12446 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12448 in.hash = htab_hash_pointer (decl);
12449 in.base.from = decl;
12450 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12451 h = (struct tree_map *) *loc;
12455 *loc = h = ggc_alloc_tree_map ();
12457 h->base.from = decl;
12458 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12459 VAR_DECL, NULL, ptr_type_node);
12460 DECL_ARTIFICIAL (to) = 1;
12461 DECL_IGNORED_P (to) = 1;
12462 DECL_EXTERNAL (to) = 1;
12463 TREE_READONLY (to) = 1;
12465 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12466 name = targetm.strip_name_encoding (name);
12467 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12468 ? "*__imp_" : "*__imp__";
12469 namelen = strlen (name);
12470 prefixlen = strlen (prefix);
12471 imp_name = (char *) alloca (namelen + prefixlen + 1);
12472 memcpy (imp_name, prefix, prefixlen);
12473 memcpy (imp_name + prefixlen, name, namelen + 1);
12475 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12476 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12477 SET_SYMBOL_REF_DECL (rtl, to);
12478 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12480 rtl = gen_const_mem (Pmode, rtl);
12481 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12483 SET_DECL_RTL (to, rtl);
12484 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
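
/* Illustration only (MinGW convention): the import-table indirection
   created above is what makes a dllimport access go through *__imp_X.
   For example, under the hypothetical guard macro:  */
#ifdef GCC_ILLUSTRATION_ONLY
__attribute__ ((dllimport)) extern int imported_var;
int sketch_use_import (void) { return imported_var; }
/* compiles to a load through __imp__imported_var (underscored ia32 ABI)
   or __imp_imported_var, matching the prefix choice above.  */
#endif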
12489 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12490 true if we require the result be a register. */
12493 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12498 gcc_assert (SYMBOL_REF_DECL (symbol));
12499 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12501 x = DECL_RTL (imp_decl);
12503 x = force_reg (Pmode, x);
12507 /* Try machine-dependent ways of modifying an illegitimate address
12508 to be legitimate. If we find one, return the new, valid address.
12509 This macro is used in only one place: `memory_address' in explow.c.
12511 OLDX is the address as it was before break_out_memory_refs was called.
12512 In some cases it is useful to look at this to decide what needs to be done.
12514 It is always safe for this macro to do nothing. It exists to recognize
12515 opportunities to optimize the output.
12517 For the 80386, we handle X+REG by loading X into a register R and
12518 using R+REG. R will go in a general reg and indexing will be used.
12519 However, if REG is a broken-out memory address or multiplication,
12520 nothing needs to be done because REG can certainly go in a general reg.
12522 When -fpic is used, special handling is needed for symbolic references.
12523 See comments by legitimize_pic_address in i386.c for details. */
12526 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12527 enum machine_mode mode)
12532 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12534 return legitimize_tls_address (x, (enum tls_model) log, false);
12535 if (GET_CODE (x) == CONST
12536 && GET_CODE (XEXP (x, 0)) == PLUS
12537 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12538 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12540 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12541 (enum tls_model) log, false);
12542 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12545 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12547 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12548 return legitimize_dllimport_symbol (x, true);
12549 if (GET_CODE (x) == CONST
12550 && GET_CODE (XEXP (x, 0)) == PLUS
12551 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12552 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12554 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12555 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12559 if (flag_pic && SYMBOLIC_CONST (x))
12560 return legitimize_pic_address (x, 0);
12563 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12564 return machopic_indirect_data_reference (x, 0);
12567 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12568 if (GET_CODE (x) == ASHIFT
12569 && CONST_INT_P (XEXP (x, 1))
12570 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12573 log = INTVAL (XEXP (x, 1));
12574 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12575 GEN_INT (1 << log));
12578 if (GET_CODE (x) == PLUS)
12580 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12582 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12583 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12584 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12587 log = INTVAL (XEXP (XEXP (x, 0), 1));
12588 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12589 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12590 GEN_INT (1 << log));
12593 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12594 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12595 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12598 log = INTVAL (XEXP (XEXP (x, 1), 1));
12599 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12600 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12601 GEN_INT (1 << log));
12604 /* Put multiply first if it isn't already. */
12605 if (GET_CODE (XEXP (x, 1)) == MULT)
12607 rtx tmp = XEXP (x, 0);
12608 XEXP (x, 0) = XEXP (x, 1);
12613 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12614 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12615 created by virtual register instantiation, register elimination, and
12616 similar optimizations. */
12617 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12620 x = gen_rtx_PLUS (Pmode,
12621 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12622 XEXP (XEXP (x, 1), 0)),
12623 XEXP (XEXP (x, 1), 1));
12627 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12628 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12629 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12630 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12631 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12632 && CONSTANT_P (XEXP (x, 1)))
12635 rtx other = NULL_RTX;
12637 if (CONST_INT_P (XEXP (x, 1)))
12639 constant = XEXP (x, 1);
12640 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12642 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12644 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12645 other = XEXP (x, 1);
12653 x = gen_rtx_PLUS (Pmode,
12654 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12655 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12656 plus_constant (other, INTVAL (constant)));
12660 if (changed && ix86_legitimate_address_p (mode, x, false))
12663 if (GET_CODE (XEXP (x, 0)) == MULT)
12666 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12669 if (GET_CODE (XEXP (x, 1)) == MULT)
12672 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12676 && REG_P (XEXP (x, 1))
12677 && REG_P (XEXP (x, 0)))
12680 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12683 x = legitimize_pic_address (x, 0);
12686 if (changed && ix86_legitimate_address_p (mode, x, false))
12689 if (REG_P (XEXP (x, 0)))
12691 rtx temp = gen_reg_rtx (Pmode);
12692 rtx val = force_operand (XEXP (x, 1), temp);
12694 emit_move_insn (temp, val);
12696 XEXP (x, 1) = temp;
12700 else if (REG_P (XEXP (x, 1)))
12702 rtx temp = gen_reg_rtx (Pmode);
12703 rtx val = force_operand (XEXP (x, 0), temp);
12705 emit_move_insn (temp, val);
12707 XEXP (x, 0) = temp;
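
/* Illustration only: the shift->mult canonicalization above is what
   turns an address like p + (i << 2) into the scaled-index form
   (plus (mult index 4) base), so a simple array access can be done in
   a single instruction, e.g. "movl (%base,%index,4), %eax":  */
#ifdef GCC_ILLUSTRATION_ONLY
int sketch_scaled_index (int *p, int i) { return p[i]; }
#endif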
12715 /* Print an integer constant expression in assembler syntax. Addition
12716 and subtraction are the only arithmetic that may appear in these
12717 expressions. FILE is the stdio stream to write to, X is the rtx, and
12718 CODE is the operand print code from the output string. */
12721 output_pic_addr_const (FILE *file, rtx x, int code)
12725 switch (GET_CODE (x))
12728 gcc_assert (flag_pic);
12733 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12734 output_addr_const (file, x);
12737 const char *name = XSTR (x, 0);
12739 /* Mark the decl as referenced so that cgraph will
12740 output the function. */
12741 if (SYMBOL_REF_DECL (x))
12742 mark_decl_referenced (SYMBOL_REF_DECL (x));
12745 if (MACHOPIC_INDIRECT
12746 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12747 name = machopic_indirection_name (x, /*stub_p=*/true);
12749 assemble_name (file, name);
12751 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12752 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12753 fputs ("@PLT", file);
12760 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12761 assemble_name (asm_out_file, buf);
12765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12769 /* This used to output parentheses around the expression,
12770 but that does not work on the 386 (either ATT or BSD assembler). */
12771 output_pic_addr_const (file, XEXP (x, 0), code);
12775 if (GET_MODE (x) == VOIDmode)
12777 /* We can use %d if the number is <32 bits and positive. */
12778 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12779 fprintf (file, "0x%lx%08lx",
12780 (unsigned long) CONST_DOUBLE_HIGH (x),
12781 (unsigned long) CONST_DOUBLE_LOW (x));
12783 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12786 /* We can't handle floating point constants;
12787 TARGET_PRINT_OPERAND must handle them. */
12788 output_operand_lossage ("floating constant misused");
12792 /* Some assemblers need integer constants to appear first. */
12793 if (CONST_INT_P (XEXP (x, 0)))
12795 output_pic_addr_const (file, XEXP (x, 0), code);
12797 output_pic_addr_const (file, XEXP (x, 1), code);
12801 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12802 output_pic_addr_const (file, XEXP (x, 1), code);
12804 output_pic_addr_const (file, XEXP (x, 0), code);
12810 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12811 output_pic_addr_const (file, XEXP (x, 0), code);
12813 output_pic_addr_const (file, XEXP (x, 1), code);
12815 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12819 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12821 bool f = i386_asm_output_addr_const_extra (file, x);
12826 gcc_assert (XVECLEN (x, 0) == 1);
12827 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12828 switch (XINT (x, 1))
12831 fputs ("@GOT", file);
12833 case UNSPEC_GOTOFF:
12834 fputs ("@GOTOFF", file);
12836 case UNSPEC_PLTOFF:
12837 fputs ("@PLTOFF", file);
12839 case UNSPEC_GOTPCREL:
12840 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12841 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12843 case UNSPEC_GOTTPOFF:
12844 /* FIXME: This might be @TPOFF in Sun ld too. */
12845 fputs ("@gottpoff", file);
12848 fputs ("@tpoff", file);
12850 case UNSPEC_NTPOFF:
12852 fputs ("@tpoff", file);
12854 fputs ("@ntpoff", file);
12856 case UNSPEC_DTPOFF:
12857 fputs ("@dtpoff", file);
12859 case UNSPEC_GOTNTPOFF:
12861 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12862 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12864 fputs ("@gotntpoff", file);
12866 case UNSPEC_INDNTPOFF:
12867 fputs ("@indntpoff", file);
12870 case UNSPEC_MACHOPIC_OFFSET:
12872 machopic_output_function_base_name (file);
12876 output_operand_lossage ("invalid UNSPEC as operand");
12882 output_operand_lossage ("invalid expression as operand");
12886 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12887 We need to emit DTP-relative relocations. */
12889 static void ATTRIBUTE_UNUSED
12890 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12892 fputs (ASM_LONG, file);
12893 output_addr_const (file, x);
12894 fputs ("@dtpoff", file);
12900 fputs (", 0", file);
12903 gcc_unreachable ();
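
/* For example (illustrative, assuming ASM_LONG is ".long" as on ELF):
   a 4-byte DTP-relative reference to symbol foo is emitted as
       .long foo@dtpoff
   and an 8-byte one as
       .long foo@dtpoff, 0
   i.e. the low word followed by a zero high word (little endian).  */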
12907 /* Return true if X is a representation of the PIC register. This copes
12908 with calls from ix86_find_base_term, where the register might have
12909 been replaced by a cselib value. */
12912 ix86_pic_register_p (rtx x)
12914 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12915 return (pic_offset_table_rtx
12916 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12918 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12921 /* Helper function for ix86_delegitimize_address.
12922 Attempt to delegitimize TLS local-exec accesses. */
12925 ix86_delegitimize_tls_address (rtx orig_x)
12927 rtx x = orig_x, unspec;
12928 struct ix86_address addr;
12930 if (!TARGET_TLS_DIRECT_SEG_REFS)
12934 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12936 if (ix86_decompose_address (x, &addr) == 0
12937 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12938 || addr.disp == NULL_RTX
12939 || GET_CODE (addr.disp) != CONST)
12941 unspec = XEXP (addr.disp, 0);
12942 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12943 unspec = XEXP (unspec, 0);
12944 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12946 x = XVECEXP (unspec, 0, 0);
12947 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12948 if (unspec != XEXP (addr.disp, 0))
12949 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12952 rtx idx = addr.index;
12953 if (addr.scale != 1)
12954 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12955 x = gen_rtx_PLUS (Pmode, idx, x);
12958 x = gen_rtx_PLUS (Pmode, addr.base, x);
12959 if (MEM_P (orig_x))
12960 x = replace_equiv_address_nv (orig_x, x);
12964 /* In the name of slightly smaller debug output, and to cater to
12965 general assembler lossage, recognize PIC+GOTOFF and turn it back
12966 into a direct symbol reference.
12968 On Darwin, this is necessary to avoid a crash, because Darwin
12969 has a different PIC label for each routine but the DWARF debugging
12970 information is not associated with any particular routine, so it's
12971 necessary to remove references to the PIC label from RTL stored by
12972 the DWARF output code. */
12975 ix86_delegitimize_address (rtx x)
12977 rtx orig_x = delegitimize_mem_from_attrs (x);
12978 /* addend is NULL or some rtx if x is something+GOTOFF where
12979 something doesn't include the PIC register. */
12980 rtx addend = NULL_RTX;
12981 /* reg_addend is NULL or a multiple of some register. */
12982 rtx reg_addend = NULL_RTX;
12983 /* const_addend is NULL or a const_int. */
12984 rtx const_addend = NULL_RTX;
12985 /* This is the result, or NULL. */
12986 rtx result = NULL_RTX;
12995 if (GET_CODE (x) != CONST
12996 || GET_CODE (XEXP (x, 0)) != UNSPEC
12997 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12998 || !MEM_P (orig_x))
12999 return ix86_delegitimize_tls_address (orig_x);
13000 x = XVECEXP (XEXP (x, 0), 0, 0);
13001 if (GET_MODE (orig_x) != Pmode)
13002 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
13006 if (GET_CODE (x) != PLUS
13007 || GET_CODE (XEXP (x, 1)) != CONST)
13008 return ix86_delegitimize_tls_address (orig_x);
13010 if (ix86_pic_register_p (XEXP (x, 0)))
13011 /* %ebx + GOT/GOTOFF */
13013 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13015 /* %ebx + %reg * scale + GOT/GOTOFF */
13016 reg_addend = XEXP (x, 0);
13017 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13018 reg_addend = XEXP (reg_addend, 1);
13019 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13020 reg_addend = XEXP (reg_addend, 0);
13023 reg_addend = NULL_RTX;
13024 addend = XEXP (x, 0);
13028 addend = XEXP (x, 0);
13030 x = XEXP (XEXP (x, 1), 0);
13031 if (GET_CODE (x) == PLUS
13032 && CONST_INT_P (XEXP (x, 1)))
13034 const_addend = XEXP (x, 1);
13038 if (GET_CODE (x) == UNSPEC
13039 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13040 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13041 result = XVECEXP (x, 0, 0);
13043 if (TARGET_MACHO && darwin_local_data_pic (x)
13044 && !MEM_P (orig_x))
13045 result = XVECEXP (x, 0, 0);
13048 return ix86_delegitimize_tls_address (orig_x);
13051 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13053 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13056 /* If the rest of original X doesn't involve the PIC register, add
13057 addend and subtract pic_offset_table_rtx. This can happen e.g.
13059 leal (%ebx, %ecx, 4), %ecx
13061 movl foo@GOTOFF(%ecx), %edx
13062 in which case we return (%ecx - %ebx) + foo. */
13063 if (pic_offset_table_rtx)
13064 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13065 pic_offset_table_rtx),
13070 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13071 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13075 /* If X is a machine specific address (i.e. a symbol or label being
13076 referenced as a displacement from the GOT implemented using an
13077 UNSPEC), then return the base term. Otherwise return X. */
13080 ix86_find_base_term (rtx x)
13086 if (GET_CODE (x) != CONST)
13088 term = XEXP (x, 0);
13089 if (GET_CODE (term) == PLUS
13090 && (CONST_INT_P (XEXP (term, 1))
13091 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13092 term = XEXP (term, 0);
13093 if (GET_CODE (term) != UNSPEC
13094 || XINT (term, 1) != UNSPEC_GOTPCREL)
13097 return XVECEXP (term, 0, 0);
13100 return ix86_delegitimize_address (x);
13104 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13105 int fp, FILE *file)
13107 const char *suffix;
13109 if (mode == CCFPmode || mode == CCFPUmode)
13111 code = ix86_fp_compare_code_to_integer (code);
13115 code = reverse_condition (code);
13166 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13170 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13171 Those same assemblers have the same but opposite lossage on cmov. */
13172 if (mode == CCmode)
13173 suffix = fp ? "nbe" : "a";
13174 else if (mode == CCCmode)
13177 gcc_unreachable ();
13193 gcc_unreachable ();
13197 gcc_assert (mode == CCmode || mode == CCCmode);
13214 gcc_unreachable ();
13218 /* ??? As above. */
13219 gcc_assert (mode == CCmode || mode == CCCmode);
13220 suffix = fp ? "nb" : "ae";
13223 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13227 /* ??? As above. */
13228 if (mode == CCmode)
13230 else if (mode == CCCmode)
13231 suffix = fp ? "nb" : "ae";
13233 gcc_unreachable ();
13236 suffix = fp ? "u" : "p";
13239 suffix = fp ? "nu" : "np";
13242 gcc_unreachable ();
13244 fputs (suffix, file);
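
/* Quick reference (illustrative) for the suffixes chosen above, in
   signed / unsigned (carry-based) pairs:
     EQ "e"   NE "ne"   GT "g"/"a"   LT "l"/"b"
     GE "ge"/"ae"   LE "le"/"be"   UNORDERED "p"   ORDERED "np"
   so (set (reg:QI) (gt ...)) prints as "setg", its unsigned
   counterpart as "seta", and the fcmov forms use "nbe"/"nb" to dodge
   the assembler lossage noted above.  */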
13247 /* Print the name of register X to FILE based on its machine mode and number.
13248 If CODE is 'w', pretend the mode is HImode.
13249 If CODE is 'b', pretend the mode is QImode.
13250 If CODE is 'k', pretend the mode is SImode.
13251 If CODE is 'q', pretend the mode is DImode.
13252 If CODE is 'x', pretend the mode is V4SFmode.
13253 If CODE is 't', pretend the mode is V8SFmode.
13254 If CODE is 'h', pretend the reg is the 'high' byte register.
13255 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13256 If CODE is 'd', duplicate the operand for AVX instruction.
13260 print_reg (rtx x, int code, FILE *file)
13263 bool duplicated = code == 'd' && TARGET_AVX;
13265 gcc_assert (x == pc_rtx
13266 || (REGNO (x) != ARG_POINTER_REGNUM
13267 && REGNO (x) != FRAME_POINTER_REGNUM
13268 && REGNO (x) != FLAGS_REG
13269 && REGNO (x) != FPSR_REG
13270 && REGNO (x) != FPCR_REG));
13272 if (ASSEMBLER_DIALECT == ASM_ATT)
13277 gcc_assert (TARGET_64BIT);
13278 fputs ("rip", file);
13282 if (code == 'w' || MMX_REG_P (x))
13284 else if (code == 'b')
13286 else if (code == 'k')
13288 else if (code == 'q')
13290 else if (code == 'y')
13292 else if (code == 'h')
13294 else if (code == 'x')
13296 else if (code == 't')
13299 code = GET_MODE_SIZE (GET_MODE (x));
13301 /* Irritatingly, AMD extended registers use a different naming convention
13302 from the normal registers. */
13303 if (REX_INT_REG_P (x))
13305 gcc_assert (TARGET_64BIT);
13309 error ("extended registers have no high halves");
13312 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
13315 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
13318 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
13321 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
13324 error ("unsupported operand size for extended register");
13334 if (STACK_TOP_P (x))
13343 if (! ANY_FP_REG_P (x))
13344 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13349 reg = hi_reg_name[REGNO (x)];
13352 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13354 reg = qi_reg_name[REGNO (x)];
13357 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13359 reg = qi_high_reg_name[REGNO (x)];
13364 gcc_assert (!duplicated);
13366 fputs (hi_reg_name[REGNO (x)] + 1, file);
13371 gcc_unreachable ();
13377 if (ASSEMBLER_DIALECT == ASM_ATT)
13378 fprintf (file, ", %%%s", reg);
13380 fprintf (file, ", %s", reg);
13384 /* Locate some local-dynamic symbol still in use by this function
13385 so that we can print its name in some tls_local_dynamic_base
13389 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13393 if (GET_CODE (x) == SYMBOL_REF
13394 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13396 cfun->machine->some_ld_name = XSTR (x, 0);
13403 static const char *
13404 get_some_local_dynamic_name (void)
13408 if (cfun->machine->some_ld_name)
13409 return cfun->machine->some_ld_name;
13411 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
13412 if (NONDEBUG_INSN_P (insn)
13413 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13414 return cfun->machine->some_ld_name;
13419 /* Meaning of CODE:
13420 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13421 C -- print opcode suffix for set/cmov insn.
13422 c -- like C, but print reversed condition
13423 F,f -- likewise, but for floating-point.
13424 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13426 R -- print the prefix for register names.
13427 z -- print the opcode suffix for the size of the current operand.
13428 Z -- likewise, with special suffixes for x87 instructions.
13429 * -- print a star (in certain assembler syntax)
13430 A -- print an absolute memory reference.
13431 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13432 s -- print a shift double count, followed by the assembler's argument
13434 b -- print the QImode name of the register for the indicated operand.
13435 %b0 would print %al if operands[0] is reg 0.
13436 w -- likewise, print the HImode name of the register.
13437 k -- likewise, print the SImode name of the register.
13438 q -- likewise, print the DImode name of the register.
13439 x -- likewise, print the V4SFmode name of the register.
13440 t -- likewise, print the V8SFmode name of the register.
13441 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13442 y -- print "st(0)" instead of "st" as a register.
13443 d -- print duplicated register operand for AVX instruction.
13444 D -- print condition for SSE cmp instruction.
13445 P -- if PIC, print an @PLT suffix.
13446 X -- don't print any sort of PIC '@' suffix for a symbol.
13447 & -- print some in-use local-dynamic symbol name.
13448 H -- print a memory address offset by 8; used for sse high-parts
13449 Y -- print condition for XOP pcom* instruction.
13450 + -- print a branch hint as 'cs' or 'ds' prefix
13451 ; -- print a semicolon (after prefixes due to bug in older gas).
13452 @ -- print a segment register of thread base pointer load
13456 ix86_print_operand (FILE *file, rtx x, int code)
13463 if (ASSEMBLER_DIALECT == ASM_ATT)
13469 const char *name = get_some_local_dynamic_name ();
13471 output_operand_lossage ("'%%&' used without any "
13472 "local dynamic TLS references");
13474 assemble_name (file, name);
13479 switch (ASSEMBLER_DIALECT)
13486 /* Intel syntax. For absolute addresses, registers should not
13487 be surrounded by braces. */
13491 ix86_print_operand (file, x, 0);
13498 gcc_unreachable ();
13501 ix86_print_operand (file, x, 0);
13506 if (ASSEMBLER_DIALECT == ASM_ATT)
13511 if (ASSEMBLER_DIALECT == ASM_ATT)
13516 if (ASSEMBLER_DIALECT == ASM_ATT)
13521 if (ASSEMBLER_DIALECT == ASM_ATT)
13526 if (ASSEMBLER_DIALECT == ASM_ATT)
13531 if (ASSEMBLER_DIALECT == ASM_ATT)
13536 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13538 /* Opcodes don't get size suffixes if using Intel opcodes. */
13539 if (ASSEMBLER_DIALECT == ASM_INTEL)
13542 switch (GET_MODE_SIZE (GET_MODE (x)))
13561 output_operand_lossage
13562 ("invalid operand size for operand code '%c'", code);
13567 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13569 (0, "non-integer operand used with operand code '%c'", code);
13573 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13574 if (ASSEMBLER_DIALECT == ASM_INTEL)
13577 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13579 switch (GET_MODE_SIZE (GET_MODE (x)))
13582 #ifdef HAVE_AS_IX86_FILDS
13592 #ifdef HAVE_AS_IX86_FILDQ
13595 fputs ("ll", file);
13603 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13605 /* 387 opcodes don't get size suffixes
13606 if the operands are registers. */
13607 if (STACK_REG_P (x))
13610 switch (GET_MODE_SIZE (GET_MODE (x)))
13631 output_operand_lossage
13632 ("invalid operand type used with operand code '%c'", code);
13636 output_operand_lossage
13637 ("invalid operand size for operand code '%c'", code);
13654 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13656 ix86_print_operand (file, x, 0);
13657 fputs (", ", file);
13662 /* Little bit of braindamage here. The SSE compare instructions
13663 use completely different names for the comparisons than the
13664 fp conditional moves do. */
13667 switch (GET_CODE (x))
13670 fputs ("eq", file);
13673 fputs ("eq_us", file);
13676 fputs ("lt", file);
13679 fputs ("nge", file);
13682 fputs ("le", file);
13685 fputs ("ngt", file);
13688 fputs ("unord", file);
13691 fputs ("neq", file);
13694 fputs ("neq_oq", file);
13697 fputs ("ge", file);
13700 fputs ("nlt", file);
13703 fputs ("gt", file);
13706 fputs ("nle", file);
13709 fputs ("ord", file);
13712 output_operand_lossage ("operand is not a condition code, "
13713 "invalid operand code 'D'");
13719 switch (GET_CODE (x))
13723 fputs ("eq", file);
13727 fputs ("lt", file);
13731 fputs ("le", file);
13734 fputs ("unord", file);
13738 fputs ("neq", file);
13742 fputs ("nlt", file);
13746 fputs ("nle", file);
13749 fputs ("ord", file);
13752 output_operand_lossage ("operand is not a condition code, "
13753 "invalid operand code 'D'");
13759 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13760 if (ASSEMBLER_DIALECT == ASM_ATT)
13762 switch (GET_MODE (x))
13764 case HImode: putc ('w', file); break;
13766 case SFmode: putc ('l', file); break;
13768 case DFmode: putc ('q', file); break;
13769 default: gcc_unreachable ();
13776 if (!COMPARISON_P (x))
13778 output_operand_lossage ("operand is neither a constant nor a "
13779 "condition code, invalid operand code "
13783 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13786 if (!COMPARISON_P (x))
13788 output_operand_lossage ("operand is neither a constant nor a "
13789 "condition code, invalid operand code "
13793 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13794 if (ASSEMBLER_DIALECT == ASM_ATT)
13797 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13800 /* Like above, but reverse condition */
13802 /* Check to see if argument to %c is really a constant
13803 and not a condition code which needs to be reversed. */
13804 if (!COMPARISON_P (x))
13806 output_operand_lossage ("operand is neither a constant nor a "
13807 "condition code, invalid operand "
13811 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13814 if (!COMPARISON_P (x))
13816 output_operand_lossage ("operand is neither a constant nor a "
13817 "condition code, invalid operand "
13821 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13822 if (ASSEMBLER_DIALECT == ASM_ATT)
13825 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13829 /* It doesn't actually matter what mode we use here, as we're
13830 only going to use this for printing. */
13831 x = adjust_address_nv (x, DImode, 8);
13839 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13842 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13845 int pred_val = INTVAL (XEXP (x, 0));
13847 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13848 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13850 int taken = pred_val > REG_BR_PROB_BASE / 2;
13851 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13853 /* Emit hints only where the default branch prediction
13854 heuristics would fail. */
13855 if (taken != cputaken)
13857 /* We use 3e (DS) prefix for taken branches and
13858 2e (CS) prefix for not taken branches. */
13860 fputs ("ds ; ", file);
13862 fputs ("cs ; ", file);
13870 switch (GET_CODE (x))
13873 fputs ("neq", file);
13876 fputs ("eq", file);
13880 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13884 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13888 fputs ("le", file);
13892 fputs ("lt", file);
13895 fputs ("unord", file);
13898 fputs ("ord", file);
13901 fputs ("ueq", file);
13904 fputs ("nlt", file);
13907 fputs ("nle", file);
13910 fputs ("ule", file);
13913 fputs ("ult", file);
13916 fputs ("une", file);
13919 output_operand_lossage ("operand is not a condition code, "
13920 "invalid operand code 'Y'");
13926 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13932 if (ASSEMBLER_DIALECT == ASM_ATT)
13935 /* The kernel uses a different segment register for performance
13936 reasons: this way a system call does not have to trash the
13937 userspace segment register, which would be expensive. */
13938 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13939 fputs ("fs", file);
13941 fputs ("gs", file);
13945 output_operand_lossage ("invalid operand code '%c'", code);
13950 print_reg (x, code, file);
13952 else if (MEM_P (x))
13954 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13955 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13956 && GET_MODE (x) != BLKmode)
13959 switch (GET_MODE_SIZE (GET_MODE (x)))
13961 case 1: size = "BYTE"; break;
13962 case 2: size = "WORD"; break;
13963 case 4: size = "DWORD"; break;
13964 case 8: size = "QWORD"; break;
13965 case 12: size = "TBYTE"; break;
13967 if (GET_MODE (x) == XFmode)
13972 case 32: size = "YMMWORD"; break;
13974 gcc_unreachable ();
13977 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13980 else if (code == 'w')
13982 else if (code == 'k')
13985 fputs (size, file);
13986 fputs (" PTR ", file);
13990 /* Avoid (%rip) for call operands. */
13991 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13992 && !CONST_INT_P (x))
13993 output_addr_const (file, x);
13994 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13995 output_operand_lossage ("invalid constraints for operand");
13997 output_address (x);
14000 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14005 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14006 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14008 if (ASSEMBLER_DIALECT == ASM_ATT)
14010 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14012 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14014 fprintf (file, "0x%08x", (unsigned int) l);
14017 /* These float cases don't actually occur as immediate operands. */
14018 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14022 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14023 fputs (dstr, file);
14026 else if (GET_CODE (x) == CONST_DOUBLE
14027 && GET_MODE (x) == XFmode)
14031 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14032 fputs (dstr, file);
14037 /* We have patterns that allow zero sets of memory, for instance.
14038 In 64-bit mode, we should probably support all 8-byte vectors,
14039 since we can in fact encode that into an immediate. */
14040 if (GET_CODE (x) == CONST_VECTOR)
14042 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14048 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14050 if (ASSEMBLER_DIALECT == ASM_ATT)
14053 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14054 || GET_CODE (x) == LABEL_REF)
14056 if (ASSEMBLER_DIALECT == ASM_ATT)
14059 fputs ("OFFSET FLAT:", file);
14062 if (CONST_INT_P (x))
14063 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14064 else if (flag_pic || MACHOPIC_INDIRECT)
14065 output_pic_addr_const (file, x, code);
14067 output_addr_const (file, x);
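
/* Worked example (illustrative): with operands[0] = (reg:SI ax), an
   output template can pick any width of the same register:
     "%b0" -> %al   "%w0" -> %ax   "%k0" -> %eax   "%q0" -> %rax
     "%h0" -> %ah
   (AT&T dialect; the Intel dialect drops the '%' register prefix).  */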
14072 ix86_print_operand_punct_valid_p (unsigned char code)
14074 return (code == '@' || code == '*' || code == '+'
14075 || code == '&' || code == ';');
14078 /* Print a memory operand whose address is ADDR. */
14081 ix86_print_operand_address (FILE *file, rtx addr)
14083 struct ix86_address parts;
14084 rtx base, index, disp;
14086 int ok = ix86_decompose_address (addr, &parts);
14091 index = parts.index;
14093 scale = parts.scale;
14101 if (ASSEMBLER_DIALECT == ASM_ATT)
14103 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14106 gcc_unreachable ();
14109 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
14110 if (TARGET_64BIT && !base && !index)
14114 if (GET_CODE (disp) == CONST
14115 && GET_CODE (XEXP (disp, 0)) == PLUS
14116 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14117 symbol = XEXP (XEXP (disp, 0), 0);
14119 if (GET_CODE (symbol) == LABEL_REF
14120 || (GET_CODE (symbol) == SYMBOL_REF
14121 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14124 if (!base && !index)
14126 /* A displacement-only address requires special attention. */
14128 if (CONST_INT_P (disp))
14130 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14131 fputs ("ds:", file);
14132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14135 output_pic_addr_const (file, disp, 0);
14137 output_addr_const (file, disp);
14141 if (ASSEMBLER_DIALECT == ASM_ATT)
14146 output_pic_addr_const (file, disp, 0);
14147 else if (GET_CODE (disp) == LABEL_REF)
14148 output_asm_label (disp);
14150 output_addr_const (file, disp);
14155 print_reg (base, 0, file);
14159 print_reg (index, 0, file);
14161 fprintf (file, ",%d", scale);
14167 rtx offset = NULL_RTX;
14171 /* Pull out the offset of a symbol; print any symbol itself. */
14172 if (GET_CODE (disp) == CONST
14173 && GET_CODE (XEXP (disp, 0)) == PLUS
14174 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14176 offset = XEXP (XEXP (disp, 0), 1);
14177 disp = gen_rtx_CONST (VOIDmode,
14178 XEXP (XEXP (disp, 0), 0));
14182 output_pic_addr_const (file, disp, 0);
14183 else if (GET_CODE (disp) == LABEL_REF)
14184 output_asm_label (disp);
14185 else if (CONST_INT_P (disp))
14188 output_addr_const (file, disp);
14194 print_reg (base, 0, file);
14197 if (INTVAL (offset) >= 0)
14199 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14203 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14210 print_reg (index, 0, file);
14212 fprintf (file, "*%d", scale);
14219 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14222 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14226 if (GET_CODE (x) != UNSPEC)
14229 op = XVECEXP (x, 0, 0);
14230 switch (XINT (x, 1))
14232 case UNSPEC_GOTTPOFF:
14233 output_addr_const (file, op);
14234 /* FIXME: This might be @TPOFF in Sun ld. */
14235 fputs ("@gottpoff", file);
14238 output_addr_const (file, op);
14239 fputs ("@tpoff", file);
14241 case UNSPEC_NTPOFF:
14242 output_addr_const (file, op);
14244 fputs ("@tpoff", file);
14246 fputs ("@ntpoff", file);
14248 case UNSPEC_DTPOFF:
14249 output_addr_const (file, op);
14250 fputs ("@dtpoff", file);
14252 case UNSPEC_GOTNTPOFF:
14253 output_addr_const (file, op);
14255 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14256 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14258 fputs ("@gotntpoff", file);
14260 case UNSPEC_INDNTPOFF:
14261 output_addr_const (file, op);
14262 fputs ("@indntpoff", file);
14265 case UNSPEC_MACHOPIC_OFFSET:
14266 output_addr_const (file, op);
14268 machopic_output_function_base_name (file);
14272 case UNSPEC_STACK_CHECK:
14276 gcc_assert (flag_split_stack);
14278 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14279 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14281 gcc_unreachable ();
14284 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14295 /* Split one or more double-mode RTL references into pairs of half-mode
14296 references. The RTL can be REG, offsettable MEM, integer constant, or
14297 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14298 split and "num" is its length. lo_half and hi_half are output arrays
14299 that parallel "operands". */
14302 split_double_mode (enum machine_mode mode, rtx operands[],
14303 int num, rtx lo_half[], rtx hi_half[])
14305 enum machine_mode half_mode;
14311 half_mode = DImode;
14314 half_mode = SImode;
14317 gcc_unreachable ();
14320 byte = GET_MODE_SIZE (half_mode);
14324 rtx op = operands[num];
14326 /* simplify_subreg refuses to split volatile memory addresses,
14327 but we still have to handle them. */
14330 lo_half[num] = adjust_address (op, half_mode, 0);
14331 hi_half[num] = adjust_address (op, half_mode, byte);
14335 lo_half[num] = simplify_gen_subreg (half_mode, op,
14336 GET_MODE (op) == VOIDmode
14337 ? mode : GET_MODE (op), 0);
14338 hi_half[num] = simplify_gen_subreg (half_mode, op,
14339 GET_MODE (op) == VOIDmode
14340 ? mode : GET_MODE (op), byte);
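
/* Usage sketch (illustrative, hypothetical caller): splitting one
   DImode operand into its SImode halves on ia32:

     rtx ops[1] = { operands[0] }, lo[1], hi[1];
     split_double_mode (DImode, ops, 1, lo, hi);

   afterwards lo[0] is bytes 0..3 and hi[0] is bytes 4..7 of the
   original operand (little endian).  */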
14345 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14346 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14347 is the expression of the binary operation. The output may either be
14348 emitted here, or returned to the caller, like all output_* functions.
14350 There is no guarantee that the operands are the same mode, as they
14351 might be within FLOAT or FLOAT_EXTEND expressions. */
14353 #ifndef SYSV386_COMPAT
14354 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14355 wants to fix the assemblers because that causes incompatibility
14356 with gcc. No-one wants to fix gcc because that causes
14357 incompatibility with assemblers... You can use the option of
14358 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14359 #define SYSV386_COMPAT 1
14363 output_387_binary_op (rtx insn, rtx *operands)
14365 static char buf[40];
14368 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14370 #ifdef ENABLE_CHECKING
14371 /* Even if we do not want to check the inputs, this documents input
14372 constraints. Which helps in understanding the following code. */
14373 if (STACK_REG_P (operands[0])
14374 && ((REG_P (operands[1])
14375 && REGNO (operands[0]) == REGNO (operands[1])
14376 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14377 || (REG_P (operands[2])
14378 && REGNO (operands[0]) == REGNO (operands[2])
14379 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14380 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14383 gcc_assert (is_sse);
14386 switch (GET_CODE (operands[3]))
14389 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14390 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14398 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14399 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14407 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14408 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14416 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14417 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14425 gcc_unreachable ();
14432 strcpy (buf, ssep);
14433 if (GET_MODE (operands[0]) == SFmode)
14434 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14436 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14440 strcpy (buf, ssep + 1);
14441 if (GET_MODE (operands[0]) == SFmode)
14442 strcat (buf, "ss\t{%2, %0|%0, %2}");
14444 strcat (buf, "sd\t{%2, %0|%0, %2}");
14450 switch (GET_CODE (operands[3]))
14454 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14456 rtx temp = operands[2];
14457 operands[2] = operands[1];
14458 operands[1] = temp;
14461 /* know operands[0] == operands[1]. */
14463 if (MEM_P (operands[2]))
14469 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14471 if (STACK_TOP_P (operands[0]))
14472 /* How is it that we are storing to a dead operand[2]?
14473 Well, presumably operands[1] is dead too. We can't
14474 store the result to st(0) as st(0) gets popped on this
14475 instruction. Instead store to operands[2] (which I
14476 think has to be st(1)). st(1) will be popped later.
14477 gcc <= 2.8.1 didn't have this check and generated
14478 assembly code that the Unixware assembler rejected. */
14479 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14481 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14485 if (STACK_TOP_P (operands[0]))
14486 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14488 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14493 if (MEM_P (operands[1]))
14499 if (MEM_P (operands[2]))
14505 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14508 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14509 derived assemblers, confusingly reverse the direction of
14510 the operation for fsub{r} and fdiv{r} when the
14511 destination register is not st(0). The Intel assembler
14512 doesn't have this brain damage. Read !SYSV386_COMPAT to
14513 figure out what the hardware really does. */
14514 if (STACK_TOP_P (operands[0]))
14515 p = "{p\t%0, %2|rp\t%2, %0}";
14517 p = "{rp\t%2, %0|p\t%0, %2}";
14519 if (STACK_TOP_P (operands[0]))
14520 /* As above for fmul/fadd, we can't store to st(0). */
14521 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14523 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14528 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14531 if (STACK_TOP_P (operands[0]))
14532 p = "{rp\t%0, %1|p\t%1, %0}";
14534 p = "{p\t%1, %0|rp\t%0, %1}";
14536 if (STACK_TOP_P (operands[0]))
14537 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14539 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14544 if (STACK_TOP_P (operands[0]))
14546 if (STACK_TOP_P (operands[1]))
14547 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14549 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14552 else if (STACK_TOP_P (operands[1]))
14555 p = "{\t%1, %0|r\t%0, %1}";
14557 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14563 p = "{r\t%2, %0|\t%0, %2}";
14565 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14571 gcc_unreachable ();
14578 /* Return needed mode for entity in optimize_mode_switching pass. */
14581 ix86_mode_needed (int entity, rtx insn)
14583 enum attr_i387_cw mode;
14585 /* The mode UNINITIALIZED is used to store the control word after a
14586 function call or ASM pattern. The mode ANY specifies that the
14587 function has no requirements on the control word and makes no
14588 changes in the bits we are interested in. */
14591 if (CALL_P (insn) || (NONJUMP_INSN_P (insn)
14592 && (asm_noperands (PATTERN (insn)) >= 0
14593 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14594 return I387_CW_UNINITIALIZED;
14596 if (recog_memoized (insn) < 0)
14597 return I387_CW_ANY;
14599 mode = get_attr_i387_cw (insn);
14604 if (mode == I387_CW_TRUNC)
14609 if (mode == I387_CW_FLOOR)
14614 if (mode == I387_CW_CEIL)
14619 if (mode == I387_CW_MASK_PM)
14624 gcc_unreachable ();
14627 return I387_CW_ANY;
14630 /* Output code to initialize control word copies used by trunc?f?i and
14631 rounding patterns. CURRENT_MODE is set to the current control word,
14632 while NEW_MODE is set to the new control word. */
14635 emit_i387_cw_initialization (int mode)
14637 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14640 enum ix86_stack_slot slot;
14642 rtx reg = gen_reg_rtx (HImode);
14644 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14645 emit_move_insn (reg, copy_rtx (stored_mode));
14647 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14648 || optimize_function_for_size_p (cfun))
14652 case I387_CW_TRUNC:
14653 /* round toward zero (truncate) */
14654 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14655 slot = SLOT_CW_TRUNC;
14658 case I387_CW_FLOOR:
14659 /* round down toward -oo */
14660 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14661 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14662 slot = SLOT_CW_FLOOR;
14666 /* round up toward +oo */
14667 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14668 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14669 slot = SLOT_CW_CEIL;
14672 case I387_CW_MASK_PM:
14673 /* mask precision exception for nearbyint() */
14674 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14675 slot = SLOT_CW_MASK_PM;
14679 gcc_unreachable ();
14686 case I387_CW_TRUNC:
14687 /* round toward zero (truncate) */
14688 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14689 slot = SLOT_CW_TRUNC;
14692 case I387_CW_FLOOR:
14693 /* round down toward -oo */
14694 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14695 slot = SLOT_CW_FLOOR;
14699 /* round up toward +oo */
14700 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14701 slot = SLOT_CW_CEIL;
14704 case I387_CW_MASK_PM:
14705 /* mask precision exception for nearbyint() */
14706 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14707 slot = SLOT_CW_MASK_PM;
14711 gcc_unreachable ();
14715 gcc_assert (slot < MAX_386_STACK_LOCALS);
14717 new_mode = assign_386_stack_local (HImode, slot);
14718 emit_move_insn (new_mode, reg);
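
/* For reference (x87 control word layout): bits 11:10 form the
   rounding-control field -- 00 to nearest, 01 down, 10 up, 11 toward
   zero -- which is why the code above ORs in 0x0c00 (trunc), 0x0400
   (floor) or 0x0800 (ceil); bit 5 (0x0020) masks the precision
   exception, as needed for nearbyint.  */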
14721 /* Output code for INSN to convert a float to a signed int. OPERANDS
14722 are the insn operands. The output may be [HSD]Imode and the input
14723 operand may be [SDX]Fmode. */
14726 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14728 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14729 int dimode_p = GET_MODE (operands[0]) == DImode;
14730 int round_mode = get_attr_i387_cw (insn);
14732 /* Jump through a hoop or two for DImode, since the hardware has no
14733 non-popping instruction. We used to do this a different way, but
14734 that was somewhat fragile and broke with post-reload splitters. */
14735 if ((dimode_p || fisttp) && !stack_top_dies)
14736 output_asm_insn ("fld\t%y1", operands);
14738 gcc_assert (STACK_TOP_P (operands[1]));
14739 gcc_assert (MEM_P (operands[0]));
14740 gcc_assert (GET_MODE (operands[1]) != TFmode);
14743 output_asm_insn ("fisttp%Z0\t%0", operands);
14746 if (round_mode != I387_CW_ANY)
14747 output_asm_insn ("fldcw\t%3", operands);
14748 if (stack_top_dies || dimode_p)
14749 output_asm_insn ("fistp%Z0\t%0", operands);
14751 output_asm_insn ("fist%Z0\t%0", operands);
14752 if (round_mode != I387_CW_ANY)
14753 output_asm_insn ("fldcw\t%2", operands);
14759 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14760 have the values zero or one, indicates the ffreep insn's operand
14761 from the OPERANDS array. */
14763 static const char *
14764 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14766 if (TARGET_USE_FFREEP)
14767 #ifdef HAVE_AS_IX86_FFREEP
14768 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14771 static char retval[32];
14772 int regno = REGNO (operands[opno]);
14774 gcc_assert (FP_REGNO_P (regno));
14776 regno -= FIRST_STACK_REG;
14778 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14783 return opno ? "fstp\t%y1" : "fstp\t%y0";
14787 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14788 should be used. UNORDERED_P is true when fucom should be used. */
14791 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14793 int stack_top_dies;
14794 rtx cmp_op0, cmp_op1;
14795 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14799 cmp_op0 = operands[0];
14800 cmp_op1 = operands[1];
14804 cmp_op0 = operands[1];
14805 cmp_op1 = operands[2];
14810 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14811 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14812 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14813 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14815 if (GET_MODE (operands[0]) == SFmode)
14817 return &ucomiss[TARGET_AVX ? 0 : 1];
14819 return &comiss[TARGET_AVX ? 0 : 1];
14822 return &ucomisd[TARGET_AVX ? 0 : 1];
14824 return &comisd[TARGET_AVX ? 0 : 1];
14827 gcc_assert (STACK_TOP_P (cmp_op0));
14829 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14831 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14833 if (stack_top_dies)
14835 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14836 return output_387_ffreep (operands, 1);
14839 return "ftst\n\tfnstsw\t%0";
14842 if (STACK_REG_P (cmp_op1)
14844 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14845 && REGNO (cmp_op1) != FIRST_STACK_REG)
14847 /* If the top of the 387 stack dies, and the other operand is also
14848 a stack register that dies, then this must be a `fcompp' float
14849 compare. */
14853 /* There is no double-popping fcomi variant. Fortunately,
14854 eflags is immune from the fstp's cc clobbering. */
14856 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14858 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14859 return output_387_ffreep (operands, 0);
14864 return "fucompp\n\tfnstsw\t%0";
14866 return "fcompp\n\tfnstsw\t%0";
14871 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14873 static const char * const alt[16] =
14875 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14876 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14877 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14878 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14880 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14881 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14885 "fcomi\t{%y1, %0|%0, %y1}",
14886 "fcomip\t{%y1, %0|%0, %y1}",
14887 "fucomi\t{%y1, %0|%0, %y1}",
14888 "fucomip\t{%y1, %0|%0, %y1}",
14899 mask = eflags_p << 3;
14900 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14901 mask |= unordered_p << 1;
14902 mask |= stack_top_dies;
14904 gcc_assert (mask < 16);
14913 ix86_output_addr_vec_elt (FILE *file, int value)
14915 const char *directive = ASM_LONG;
14919 directive = ASM_QUAD;
14921 gcc_assert (!TARGET_64BIT);
14924 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14928 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14930 const char *directive = ASM_LONG;
14933 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14934 directive = ASM_QUAD;
14936 gcc_assert (!TARGET_64BIT);
14938 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14939 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14940 fprintf (file, "%s%s%d-%s%d\n",
14941 directive, LPREFIX, value, LPREFIX, rel);
14942 else if (HAVE_AS_GOTOFF_IN_DATA)
14943 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14945 else if (TARGET_MACHO)
14947 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14948 machopic_output_function_base_name (file);
14953 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14954 GOT_SYMBOL_NAME, LPREFIX, value);
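
/* Example outputs (illustrative): a PIC case-table entry for label .L5
   relative to table label .L2 comes out as
       .long .L5@GOTOFF                        (ia32, @GOTOFF in data)
       .long .L5-.L2                           (x86-64 / VxWorks RTP)
       .long _GLOBAL_OFFSET_TABLE_+[.-.L5]     (fallback)
   matching the three branches above.  */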
14957 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
14961 ix86_expand_clear (rtx dest)
14965 /* We play register width games, which are only valid after reload. */
14966 gcc_assert (reload_completed);
14968 /* Avoid HImode and its attendant prefix byte. */
14969 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14970 dest = gen_rtx_REG (SImode, REGNO (dest));
14971 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14973 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14974 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14976 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14977 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
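
/* Illustration: the two forms this expands to.
       xorl %eax, %eax    2 bytes, clobbers FLAGS -- hence the CLOBBER
       movl $0, %eax      5 bytes, leaves FLAGS intact
   The xor form also breaks dependency chains; the mov form is kept
   only for TARGET_USE_MOV0 targets.  */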
14983 /* X is an unchanging MEM. If it is a constant pool reference, return
14984 the constant pool rtx, else NULL. */
14987 maybe_get_pool_constant (rtx x)
14989 x = ix86_delegitimize_address (XEXP (x, 0));
14991 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14992 return get_pool_constant (x);
14998 ix86_expand_move (enum machine_mode mode, rtx operands[])
15001 enum tls_model model;
15006 if (GET_CODE (op1) == SYMBOL_REF)
15008 model = SYMBOL_REF_TLS_MODEL (op1);
15011 op1 = legitimize_tls_address (op1, model, true);
15012 op1 = force_operand (op1, op0);
15016 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15017 && SYMBOL_REF_DLLIMPORT_P (op1))
15018 op1 = legitimize_dllimport_symbol (op1, false);
15020 else if (GET_CODE (op1) == CONST
15021 && GET_CODE (XEXP (op1, 0)) == PLUS
15022 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
15024 rtx addend = XEXP (XEXP (op1, 0), 1);
15025 rtx symbol = XEXP (XEXP (op1, 0), 0);
15028 model = SYMBOL_REF_TLS_MODEL (symbol);
15030 tmp = legitimize_tls_address (symbol, model, true);
15031 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15032 && SYMBOL_REF_DLLIMPORT_P (symbol))
15033 tmp = legitimize_dllimport_symbol (symbol, true);
15037 tmp = force_operand (tmp, NULL);
15038 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
15039 op0, 1, OPTAB_DIRECT);
15045 if ((flag_pic || MACHOPIC_INDIRECT)
15046 && mode == Pmode && symbolic_operand (op1, Pmode))
15048 if (TARGET_MACHO && !TARGET_64BIT)
15051 /* dynamic-no-pic */
15052 if (MACHOPIC_INDIRECT)
15054 rtx temp = ((reload_in_progress
15055 || ((op0 && REG_P (op0))
15057 ? op0 : gen_reg_rtx (Pmode));
15058 op1 = machopic_indirect_data_reference (op1, temp);
15060 op1 = machopic_legitimize_pic_address (op1, mode,
15061 temp == op1 ? 0 : temp);
15063 if (op0 != op1 && GET_CODE (op0) != MEM)
15065 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
15069 if (GET_CODE (op0) == MEM)
15070 op1 = force_reg (Pmode, op1);
15074 if (GET_CODE (temp) != REG)
15075 temp = gen_reg_rtx (Pmode);
15076 temp = legitimize_pic_address (op1, temp);
15081 /* dynamic-no-pic */
15087 op1 = force_reg (Pmode, op1);
15088 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
15090 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
15091 op1 = legitimize_pic_address (op1, reg);
15100 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
15101 || !push_operand (op0, mode))
15103 op1 = force_reg (mode, op1);
15105 if (push_operand (op0, mode)
15106 && ! general_no_elim_operand (op1, mode))
15107 op1 = copy_to_mode_reg (mode, op1);
15109 /* Force large constants in 64bit compilation into a register
15110 to get them CSEed. */
15111 if (can_create_pseudo_p ()
15112 && (mode == DImode) && TARGET_64BIT
15113 && immediate_operand (op1, mode)
15114 && !x86_64_zext_immediate_operand (op1, VOIDmode)
15115 && !register_operand (op0, mode)
15117 op1 = copy_to_mode_reg (mode, op1);
15119 if (can_create_pseudo_p ()
15120 && FLOAT_MODE_P (mode)
15121 && GET_CODE (op1) == CONST_DOUBLE)
15123 /* If we are loading a floating point constant to a register,
15124 force the value to memory now, since we'll get better code
15125 out the back end. */
15127 op1 = validize_mem (force_const_mem (mode, op1));
15128 if (!register_operand (op0, mode))
15130 rtx temp = gen_reg_rtx (mode);
15131 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
15132 emit_move_insn (op0, temp);
15138 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15142 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
15144 rtx op0 = operands[0], op1 = operands[1];
15145 unsigned int align = GET_MODE_ALIGNMENT (mode);
/* Force constants other than zero into memory.  We do not know how
   the instructions used to build constants modify the upper 64 bits
   of the register; once we have that information we may be able
   to handle some of them more efficiently.  */
15151 if (can_create_pseudo_p ()
15152 && register_operand (op0, mode)
15153 && (CONSTANT_P (op1)
15154 || (GET_CODE (op1) == SUBREG
15155 && CONSTANT_P (SUBREG_REG (op1))))
15156 && !standard_sse_constant_p (op1))
15157 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since attributes
   can make operands unaligned.  */
15161 if (can_create_pseudo_p ()
15162 && SSE_REG_MODE_P (mode)
15163 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
15164 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
15168 /* ix86_expand_vector_move_misalign() does not like constants ... */
15169 if (CONSTANT_P (op1)
15170 || (GET_CODE (op1) == SUBREG
15171 && CONSTANT_P (SUBREG_REG (op1))))
15172 op1 = validize_mem (force_const_mem (mode, op1));
15174 /* ... nor both arguments in memory. */
15175 if (!register_operand (op0, mode)
15176 && !register_operand (op1, mode))
15177 op1 = force_reg (mode, op1);
15179 tmp[0] = op0; tmp[1] = op1;
15180 ix86_expand_vector_move_misalign (mode, tmp);
15184 /* Make operand1 a register if it isn't already. */
15185 if (can_create_pseudo_p ()
15186 && !register_operand (op0, mode)
15187 && !register_operand (op1, mode))
15189 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
15193 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
15196 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
15197 straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }  */
15249 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
15258 switch (GET_MODE_CLASS (mode))
15260 case MODE_VECTOR_INT:
15262 switch (GET_MODE_SIZE (mode))
15265 /* If we're optimizing for size, movups is the smallest. */
15266 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15268 op0 = gen_lowpart (V4SFmode, op0);
15269 op1 = gen_lowpart (V4SFmode, op1);
15270 emit_insn (gen_avx_movups (op0, op1));
15273 op0 = gen_lowpart (V16QImode, op0);
15274 op1 = gen_lowpart (V16QImode, op1);
15275 emit_insn (gen_avx_movdqu (op0, op1));
15278 op0 = gen_lowpart (V32QImode, op0);
15279 op1 = gen_lowpart (V32QImode, op1);
15280 emit_insn (gen_avx_movdqu256 (op0, op1));
15283 gcc_unreachable ();
15286 case MODE_VECTOR_FLOAT:
15287 op0 = gen_lowpart (mode, op0);
15288 op1 = gen_lowpart (mode, op1);
15293 emit_insn (gen_avx_movups (op0, op1));
15296 emit_insn (gen_avx_movups256 (op0, op1));
15299 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15301 op0 = gen_lowpart (V4SFmode, op0);
15302 op1 = gen_lowpart (V4SFmode, op1);
15303 emit_insn (gen_avx_movups (op0, op1));
15306 emit_insn (gen_avx_movupd (op0, op1));
15309 emit_insn (gen_avx_movupd256 (op0, op1));
15312 gcc_unreachable ();
15317 gcc_unreachable ();
15325 /* If we're optimizing for size, movups is the smallest. */
15326 if (optimize_insn_for_size_p ()
15327 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15329 op0 = gen_lowpart (V4SFmode, op0);
15330 op1 = gen_lowpart (V4SFmode, op1);
15331 emit_insn (gen_sse_movups (op0, op1));
/* ??? If we have typed data, then it would appear that using
   movdqu is the only way to get unaligned data loaded with
   integer type.  */
15338 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15340 op0 = gen_lowpart (V16QImode, op0);
15341 op1 = gen_lowpart (V16QImode, op1);
15342 emit_insn (gen_sse2_movdqu (op0, op1));
15346 if (TARGET_SSE2 && mode == V2DFmode)
15350 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15352 op0 = gen_lowpart (V2DFmode, op0);
15353 op1 = gen_lowpart (V2DFmode, op1);
15354 emit_insn (gen_sse2_movupd (op0, op1));
15358 /* When SSE registers are split into halves, we can avoid
15359 writing to the top half twice. */
15360 if (TARGET_SSE_SPLIT_REGS)
15362 emit_clobber (op0);
15367 /* ??? Not sure about the best option for the Intel chips.
15368 The following would seem to satisfy; the register is
15369 entirely cleared, breaking the dependency chain. We
15370 then store to the upper half, with a dependency depth
15371 of one. A rumor has it that Intel recommends two movsd
15372 followed by an unpacklpd, but this is unconfirmed. And
15373 given that the dependency depth of the unpacklpd would
15374 still be one, I'm not sure why this would be better. */
15375 zero = CONST0_RTX (V2DFmode);
15378 m = adjust_address (op1, DFmode, 0);
15379 emit_insn (gen_sse2_loadlpd (op0, zero, m));
15380 m = adjust_address (op1, DFmode, 8);
15381 emit_insn (gen_sse2_loadhpd (op0, op0, m));
15385 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
15387 op0 = gen_lowpart (V4SFmode, op0);
15388 op1 = gen_lowpart (V4SFmode, op1);
15389 emit_insn (gen_sse_movups (op0, op1));
15393 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
15394 emit_move_insn (op0, CONST0_RTX (mode));
15396 emit_clobber (op0);
15398 if (mode != V4SFmode)
15399 op0 = gen_lowpart (V4SFmode, op0);
15400 m = adjust_address (op1, V2SFmode, 0);
15401 emit_insn (gen_sse_loadlps (op0, op0, m));
15402 m = adjust_address (op1, V2SFmode, 8);
15403 emit_insn (gen_sse_loadhps (op0, op0, m));
15406 else if (MEM_P (op0))
15408 /* If we're optimizing for size, movups is the smallest. */
15409 if (optimize_insn_for_size_p ()
15410 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
15412 op0 = gen_lowpart (V4SFmode, op0);
15413 op1 = gen_lowpart (V4SFmode, op1);
15414 emit_insn (gen_sse_movups (op0, op1));
15418 /* ??? Similar to above, only less clear because of quote
15419 typeless stores unquote. */
15420 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
15421 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15423 op0 = gen_lowpart (V16QImode, op0);
15424 op1 = gen_lowpart (V16QImode, op1);
15425 emit_insn (gen_sse2_movdqu (op0, op1));
15429 if (TARGET_SSE2 && mode == V2DFmode)
15431 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15433 op0 = gen_lowpart (V2DFmode, op0);
15434 op1 = gen_lowpart (V2DFmode, op1);
15435 emit_insn (gen_sse2_movupd (op0, op1));
15439 m = adjust_address (op0, DFmode, 0);
15440 emit_insn (gen_sse2_storelpd (m, op1));
15441 m = adjust_address (op0, DFmode, 8);
15442 emit_insn (gen_sse2_storehpd (m, op1));
15447 if (mode != V4SFmode)
15448 op1 = gen_lowpart (V4SFmode, op1);
15450 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
15452 op0 = gen_lowpart (V4SFmode, op0);
15453 emit_insn (gen_sse_movups (op0, op1));
15457 m = adjust_address (op0, V2SFmode, 0);
15458 emit_insn (gen_sse_storelps (m, op1));
15459 m = adjust_address (op0, V2SFmode, 8);
15460 emit_insn (gen_sse_storehps (m, op1));
15465 gcc_unreachable ();
15468 /* Expand a push in MODE. This is some mode for which we do not support
15469 proper push instructions, at least from the registers that we expect
15470 the value to live in. */
15473 ix86_expand_push (enum machine_mode mode, rtx x)
15477 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
15478 GEN_INT (-GET_MODE_SIZE (mode)),
15479 stack_pointer_rtx, 1, OPTAB_DIRECT);
15480 if (tmp != stack_pointer_rtx)
15481 emit_move_insn (stack_pointer_rtx, tmp);
15483 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto stack, it has to be aligned at least
   at the function argument boundary.  However since we don't have
   the argument type, we can't determine the actual argument
   boundary.  */
15489 emit_move_insn (tmp, x);
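/* E.g. pushing a value X of such a mode expands to roughly
   (illustrative):

       subl  $GET_MODE_SIZE (mode), %esp
       mov   X, (%esp)

   where the final move may itself be split further by the move
   expanders.  */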
15492 /* Helper function of ix86_fixup_binary_operands to canonicalize
15493 operand order. Returns true if the operands should be swapped. */
15496 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
15499 rtx dst = operands[0];
15500 rtx src1 = operands[1];
15501 rtx src2 = operands[2];
15503 /* If the operation is not commutative, we can't do anything. */
15504 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
15507 /* Highest priority is that src1 should match dst. */
15508 if (rtx_equal_p (dst, src1))
15510 if (rtx_equal_p (dst, src2))
15513 /* Next highest priority is that immediate constants come second. */
15514 if (immediate_operand (src2, mode))
15516 if (immediate_operand (src1, mode))
15519 /* Lowest priority is that memory references should come second. */
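/* E.g. for a commutative PLUS, (plus:SI (mem:SI A) (reg:SI B)) is
   swapped to (plus:SI (reg:SI B) (mem:SI A)) so that the memory
   operand comes second; likewise an immediate in src1 is moved to
   src2 (illustrative).  */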
15529 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
15530 destination to use for the operation. If different from the true
15531 destination in operands[0], a copy operation will be required. */
15534 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
15537 rtx dst = operands[0];
15538 rtx src1 = operands[1];
15539 rtx src2 = operands[2];
15541 /* Canonicalize operand order. */
15542 if (ix86_swap_binary_operands_p (code, mode, operands))
15546 /* It is invalid to swap operands of different modes. */
15547 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
15554 /* Both source operands cannot be in memory. */
15555 if (MEM_P (src1) && MEM_P (src2))
15557 /* Optimization: Only read from memory once. */
15558 if (rtx_equal_p (src1, src2))
15560 src2 = force_reg (mode, src2);
15564 src2 = force_reg (mode, src2);
15567 /* If the destination is memory, and we do not have matching source
15568 operands, do things in registers. */
15569 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15570 dst = gen_reg_rtx (mode);
15572 /* Source 1 cannot be a constant. */
15573 if (CONSTANT_P (src1))
15574 src1 = force_reg (mode, src1);
15576 /* Source 1 cannot be a non-matching memory. */
15577 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15578 src1 = force_reg (mode, src1);
15580 operands[1] = src1;
15581 operands[2] = src2;
15585 /* Similarly, but assume that the destination has already been
15586 set up properly. */
15589 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
15590 enum machine_mode mode, rtx operands[])
15592 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
15593 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */
15601 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
15604 rtx src1, src2, dst, op, clob;
15606 dst = ix86_fixup_binary_operands (code, mode, operands);
15607 src1 = operands[1];
15608 src2 = operands[2];
15610 /* Emit the instruction. */
15612 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
15613 if (reload_in_progress)
15615 /* Reload doesn't know about the flags register, and doesn't know that
15616 it doesn't want to clobber it. We can only do this with PLUS. */
15617 gcc_assert (code == PLUS);
else if (reload_completed
         && code == PLUS
         && !rtx_equal_p (dst, src1))
15624 /* This is going to be an LEA; avoid splitting it later. */
15629 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15630 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15633 /* Fix up the destination if needed. */
15634 if (dst != operands[0])
15635 emit_move_insn (operands[0], dst);
15638 /* Return TRUE or FALSE depending on whether the binary operator meets the
15639 appropriate constraints. */
15642 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15645 rtx dst = operands[0];
15646 rtx src1 = operands[1];
15647 rtx src2 = operands[2];
15649 /* Both source operands cannot be in memory. */
15650 if (MEM_P (src1) && MEM_P (src2))
15653 /* Canonicalize operand order for commutative operators. */
15654 if (ix86_swap_binary_operands_p (code, mode, operands))
15661 /* If the destination is memory, we must have a matching source operand. */
15662 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15665 /* Source 1 cannot be a constant. */
15666 if (CONSTANT_P (src1))
15669 /* Source 1 cannot be a non-matching memory. */
15670 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15672 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15673 return (code == AND
15676 || (TARGET_64BIT && mode == DImode))
15677 && CONST_INT_P (src2)
15678 && (INTVAL (src2) == 0xff
15679 || INTVAL (src2) == 0xffff));
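/* E.g. "andl $0xff, %eax" acts as the zero-extending move
   "movzbl %al, %eax"; both leave just the low byte of %eax
   (illustrative).  */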
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */
15690 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15693 int matching_memory;
15694 rtx src, dst, op, clob;
15699 /* If the destination is memory, and we do not have matching source
15700 operands, do things in registers. */
15701 matching_memory = 0;
15704 if (rtx_equal_p (dst, src))
15705 matching_memory = 1;
15707 dst = gen_reg_rtx (mode);
15710 /* When source operand is memory, destination must match. */
15711 if (MEM_P (src) && !matching_memory)
15712 src = force_reg (mode, src);
15714 /* Emit the instruction. */
15716 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15717 if (reload_in_progress || code == NOT)
15719 /* Reload doesn't know about the flags register, and doesn't know that
15720 it doesn't want to clobber it. */
15721 gcc_assert (code == NOT);
15726 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15727 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15730 /* Fix up the destination if needed. */
15731 if (dst != operands[0])
15732 emit_move_insn (operands[0], dst);
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */
15739 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15742 rtx end_label, qimode_label;
15743 rtx insn, div, mod;
15744 rtx scratch, tmp0, tmp1, tmp2;
15745 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15746 rtx (*gen_zero_extend) (rtx, rtx);
15747 rtx (*gen_test_ccno_1) (rtx, rtx);
15752 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15753 gen_test_ccno_1 = gen_testsi_ccno_1;
15754 gen_zero_extend = gen_zero_extendqisi2;
15757 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15758 gen_test_ccno_1 = gen_testdi_ccno_1;
15759 gen_zero_extend = gen_zero_extendqidi2;
15762 gcc_unreachable ();
15765 end_label = gen_label_rtx ();
15766 qimode_label = gen_label_rtx ();
15768 scratch = gen_reg_rtx (mode);
/* Use 8bit unsigned divmod if dividend and divisor are within the
   range [0-255].  */
15772 emit_move_insn (scratch, operands[2]);
15773 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15774 scratch, 1, OPTAB_DIRECT);
15775 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15776 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15777 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15778 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15779 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15781 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15782 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15783 JUMP_LABEL (insn) = qimode_label;
/* Generate original signed/unsigned divmod.  */
15786 div = gen_divmod4_1 (operands[0], operands[1],
15787 operands[2], operands[3]);
15790 /* Branch to the end. */
15791 emit_jump_insn (gen_jump (end_label));
15794 /* Generate 8bit unsigned divide. */
15795 emit_label (qimode_label);
15796 /* Don't use operands[0] for result of 8bit divide since not all
15797 registers support QImode ZERO_EXTRACT. */
15798 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15799 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15800 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15801 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15805 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15806 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15810 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15811 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15814 /* Extract remainder from AH. */
15815 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15816 if (REG_P (operands[1]))
15817 insn = emit_move_insn (operands[1], tmp1);
/* Need a new scratch register since the old one has the result
   of the 8bit divide.  */
15822 scratch = gen_reg_rtx (mode);
15823 emit_move_insn (scratch, tmp1);
15824 insn = emit_move_insn (operands[1], scratch);
15826 set_unique_reg_note (insn, REG_EQUAL, mod);
15828 /* Zero extend quotient from AL. */
15829 tmp1 = gen_lowpart (QImode, tmp0);
15830 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15831 set_unique_reg_note (insn, REG_EQUAL, div);
15833 emit_label (end_label);
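/* The emitted sequence looks roughly like this (illustrative,
   AT&T syntax, 32bit case):

       movl   dividend, %tmp
       orl    divisor, %tmp
       testl  $-0x100, %tmp      # both values within [0-255]?
       je     .Lqimode
       <full 32bit (i)div>       # quotient/remainder as usual
       jmp    .Lend
   .Lqimode:
       divb   divisor            # AX / div8: AL = quot, AH = rem
       <zero-extend AL, extract AH>
   .Lend:                                                        */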
15836 #define LEA_SEARCH_THRESHOLD 12
15838 /* Search backward for non-agu definition of register number REGNO1
15839 or register number REGNO2 in INSN's basic block until
15840 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15841 2. Reach BB boundary, or
15842 3. Reach agu definition.
15843 Returns the distance between the non-agu definition point and INSN.
15844 If no definition point, returns -1. */
15847 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15850 basic_block bb = BLOCK_FOR_INSN (insn);
15853 enum attr_type insn_type;
15855 if (insn != BB_HEAD (bb))
15857 rtx prev = PREV_INSN (insn);
15858 while (prev && distance < LEA_SEARCH_THRESHOLD)
15860 if (NONDEBUG_INSN_P (prev))
15863 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15864 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15865 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15866 && (regno1 == DF_REF_REGNO (*def_rec)
15867 || regno2 == DF_REF_REGNO (*def_rec)))
15869 insn_type = get_attr_type (prev);
15870 if (insn_type != TYPE_LEA)
15874 if (prev == BB_HEAD (bb))
15876 prev = PREV_INSN (prev);
15880 if (distance < LEA_SEARCH_THRESHOLD)
15884 bool simple_loop = false;
15886 FOR_EACH_EDGE (e, ei, bb->preds)
15889 simple_loop = true;
15895 rtx prev = BB_END (bb);
15898 && distance < LEA_SEARCH_THRESHOLD)
15900 if (NONDEBUG_INSN_P (prev))
15903 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15904 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15905 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15906 && (regno1 == DF_REF_REGNO (*def_rec)
15907 || regno2 == DF_REF_REGNO (*def_rec)))
15909 insn_type = get_attr_type (prev);
15910 if (insn_type != TYPE_LEA)
15914 prev = PREV_INSN (prev);
15922 /* get_attr_type may modify recog data. We want to make sure
15923 that recog data is valid for instruction INSN, on which
15924 distance_non_agu_define is called. INSN is unchanged here. */
15925 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
15934 distance_agu_use (unsigned int regno0, rtx insn)
15936 basic_block bb = BLOCK_FOR_INSN (insn);
15941 if (insn != BB_END (bb))
15943 rtx next = NEXT_INSN (insn);
15944 while (next && distance < LEA_SEARCH_THRESHOLD)
15946 if (NONDEBUG_INSN_P (next))
15950 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15951 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15952 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15953 && regno0 == DF_REF_REGNO (*use_rec))
15955 /* Return DISTANCE if OP0 is used in memory
15956 address in NEXT. */
15960 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15961 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15962 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15963 && regno0 == DF_REF_REGNO (*def_rec))
15965 /* Return -1 if OP0 is set in NEXT. */
15969 if (next == BB_END (bb))
15971 next = NEXT_INSN (next);
15975 if (distance < LEA_SEARCH_THRESHOLD)
15979 bool simple_loop = false;
15981 FOR_EACH_EDGE (e, ei, bb->succs)
15984 simple_loop = true;
15990 rtx next = BB_HEAD (bb);
15993 && distance < LEA_SEARCH_THRESHOLD)
15995 if (NONDEBUG_INSN_P (next))
15999 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
16000 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
16001 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
16002 && regno0 == DF_REF_REGNO (*use_rec))
16004 /* Return DISTANCE if OP0 is used in memory
16005 address in NEXT. */
16009 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
16010 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
16011 && !DF_REF_IS_ARTIFICIAL (*def_rec)
16012 && regno0 == DF_REF_REGNO (*def_rec))
16014 /* Return -1 if OP0 is set in NEXT. */
16019 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is preferred over LEA.
   Positive value: LEA is preferred over ADD.  */
16032 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like Atom, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */
16041 ix86_lea_for_add_ok (rtx insn, rtx operands[])
16043 unsigned int regno0 = true_regnum (operands[0]);
16044 unsigned int regno1 = true_regnum (operands[1]);
16045 unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c, (a != b && a != c), we must use the lea form.  */
16048 if (regno0 != regno1 && regno0 != regno2)
16051 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
16055 int dist_define, dist_use;
16057 /* Return false if REGNO0 isn't used in memory address. */
16058 dist_use = distance_agu_use (regno0, insn);
16062 dist_define = distance_non_agu_define (regno1, regno2, insn);
16063 if (dist_define <= 0)
/* If this insn has both backward non-agu dependence and forward
   agu dependence, the one with the shorter distance takes effect.  */
16068 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
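/* E.g. on Atom, when the register defined here feeds a memory address
   a few insns later, "leal (%ebx,%ecx), %eax" reportedly keeps the
   computation in the AGU and avoids an ALU-to-AGU forwarding stall,
   while a plain "addl" would be cheaper otherwise (illustrative).  */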
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */
16079 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
16085 /* Retrieve destination of SET_BODY. */
16086 switch (GET_CODE (set_body))
16089 set_dest = SET_DEST (set_body);
16090 if (!set_dest || !REG_P (set_dest))
16094 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
16095 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
16103 /* Retrieve shift count of USE_BODY. */
16104 switch (GET_CODE (use_body))
16107 shift_rtx = XEXP (use_body, 1);
16110 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
16111 if (ix86_dep_by_shift_count_body (set_body,
16112 XVECEXP (use_body, 0, i)))
16120 && (GET_CODE (shift_rtx) == ASHIFT
16121 || GET_CODE (shift_rtx) == LSHIFTRT
16122 || GET_CODE (shift_rtx) == ASHIFTRT
16123 || GET_CODE (shift_rtx) == ROTATE
16124 || GET_CODE (shift_rtx) == ROTATERT))
16126 rtx shift_count = XEXP (shift_rtx, 1);
16128 /* Return true if shift count is dest of SET_BODY. */
16129 if (REG_P (shift_count)
16130 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */
16141 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
16143 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
16144 PATTERN (use_insn));
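/* E.g. for the pair (illustrative)

       movb %al, %cl        <- SET_INSN writes %cl
       sall %cl, %edx       <- USE_INSN shifts by %cl

   the destination of SET_INSN is the shift count of USE_INSN, so
   this returns true.  */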
16147 /* Return TRUE or FALSE depending on whether the unary operator meets the
16148 appropriate constraints. */
16151 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
16152 enum machine_mode mode ATTRIBUTE_UNUSED,
16153 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, source and destination must match.  */
16156 if ((MEM_P (operands[0])
16157 || MEM_P (operands[1]))
16158 && ! rtx_equal_p (operands[0], operands[1]))
16163 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
16164 are ok, keeping in mind the possible movddup alternative. */
16167 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
16169 if (MEM_P (operands[0]))
16170 return rtx_equal_p (operands[0], operands[1 + high]);
16171 if (MEM_P (operands[1]) && MEM_P (operands[2]))
16172 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
16176 /* Post-reload splitter for converting an SF or DFmode value in an
16177 SSE register into an unsigned SImode. */
16180 ix86_split_convert_uns_si_sse (rtx operands[])
16182 enum machine_mode vecmode;
16183 rtx value, large, zero_or_two31, input, two31, x;
16185 large = operands[1];
16186 zero_or_two31 = operands[2];
16187 input = operands[3];
16188 two31 = operands[4];
16189 vecmode = GET_MODE (large);
16190 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
16192 /* Load up the value into the low element. We must ensure that the other
16193 elements are valid floats -- zero is the easiest such value. */
16196 if (vecmode == V4SFmode)
16197 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
16199 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
16203 input = gen_rtx_REG (vecmode, REGNO (input));
16204 emit_move_insn (value, CONST0_RTX (vecmode));
16205 if (vecmode == V4SFmode)
16206 emit_insn (gen_sse_movss (value, value, input));
16208 emit_insn (gen_sse2_movsd (value, value, input));
16211 emit_move_insn (large, two31);
16212 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
16214 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
16215 emit_insn (gen_rtx_SET (VOIDmode, large, x));
16217 x = gen_rtx_AND (vecmode, zero_or_two31, large);
16218 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
16220 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
16221 emit_insn (gen_rtx_SET (VOIDmode, value, x));
16223 large = gen_rtx_REG (V4SImode, REGNO (large));
16224 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
16226 x = gen_rtx_REG (V4SImode, REGNO (value));
16227 if (vecmode == V4SFmode)
16228 emit_insn (gen_sse2_cvttps2dq (x, value));
16230 emit_insn (gen_sse2_cvttpd2dq (x, value));
16233 emit_insn (gen_xorv4si3 (value, value, large));
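/* In effect this computes, per element (illustrative):

       f < 2^31 ? (int) f
                : ((int) (f - 2^31)) ^ 0x80000000

   i.e. values too large for the signed cvttps2dq/cvttpd2dq have 2^31
   subtracted first, and the sign bit is put back with the final XOR.  */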
16236 /* Convert an unsigned DImode value into a DFmode, using only SSE.
16237 Expects the 64-bit DImode to be supplied in a pair of integral
16238 registers. Requires SSE2; will use SSE3 if available. For x86_32,
16239 -mfpmath=sse, !optimize_size only. */
16242 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
16244 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
16245 rtx int_xmm, fp_xmm;
16246 rtx biases, exponents;
16249 int_xmm = gen_reg_rtx (V4SImode);
16250 if (TARGET_INTER_UNIT_MOVES)
16251 emit_insn (gen_movdi_to_sse (int_xmm, input));
16252 else if (TARGET_SSE_SPLIT_REGS)
16254 emit_clobber (int_xmm);
16255 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
16259 x = gen_reg_rtx (V2DImode);
16260 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
16261 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
16264 x = gen_rtx_CONST_VECTOR (V4SImode,
16265 gen_rtvec (4, GEN_INT (0x43300000UL),
16266 GEN_INT (0x45300000UL),
16267 const0_rtx, const0_rtx));
16268 exponents = validize_mem (force_const_mem (V4SImode, x));
16270 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
16271 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
16273 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
16274 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
16275 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
16276 (0x1.0p84 + double(fp_value_hi_xmm)).
16277 Note these exponents differ by 32. */
16279 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
16281 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
16282 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
16283 real_ldexp (&bias_lo_rvt, &dconst1, 52);
16284 real_ldexp (&bias_hi_rvt, &dconst1, 84);
16285 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
16286 x = const_double_from_real_value (bias_hi_rvt, DFmode);
16287 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
16288 biases = validize_mem (force_const_mem (V2DFmode, biases));
16289 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
16291 /* Add the upper and lower DFmode values together. */
16293 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
16296 x = copy_to_mode_reg (V2DFmode, fp_xmm);
16297 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
16298 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
16301 ix86_expand_vector_extract (false, target, fp_xmm, 0);
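/* Worked example of the bias trick (illustrative): for x = hi:lo,
   the double with bit pattern (0x43300000 ## lo) has value 2^52 + lo
   and (0x45300000 ## hi) has value 2^84 + hi * 2^32.  Subtracting
   the biases 2^52 and 2^84 exactly and summing the two halves yields
   (double) lo + (double) hi * 2^32 == (double) x, with rounding
   happening only in the final addition.  */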
16304 /* Not used, but eases macroization of patterns. */
16306 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
16307 rtx input ATTRIBUTE_UNUSED)
16309 gcc_unreachable ();
16312 /* Convert an unsigned SImode value into a DFmode. Only currently used
16313 for SSE, but applicable anywhere. */
16316 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
16318 REAL_VALUE_TYPE TWO31r;
16321 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
16322 NULL, 1, OPTAB_DIRECT);
16324 fp = gen_reg_rtx (DFmode);
16325 emit_insn (gen_floatsidf2 (fp, x));
16327 real_ldexp (&TWO31r, &dconst1, 31);
16328 x = const_double_from_real_value (TWO31r, DFmode);
16330 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
16332 emit_move_insn (target, x);
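/* I.e. this computes (double) u for unsigned u as (illustrative)

       (double) (int) (u - 2^31) + 2^31.0

   The wrapping subtraction brings u into signed SImode range for
   floatsidf, and adding 2^31.0 (exact in DFmode) restores the value.  */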
16335 /* Convert a signed DImode value into a DFmode. Only used for SSE in
16336 32-bit mode; otherwise we have a direct convert instruction. */
16339 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
16341 REAL_VALUE_TYPE TWO32r;
16342 rtx fp_lo, fp_hi, x;
16344 fp_lo = gen_reg_rtx (DFmode);
16345 fp_hi = gen_reg_rtx (DFmode);
16347 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
16349 real_ldexp (&TWO32r, &dconst1, 32);
16350 x = const_double_from_real_value (TWO32r, DFmode);
16351 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
16353 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
16355 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
16358 emit_move_insn (target, x);
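/* I.e. for signed 64-bit x = hi:lo this computes (illustrative)

       (double) hi * 2^32 + (double) (unsigned) lo

   where the unsigned low part conversion reuses
   ix86_expand_convert_uns_sidf_sse above.  */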
16361 /* Convert an unsigned SImode value into a SFmode, using only SSE.
16362 For x86_32, -mfpmath=sse, !optimize_size only. */
16364 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
16366 REAL_VALUE_TYPE ONE16r;
16367 rtx fp_hi, fp_lo, int_hi, int_lo, x;
16369 real_ldexp (&ONE16r, &dconst1, 16);
16370 x = const_double_from_real_value (ONE16r, SFmode);
16371 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
16372 NULL, 0, OPTAB_DIRECT);
16373 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
16374 NULL, 0, OPTAB_DIRECT);
16375 fp_hi = gen_reg_rtx (SFmode);
16376 fp_lo = gen_reg_rtx (SFmode);
16377 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
16378 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
16379 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
16381 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
16383 if (!rtx_equal_p (target, fp_hi))
16384 emit_move_insn (target, fp_hi);
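/* I.e. this computes (float) u as (illustrative)

       (float) (u >> 16) * 65536.0f + (float) (u & 0xffff)

   Both 16bit halves convert to SFmode exactly, and the scaling by
   2^16 is exact, so the only rounding happens in the final add.  */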
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
16392 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
16399 v = gen_rtvec (4, value, value, value, value);
16400 return gen_rtx_CONST_VECTOR (V4SImode, v);
16404 v = gen_rtvec (2, value, value);
16405 return gen_rtx_CONST_VECTOR (V2DImode, v);
16409 v = gen_rtvec (8, value, value, value, value,
16410 value, value, value, value);
16412 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
16413 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16414 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
16415 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16416 return gen_rtx_CONST_VECTOR (V8SFmode, v);
16420 v = gen_rtvec (4, value, value, value, value);
16422 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
16423 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
16424 return gen_rtx_CONST_VECTOR (V4SFmode, v);
16428 v = gen_rtvec (4, value, value, value, value);
16430 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
16431 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
16432 return gen_rtx_CONST_VECTOR (V4DFmode, v);
16436 v = gen_rtvec (2, value, value);
16438 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
16439 return gen_rtx_CONST_VECTOR (V2DFmode, v);
16442 gcc_unreachable ();
16446 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
16447 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
16448 for an SSE register. If VECT is true, then replicate the mask for
16449 all elements of the vector register. If INVERT is true, then create
16450 a mask excluding the sign bit. */
16453 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
16455 enum machine_mode vec_mode, imode;
16456 HOST_WIDE_INT hi, lo;
16461 /* Find the sign bit, sign extended to 2*HWI. */
16468 mode = GET_MODE_INNER (mode);
16470 lo = 0x80000000, hi = lo < 0;
16477 mode = GET_MODE_INNER (mode);
16479 if (HOST_BITS_PER_WIDE_INT >= 64)
16480 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
16482 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16487 vec_mode = VOIDmode;
16488 if (HOST_BITS_PER_WIDE_INT >= 64)
16491 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
16498 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
16502 lo = ~lo, hi = ~hi;
16508 mask = immed_double_const (lo, hi, imode);
16510 vec = gen_rtvec (2, v, mask);
16511 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
16512 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
16519 gcc_unreachable ();
16523 lo = ~lo, hi = ~hi;
16525 /* Force this value into the low part of a fp vector constant. */
16526 mask = immed_double_const (lo, hi, imode);
16527 mask = gen_lowpart (mode, mask);
16529 if (vec_mode == VOIDmode)
16530 return force_reg (mode, mask);
16532 v = ix86_build_const_vector (vec_mode, vect, mask);
16533 return force_reg (vec_mode, v);
16536 /* Generate code for floating point ABS or NEG. */
16539 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
16542 rtx mask, set, dst, src;
16543 bool use_sse = false;
16544 bool vector_mode = VECTOR_MODE_P (mode);
16545 enum machine_mode vmode = mode;
16549 else if (mode == TFmode)
16551 else if (TARGET_SSE_MATH)
16553 use_sse = SSE_FLOAT_MODE_P (mode);
16554 if (mode == SFmode)
16556 else if (mode == DFmode)
16560 /* NEG and ABS performed with SSE use bitwise mask operations.
16561 Create the appropriate mask now. */
16563 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
16570 set = gen_rtx_fmt_e (code, mode, src);
16571 set = gen_rtx_SET (VOIDmode, dst, set);
16578 use = gen_rtx_USE (VOIDmode, mask);
16580 par = gen_rtvec (2, set, use);
16583 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16584 par = gen_rtvec (3, set, use, clob);
16586 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
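/* E.g. for SFmode in an SSE register this becomes a bitwise op on
   the sign bit (illustrative):

       NEG:  xorps {0x80000000,...}, %xmm0
       ABS:  andps {0x7fffffff,...}, %xmm0  */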
16592 /* Expand a copysign operation. Special case operand 0 being a constant. */
16595 ix86_expand_copysign (rtx operands[])
16597 enum machine_mode mode, vmode;
16598 rtx dest, op0, op1, mask, nmask;
16600 dest = operands[0];
16604 mode = GET_MODE (dest);
16606 if (mode == SFmode)
16608 else if (mode == DFmode)
16613 if (GET_CODE (op0) == CONST_DOUBLE)
16615 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
16617 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
16618 op0 = simplify_unary_operation (ABS, mode, op0, mode);
16620 if (mode == SFmode || mode == DFmode)
16622 if (op0 == CONST0_RTX (mode))
16623 op0 = CONST0_RTX (vmode);
16626 rtx v = ix86_build_const_vector (vmode, false, op0);
16628 op0 = force_reg (vmode, v);
16631 else if (op0 != CONST0_RTX (mode))
16632 op0 = force_reg (mode, op0);
16634 mask = ix86_build_signbit_mask (vmode, 0, 0);
16636 if (mode == SFmode)
16637 copysign_insn = gen_copysignsf3_const;
16638 else if (mode == DFmode)
16639 copysign_insn = gen_copysigndf3_const;
16641 copysign_insn = gen_copysigntf3_const;
16643 emit_insn (copysign_insn (dest, op0, op1, mask));
16647 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16649 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16650 mask = ix86_build_signbit_mask (vmode, 0, 0);
16652 if (mode == SFmode)
16653 copysign_insn = gen_copysignsf3_var;
16654 else if (mode == DFmode)
16655 copysign_insn = gen_copysigndf3_var;
16657 copysign_insn = gen_copysigntf3_var;
16659 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16663 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16664 be a constant, and so has already been expanded into a vector constant. */
16667 ix86_split_copysign_const (rtx operands[])
16669 enum machine_mode mode, vmode;
16670 rtx dest, op0, mask, x;
16672 dest = operands[0];
16674 mask = operands[3];
16676 mode = GET_MODE (dest);
16677 vmode = GET_MODE (mask);
16679 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16680 x = gen_rtx_AND (vmode, dest, mask);
16681 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16683 if (op0 != CONST0_RTX (vmode))
16685 x = gen_rtx_IOR (vmode, dest, op0);
16686 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16690 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16691 so we have to do two masks. */
16694 ix86_split_copysign_var (rtx operands[])
16696 enum machine_mode mode, vmode;
16697 rtx dest, scratch, op0, op1, mask, nmask, x;
16699 dest = operands[0];
16700 scratch = operands[1];
16703 nmask = operands[4];
16704 mask = operands[5];
16706 mode = GET_MODE (dest);
16707 vmode = GET_MODE (mask);
16709 if (rtx_equal_p (op0, op1))
16711 /* Shouldn't happen often (it's useless, obviously), but when it does
16712 we'd generate incorrect code if we continue below. */
16713 emit_move_insn (dest, op0);
16717 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16719 gcc_assert (REGNO (op1) == REGNO (scratch));
16721 x = gen_rtx_AND (vmode, scratch, mask);
16722 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16725 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16726 x = gen_rtx_NOT (vmode, dest);
16727 x = gen_rtx_AND (vmode, x, op0);
16728 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16732 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16734 x = gen_rtx_AND (vmode, scratch, mask);
16736 else /* alternative 2,4 */
16738 gcc_assert (REGNO (mask) == REGNO (scratch));
16739 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16740 x = gen_rtx_AND (vmode, scratch, op1);
16742 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16744 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16746 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16747 x = gen_rtx_AND (vmode, dest, nmask);
16749 else /* alternative 3,4 */
16751 gcc_assert (REGNO (nmask) == REGNO (dest));
16753 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16754 x = gen_rtx_AND (vmode, dest, op0);
16756 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16759 x = gen_rtx_IOR (vmode, dest, scratch);
16760 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
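/* In vector terms the variable case computes (illustrative):

       dest = (op0 & nmask) | (op1 & mask)

   with mask holding just the sign bit and nmask its complement, so
   the magnitude comes from op0 and the sign from op1.  */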
16763 /* Return TRUE or FALSE depending on whether the first SET in INSN
16764 has source and destination with matching CC modes, and that the
16765 CC mode is at least as constrained as REQ_MODE. */
16768 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16771 enum machine_mode set_mode;
16773 set = PATTERN (insn);
16774 if (GET_CODE (set) == PARALLEL)
16775 set = XVECEXP (set, 0, 0);
16776 gcc_assert (GET_CODE (set) == SET);
16777 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16779 set_mode = GET_MODE (SET_DEST (set));
16783 if (req_mode != CCNOmode
16784 && (req_mode != CCmode
16785 || XEXP (SET_SRC (set), 1) != const0_rtx))
16789 if (req_mode == CCGCmode)
16793 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16797 if (req_mode == CCZmode)
16808 gcc_unreachable ();
16811 return GET_MODE (SET_SRC (set)) == set_mode;
16814 /* Generate insn patterns to do an integer compare of OPERANDS. */
16817 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16819 enum machine_mode cmpmode;
16822 cmpmode = SELECT_CC_MODE (code, op0, op1);
16823 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16825 /* This is very simple, but making the interface the same as in the
16826 FP case makes the rest of the code easier. */
16827 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16828 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16830 /* Return the test that should be put into the flags user, i.e.
16831 the bcc, scc, or cmov instruction. */
16832 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16835 /* Figure out whether to use ordered or unordered fp comparisons.
16836 Return the appropriate mode to use. */
16839 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all comparisons
   non-trapping when compiling for IEEE.  Once gcc is able to distinguish
   all trapping and nontrapping forms of comparisons, we can make inequality
   comparisons trapping again, since it results in better code when using
   FCOM based compares.  */
16846 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16850 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16852 enum machine_mode mode = GET_MODE (op0);
16854 if (SCALAR_FLOAT_MODE_P (mode))
16856 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16857 return ix86_fp_compare_mode (code);
16862 /* Only zero flag is needed. */
16863 case EQ: /* ZF=0 */
16864 case NE: /* ZF!=0 */
16866 /* Codes needing carry flag. */
16867 case GEU: /* CF=0 */
16868 case LTU: /* CF=1 */
16869 /* Detect overflow checks. They need just the carry flag. */
16870 if (GET_CODE (op0) == PLUS
16871 && rtx_equal_p (op1, XEXP (op0, 0)))
16875 case GTU: /* CF=0 & ZF=0 */
16876 case LEU: /* CF=1 | ZF=1 */
16877 /* Detect overflow checks. They need just the carry flag. */
16878 if (GET_CODE (op0) == MINUS
16879 && rtx_equal_p (op1, XEXP (op0, 0)))
16883 /* Codes possibly doable only with sign flag when
16884 comparing against zero. */
16885 case GE: /* SF=OF or SF=0 */
16886 case LT: /* SF<>OF or SF=1 */
16887 if (op1 == const0_rtx)
16890 /* For other cases Carry flag is not required. */
/* Codes doable only with the sign flag when comparing
   against zero, but we miss the jump instruction for it,
   so we need to use relational tests against overflow,
   which thus needs to be zero.  */
16896 case GT: /* ZF=0 & SF=OF */
16897 case LE: /* ZF=1 | SF<>OF */
16898 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags), and combine may ask us for
   the proper mode.  */
16907 gcc_unreachable ();
16911 /* Return the fixed registers used for condition codes. */
16914 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16921 /* If two condition code modes are compatible, return a condition code
16922 mode which is compatible with both. Otherwise, return
16925 static enum machine_mode
16926 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16931 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16934 if ((m1 == CCGCmode && m2 == CCGOCmode)
16935 || (m1 == CCGOCmode && m2 == CCGCmode))
16941 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   know.  */
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */
16983 static enum rtx_code
16984 ix86_fp_swap_condition (enum rtx_code code)
16988 case GT: /* GTU - CF=0 & ZF=0 */
16989 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16990 case GE: /* GEU - CF=0 */
16991 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16992 case UNLT: /* LTU - CF=1 */
16993 return TARGET_IEEE_FP ? UNKNOWN : GT;
16994 case UNLE: /* LEU - CF=1 | ZF=1 */
16995 return TARGET_IEEE_FP ? UNKNOWN : GE;
16997 return swap_condition (code);
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions use the number of instructions as a cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */
17007 ix86_fp_comparison_cost (enum rtx_code code)
17011 /* The cost of code using bit-twiddling on %ah. */
17028 arith_cost = TARGET_IEEE_FP ? 5 : 4;
17032 arith_cost = TARGET_IEEE_FP ? 6 : 4;
17035 gcc_unreachable ();
17038 switch (ix86_fp_comparison_strategy (code))
17040 case IX86_FPCMP_COMI:
17041 return arith_cost > 4 ? 3 : 2;
17042 case IX86_FPCMP_SAHF:
17043 return arith_cost > 4 ? 4 : 3;
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
17053 enum ix86_fpcmp_strategy
17054 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
17056 /* Do fcomi/sahf based test when profitable. */
17059 return IX86_FPCMP_COMI;
17061 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
17062 return IX86_FPCMP_SAHF;
17064 return IX86_FPCMP_ARITH;
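/* The three strategies correspond roughly to (illustrative):

       IX86_FPCMP_COMI:   fcomi  %st(1), %st ; jcc ...
       IX86_FPCMP_SAHF:   fcomp ; fnstsw %ax ; sahf ; jcc ...
       IX86_FPCMP_ARITH:  fcomp ; fnstsw %ax ; test/and %ah ; jcc ...  */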
17067 /* Swap, force into registers, or otherwise massage the two operands
17068 to a fp comparison. The operands are updated in place; the new
17069 comparison code is returned. */
17071 static enum rtx_code
17072 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
17074 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
17075 rtx op0 = *pop0, op1 = *pop1;
17076 enum machine_mode op_mode = GET_MODE (op0);
17077 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
/* All of the unordered compare instructions only work on registers.
   The same is true of the fcomi compare instructions.  The XFmode
   compare instructions require registers except when comparing
   against zero or when converting operand 1 from fixed point to
   floating point.  */

if (!is_sse
17086 && (fpcmp_mode == CCFPUmode
17087 || (op_mode == XFmode
17088 && ! (standard_80387_constant_p (op0) == 1
17089 || standard_80387_constant_p (op1) == 1)
17090 && GET_CODE (op1) != FLOAT)
17091 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
17093 op0 = force_reg (op_mode, op0);
17094 op1 = force_reg (op_mode, op1);
17098 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
17099 things around if they appear profitable, otherwise force op0
17100 into a register. */
17102 if (standard_80387_constant_p (op0) == 0
17104 && ! (standard_80387_constant_p (op1) == 0
17107 enum rtx_code new_code = ix86_fp_swap_condition (code);
17108 if (new_code != UNKNOWN)
17111 tmp = op0, op0 = op1, op1 = tmp;
17117 op0 = force_reg (op_mode, op0);
17119 if (CONSTANT_P (op1))
17121 int tmp = standard_80387_constant_p (op1);
17123 op1 = validize_mem (force_const_mem (op_mode, op1));
17127 op1 = force_reg (op_mode, op1);
17130 op1 = force_reg (op_mode, op1);
17134 /* Try to rearrange the comparison to make it cheaper. */
17135 if (ix86_fp_comparison_cost (code)
17136 > ix86_fp_comparison_cost (swap_condition (code))
17137 && (REG_P (op1) || can_create_pseudo_p ()))
17140 tmp = op0, op0 = op1, op1 = tmp;
17141 code = swap_condition (code);
17143 op0 = force_reg (op_mode, op0);
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */
17156 ix86_fp_compare_code_to_integer (enum rtx_code code)
17185 /* Generate insn patterns to do a floating point compare of OPERANDS. */
17188 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
17190 enum machine_mode fpcmp_mode, intcmp_mode;
17193 fpcmp_mode = ix86_fp_compare_mode (code);
17194 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
17196 /* Do fcomi/sahf based test when profitable. */
17197 switch (ix86_fp_comparison_strategy (code))
17199 case IX86_FPCMP_COMI:
17200 intcmp_mode = fpcmp_mode;
17201 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17202 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17207 case IX86_FPCMP_SAHF:
17208 intcmp_mode = fpcmp_mode;
17209 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17210 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
17214 scratch = gen_reg_rtx (HImode);
17215 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
17216 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
17219 case IX86_FPCMP_ARITH:
17220 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
17221 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
17222 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
17224 scratch = gen_reg_rtx (HImode);
17225 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
17227 /* In the unordered case, we have to check C2 for NaN's, which
17228 doesn't happen to work out to anything nice combination-wise.
17229 So do some bit twiddling on the value we've got in AH to come
17230 up with an appropriate set of condition codes. */
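/* After fnstsw, %ah holds C0 as 0x01, C2 as 0x04 and C3 as 0x40,
   so the mask 0x45 below selects all three.  After fcom, C0 acts
   like a carry flag, C3 like a zero flag, and C2 flags an
   unordered result (NaN).  */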
17232 intcmp_mode = CCNOmode;
17237 if (code == GT || !TARGET_IEEE_FP)
17239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17244 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17245 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
17247 intcmp_mode = CCmode;
17253 if (code == LT && TARGET_IEEE_FP)
17255 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17256 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
17257 intcmp_mode = CCmode;
17262 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
17268 if (code == GE || !TARGET_IEEE_FP)
17270 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
17275 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17276 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
17282 if (code == LE && TARGET_IEEE_FP)
17284 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17285 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
17286 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17287 intcmp_mode = CCmode;
17292 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
17298 if (code == EQ && TARGET_IEEE_FP)
17300 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17301 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
17302 intcmp_mode = CCmode;
17307 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17313 if (code == NE && TARGET_IEEE_FP)
17315 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
17316 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
17322 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
17328 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17332 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
17337 gcc_unreachable ();
17345 /* Return the test that should be put into the flags user, i.e.
17346 the bcc, scc, or cmov instruction. */
17347 return gen_rtx_fmt_ee (code, VOIDmode,
17348 gen_rtx_REG (intcmp_mode, FLAGS_REG),
17353 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
17357 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
17358 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
17360 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
17362 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
17363 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17366 ret = ix86_expand_int_compare (code, op0, op1);
17372 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
17374 enum machine_mode mode = GET_MODE (op0);
17386 tmp = ix86_expand_compare (code, op0, op1);
17387 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17388 gen_rtx_LABEL_REF (VOIDmode, label),
17390 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17397 /* Expand DImode branch into multiple compare+branch. */
17399 rtx lo[2], hi[2], label2;
17400 enum rtx_code code1, code2, code3;
17401 enum machine_mode submode;
17403 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
17405 tmp = op0, op0 = op1, op1 = tmp;
17406 code = swap_condition (code);
17409 split_double_mode (mode, &op0, 1, lo+0, hi+0);
17410 split_double_mode (mode, &op1, 1, lo+1, hi+1);
17412 submode = mode == DImode ? SImode : DImode;
17414 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
17415 avoid two branches. This costs one extra insn, so disable when
17416 optimizing for size. */
17418 if ((code == EQ || code == NE)
17419 && (!optimize_insn_for_size_p ()
17420 || hi[1] == const0_rtx || lo[1] == const0_rtx))
17425 if (hi[1] != const0_rtx)
17426 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
17427 NULL_RTX, 0, OPTAB_WIDEN);
17430 if (lo[1] != const0_rtx)
17431 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
17432 NULL_RTX, 0, OPTAB_WIDEN);
17434 tmp = expand_binop (submode, ior_optab, xor1, xor0,
17435 NULL_RTX, 0, OPTAB_WIDEN);
17437 ix86_expand_branch (code, tmp, const0_rtx, label);
17441 /* Otherwise, if we are doing less-than or greater-or-equal-than,
17442 op1 is a constant and the low word is zero, then we can just
17443 examine the high word. Similarly for low word -1 and
17444 less-or-equal-than or greater-than. */
17446 if (CONST_INT_P (hi[1]))
17449 case LT: case LTU: case GE: case GEU:
17450 if (lo[1] == const0_rtx)
17452 ix86_expand_branch (code, hi[0], hi[1], label);
17456 case LE: case LEU: case GT: case GTU:
17457 if (lo[1] == constm1_rtx)
17459 ix86_expand_branch (code, hi[0], hi[1], label);
17467 /* Otherwise, we need two or three jumps. */
17469 label2 = gen_label_rtx ();
17472 code2 = swap_condition (code);
17473 code3 = unsigned_condition (code);
17477 case LT: case GT: case LTU: case GTU:
17480 case LE: code1 = LT; code2 = GT; break;
17481 case GE: code1 = GT; code2 = LT; break;
17482 case LEU: code1 = LTU; code2 = GTU; break;
17483 case GEU: code1 = GTU; code2 = LTU; break;
17485 case EQ: code1 = UNKNOWN; code2 = NE; break;
17486 case NE: code2 = UNKNOWN; break;
17489 gcc_unreachable ();
/*
 * a < b =>
 *    if (hi(a) < hi(b)) goto true;
 *    if (hi(a) > hi(b)) goto false;
 *    if (lo(a) < lo(b)) goto true;
 *  false:
 */
17500 if (code1 != UNKNOWN)
17501 ix86_expand_branch (code1, hi[0], hi[1], label);
17502 if (code2 != UNKNOWN)
17503 ix86_expand_branch (code2, hi[0], hi[1], label2);
17505 ix86_expand_branch (code3, lo[0], lo[1], label);
17507 if (code2 != UNKNOWN)
17508 emit_label (label2);
17513 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
17518 /* Split branch based on floating point condition. */
17520 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
17521 rtx target1, rtx target2, rtx tmp, rtx pushed)
17526 if (target2 != pc_rtx)
17529 code = reverse_condition_maybe_unordered (code);
17534 condition = ix86_expand_fp_compare (code, op1, op2,
17537 /* Remove pushed operand from stack. */
17539 ix86_free_from_memory (GET_MODE (pushed));
17541 i = emit_jump_insn (gen_rtx_SET
17543 gen_rtx_IF_THEN_ELSE (VOIDmode,
17544 condition, target1, target2)));
17545 if (split_branch_probability >= 0)
17546 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
17550 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
17554 gcc_assert (GET_MODE (dest) == QImode);
17556 ret = ix86_expand_compare (code, op0, op1);
17557 PUT_MODE (ret, QImode);
17558 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
17561 /* Expand comparison setting or clearing carry flag. Return true when
17562 successful and set pop for the operation. */
17564 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
17566 enum machine_mode mode =
17567 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
/* Do not handle double-mode compares that go through a special path.  */
17570 if (mode == (TARGET_64BIT ? TImode : DImode))
17573 if (SCALAR_FLOAT_MODE_P (mode))
17575 rtx compare_op, compare_seq;
17577 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
17579 /* Shortcut: following common codes never translate
17580 into carry flag compares. */
17581 if (code == EQ || code == NE || code == UNEQ || code == LTGT
17582 || code == ORDERED || code == UNORDERED)
17585 /* These comparisons require the zero flag; swap operands so they won't. */
17586 if ((code == GT || code == UNLE || code == LE || code == UNGT)
17587 && !TARGET_IEEE_FP)
17592 code = swap_condition (code);
17595 /* Try to expand the comparison and verify that we end up with
17596 a carry flag based comparison.  This fails to be true only when
17597 we decide to expand the comparison using arithmetic, which is
17598 not a common scenario. */
17600 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
17601 compare_seq = get_insns ();
17604 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
17605 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
17606 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
17608 code = GET_CODE (compare_op);
17610 if (code != LTU && code != GEU)
17613 emit_insn (compare_seq);
17618 if (!INTEGRAL_MODE_P (mode))
17627 /* Convert a==0 into (unsigned)a<1. */
17630 if (op1 != const0_rtx)
17633 code = (code == EQ ? LTU : GEU);
17636 /* Convert a>b into b<a or a>=b+1. */
17639 if (CONST_INT_P (op1))
17641 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17642 /* Bail out on overflow. We still can swap operands but that
17643 would force loading of the constant into register. */
17644 if (op1 == const0_rtx
17645 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17647 code = (code == GTU ? GEU : LTU);
17654 code = (code == GTU ? LTU : GEU);
17658 /* Convert a>=0 into (unsigned)a<0x80000000. */
17661 if (mode == DImode || op1 != const0_rtx)
17663 op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17664 code = (code == LT ? GEU : LTU);
17668 if (mode == DImode || op1 != constm1_rtx)
17670 op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17671 code = (code == LE ? GEU : LTU);
17677 /* Swapping operands may cause a constant to appear as the first operand. */
17678 if (!nonimmediate_operand (op0, VOIDmode))
17680 if (!can_create_pseudo_p ())
17682 op0 = force_reg (mode, op0);
17684 *pop = ix86_expand_compare (code, op0, op1);
17685 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
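/* Worked instances (illustrative) of the rewrites performed above,
   all of which reduce to a pure carry-flag test (LTU/GEU):

       a == 0    becomes   (unsigned) a < 1
       a >= 0    becomes   (unsigned) a < 0x80000000   (SImode)
       a <  0    becomes   (unsigned) a >= 0x80000000  (SImode)
       a >u b    becomes   a >=u b+1  (constant b), or b <u a (swap)

   so the caller can consume the result with adc/sbb style insns.  */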
17690 ix86_expand_int_movcc (rtx operands[])
17692 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17693 rtx compare_seq, compare_op;
17694 enum machine_mode mode = GET_MODE (operands[0]);
17695 bool sign_bit_compare_p = false;
17696 rtx op0 = XEXP (operands[1], 0);
17697 rtx op1 = XEXP (operands[1], 1);
17700 compare_op = ix86_expand_compare (code, op0, op1);
17701 compare_seq = get_insns ();
17704 compare_code = GET_CODE (compare_op);
17706 if ((op1 == const0_rtx && (code == GE || code == LT))
17707 || (op1 == constm1_rtx && (code == GT || code == LE)))
17708 sign_bit_compare_p = true;
17710 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17711 HImode insns, we'd be swallowed in word prefix ops. */
17713 if ((mode != HImode || TARGET_FAST_PREFIX)
17714 && (mode != (TARGET_64BIT ? TImode : DImode))
17715 && CONST_INT_P (operands[2])
17716 && CONST_INT_P (operands[3]))
17718 rtx out = operands[0];
17719 HOST_WIDE_INT ct = INTVAL (operands[2]);
17720 HOST_WIDE_INT cf = INTVAL (operands[3]);
17721 HOST_WIDE_INT diff;
17724 /* Sign bit compares are better done using shifts than by using sbb. */
17726 if (sign_bit_compare_p
17727 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17729 /* Detect overlap between destination and compare sources. */
17732 if (!sign_bit_compare_p)
17735 bool fpcmp = false;
17737 compare_code = GET_CODE (compare_op);
17739 flags = XEXP (compare_op, 0);
17741 if (GET_MODE (flags) == CCFPmode
17742 || GET_MODE (flags) == CCFPUmode)
17746 = ix86_fp_compare_code_to_integer (compare_code);
17749 /* To simplify the rest of the code, restrict to the GEU case. */
17750 if (compare_code == LTU)
17752 HOST_WIDE_INT tmp = ct;
17755 compare_code = reverse_condition (compare_code);
17756 code = reverse_condition (code);
17761 PUT_CODE (compare_op,
17762 reverse_condition_maybe_unordered
17763 (GET_CODE (compare_op)));
17765 PUT_CODE (compare_op,
17766 reverse_condition (GET_CODE (compare_op)));
17770 if (reg_overlap_mentioned_p (out, op0)
17771 || reg_overlap_mentioned_p (out, op1))
17772 tmp = gen_reg_rtx (mode);
17774 if (mode == DImode)
17775 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17777 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17778 flags, compare_op));
17782 if (code == GT || code == GE)
17783 code = reverse_condition (code);
17786 HOST_WIDE_INT tmp = ct;
17791 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17804 tmp = expand_simple_binop (mode, PLUS,
17806 copy_rtx (tmp), 1, OPTAB_DIRECT);
17817 tmp = expand_simple_binop (mode, IOR,
17819 copy_rtx (tmp), 1, OPTAB_DIRECT);
17821 else if (diff == -1 && ct)
17831 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17833 tmp = expand_simple_binop (mode, PLUS,
17834 copy_rtx (tmp), GEN_INT (cf),
17835 copy_rtx (tmp), 1, OPTAB_DIRECT);
17843 * andl cf - ct, dest
17853 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17856 tmp = expand_simple_binop (mode, AND,
17858 gen_int_mode (cf - ct, mode),
17859 copy_rtx (tmp), 1, OPTAB_DIRECT);
17861 tmp = expand_simple_binop (mode, PLUS,
17862 copy_rtx (tmp), GEN_INT (ct),
17863 copy_rtx (tmp), 1, OPTAB_DIRECT);
17866 if (!rtx_equal_p (tmp, out))
17867 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17874 enum machine_mode cmp_mode = GET_MODE (op0);
17877 tmp = ct, ct = cf, cf = tmp;
17880 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17882 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17884 /* We may be reversing an unordered compare to a normal compare;
17885 that is not valid in general (we may convert a non-trapping
17886 condition to a trapping one), but on i386 we currently emit
17887 all comparisons unordered. */
17888 compare_code = reverse_condition_maybe_unordered (compare_code);
17889 code = reverse_condition_maybe_unordered (code);
17893 compare_code = reverse_condition (compare_code);
17894 code = reverse_condition (code);
17898 compare_code = UNKNOWN;
17899 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17900 && CONST_INT_P (op1))
17902 if (op1 == const0_rtx
17903 && (code == LT || code == GE))
17904 compare_code = code;
17905 else if (op1 == constm1_rtx)
17909 else if (code == GT)
17914 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17915 if (compare_code != UNKNOWN
17916 && GET_MODE (op0) == GET_MODE (out)
17917 && (cf == -1 || ct == -1))
17919 /* If lea code below could be used, only optimize
17920 if it results in a 2 insn sequence. */
17922 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17923 || diff == 3 || diff == 5 || diff == 9)
17924 || (compare_code == LT && ct == -1)
17925 || (compare_code == GE && cf == -1))
17928 * notl op1 (if necessary)
17936 code = reverse_condition (code);
17939 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17941 out = expand_simple_binop (mode, IOR,
17943 out, 1, OPTAB_DIRECT);
17944 if (out != operands[0])
17945 emit_move_insn (operands[0], out);
17952 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17953 || diff == 3 || diff == 5 || diff == 9)
17954 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17956 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17962 * lea cf(dest*(ct-cf)),dest
17966 * This also catches the degenerate setcc-only case.
17972 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17975 /* On x86_64 the lea instruction operates on Pmode, so we need
17976 to get the arithmetic done in the proper mode to match. */
17978 tmp = copy_rtx (out);
17982 out1 = copy_rtx (out);
17983 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17987 tmp = gen_rtx_PLUS (mode, tmp, out1);
17993 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17996 if (!rtx_equal_p (tmp, out))
17999 out = force_operand (tmp, copy_rtx (out));
18001 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
18003 if (!rtx_equal_p (out, operands[0]))
18004 emit_move_insn (operands[0], copy_rtx (out));
18010 * General case: Jumpful:
18011 * xorl dest,dest cmpl op1, op2
18012 * cmpl op1, op2 movl ct, dest
18013 * setcc dest jcc 1f
18014 * decl dest movl cf, dest
18015 * andl (cf-ct),dest 1:
18018 * Size 20. Size 14.
18020 * This is reasonably steep, but branch mispredict costs are
18021 * high on modern cpus, so consider failing only if optimizing
* for size. */
18025 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18026 && BRANCH_COST (optimize_insn_for_speed_p (),
18031 enum machine_mode cmp_mode = GET_MODE (op0);
18036 if (SCALAR_FLOAT_MODE_P (cmp_mode))
18038 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
18040 /* We may be reversing an unordered compare to a normal compare;
18041 that is not valid in general (we may convert a non-trapping
18042 condition to a trapping one), but on i386 we currently
18043 emit all comparisons unordered. */
18044 code = reverse_condition_maybe_unordered (code);
18048 code = reverse_condition (code);
18049 if (compare_code != UNKNOWN)
18050 compare_code = reverse_condition (compare_code);
18054 if (compare_code != UNKNOWN)
18056 /* notl op1 (if needed)
18061 For x < 0 (resp. x <= -1) there will be no notl,
18062 so if possible swap the constants to get rid of the complement.
18064 True/false will be -1/0 while code below (store flag
18065 followed by decrement) is 0/-1, so the constants need
18066 to be exchanged once more. */
18068 if (compare_code == GE || !cf)
18070 code = reverse_condition (code);
18075 HOST_WIDE_INT tmp = cf;
18080 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
18084 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
18086 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
18088 copy_rtx (out), 1, OPTAB_DIRECT);
18091 out = expand_simple_binop (mode, AND, copy_rtx (out),
18092 gen_int_mode (cf - ct, mode),
18093 copy_rtx (out), 1, OPTAB_DIRECT);
18095 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
18096 copy_rtx (out), 1, OPTAB_DIRECT);
18097 if (!rtx_equal_p (out, operands[0]))
18098 emit_move_insn (operands[0], copy_rtx (out));
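/* Worked example (illustrative) of the store-flag-then-decrement
   sequence built above, for "dest = (a < b) ? 5 : 12" (ct = 5,
   cf = 12):

       t = (a < b)             # 1 or 0   (emit_store_flag)
       t = t - 1               # 0 or -1
       t = t & (12 - 5)        # 0 or 7
       dest = t + 5            # 5 or 12

   matching the branchless "General case" costed in the comment
   above.  */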
18104 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
18106 /* Try a few more things with specific constants and a variable. */
18109 rtx var, orig_out, out, tmp;
18111 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
18114 /* If one of the two operands is an interesting constant, load a
18115 constant with the above and mask it in with a logical operation. */
18117 if (CONST_INT_P (operands[2]))
18120 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
18121 operands[3] = constm1_rtx, op = and_optab;
18122 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
18123 operands[3] = const0_rtx, op = ior_optab;
18127 else if (CONST_INT_P (operands[3]))
18130 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
18131 operands[2] = constm1_rtx, op = and_optab;
18132 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
18133 operands[2] = const0_rtx, op = ior_optab;
18140 orig_out = operands[0];
18141 tmp = gen_reg_rtx (mode);
18144 /* Recurse to get the constant loaded. */
18145 if (ix86_expand_int_movcc (operands) == 0)
18148 /* Mask in the interesting variable. */
18149 out = expand_binop (mode, op, var, tmp, orig_out, 0,
18151 if (!rtx_equal_p (out, orig_out))
18152 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
18158 * For comparison with above,
18168 if (! nonimmediate_operand (operands[2], mode))
18169 operands[2] = force_reg (mode, operands[2]);
18170 if (! nonimmediate_operand (operands[3], mode))
18171 operands[3] = force_reg (mode, operands[3]);
18173 if (! register_operand (operands[2], VOIDmode)
18175 || ! register_operand (operands[3], VOIDmode)))
18176 operands[2] = force_reg (mode, operands[2]);
18179 && ! register_operand (operands[3], VOIDmode))
18180 operands[3] = force_reg (mode, operands[3]);
18182 emit_insn (compare_seq);
18183 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18184 gen_rtx_IF_THEN_ELSE (mode,
18185 compare_op, operands[2],
18190 /* Swap, force into registers, or otherwise massage the two operands
18191 to an sse comparison with a mask result. Thus we differ a bit from
18192 ix86_prepare_fp_compare_args which expects to produce a flags result.
18194 The DEST operand exists to help determine whether to commute commutative
18195 operators. The POP0/POP1 operands are updated in place. The new
18196 comparison code is returned, or UNKNOWN if not implementable. */
18198 static enum rtx_code
18199 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
18200 rtx *pop0, rtx *pop1)
18208 /* We have no LTGT as an operator. We could implement it with
18209 NE & ORDERED, but this requires an extra temporary. It's
18210 not clear that it's worth it. */
18217 /* These are supported directly. */
18224 /* For commutative operators, try to canonicalize the destination
18225 operand to be first in the comparison - this helps reload to
18226 avoid extra moves. */
18227 if (!dest || !rtx_equal_p (dest, *pop1))
18235 /* These are not supported directly. Swap the comparison operands
18236 to transform into something that is supported. */
18240 code = swap_condition (code);
18244 gcc_unreachable ();
18250 /* Detect conditional moves that exactly match min/max operational
18251 semantics. Note that this is IEEE safe, as long as we don't
18252 interchange the operands.
18254 Returns FALSE if this conditional move doesn't match a MIN/MAX,
18255 and TRUE if the operation is successful and instructions are emitted. */
18258 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
18259 rtx cmp_op1, rtx if_true, rtx if_false)
18261 enum machine_mode mode;
18267 else if (code == UNGE)
18270 if_true = if_false;
18276 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
18278 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
18283 mode = GET_MODE (dest);
18285 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
18286 but MODE may be a vector mode and thus not appropriate. */
18287 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
18289 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
18292 if_true = force_reg (mode, if_true);
18293 v = gen_rtvec (2, if_true, if_false);
18294 tmp = gen_rtx_UNSPEC (mode, v, u);
18298 code = is_min ? SMIN : SMAX;
18299 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
18302 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
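/* Illustrative match (not from the original source): for SFmode,
   "d = (a < b) ? a : b" satisfies the first rtx_equal_p pairing above
   and becomes SMIN, i.e. a single minss; the reversed pairing yields
   SMAX/maxss.  Only under -ffinite-math-only together with
   -funsafe-math-optimizations is the bare SMIN/SMAX used; otherwise
   the UNSPEC_IEEE_MIN/MAX variant keeps the IEEE operand order.  */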
18306 /* Expand an sse vector comparison. Return the register with the result. */
18309 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
18310 rtx op_true, rtx op_false)
18312 enum machine_mode mode = GET_MODE (dest);
18315 cmp_op0 = force_reg (mode, cmp_op0);
18316 if (!nonimmediate_operand (cmp_op1, mode))
18317 cmp_op1 = force_reg (mode, cmp_op1);
18320 || reg_overlap_mentioned_p (dest, op_true)
18321 || reg_overlap_mentioned_p (dest, op_false))
18322 dest = gen_reg_rtx (mode);
18324 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
18325 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18330 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
18331 operations. This is used for both scalar and vector conditional moves. */
18334 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
18336 enum machine_mode mode = GET_MODE (dest);
18339 if (op_false == CONST0_RTX (mode))
18341 op_true = force_reg (mode, op_true);
18342 x = gen_rtx_AND (mode, cmp, op_true);
18343 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18345 else if (op_true == CONST0_RTX (mode))
18347 op_false = force_reg (mode, op_false);
18348 x = gen_rtx_NOT (mode, cmp);
18349 x = gen_rtx_AND (mode, x, op_false);
18350 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18352 else if (TARGET_XOP)
18354 rtx pcmov = gen_rtx_SET (mode, dest,
18355 gen_rtx_IF_THEN_ELSE (mode, cmp,
18362 op_true = force_reg (mode, op_true);
18363 op_false = force_reg (mode, op_false);
18365 t2 = gen_reg_rtx (mode);
18367 t3 = gen_reg_rtx (mode);
18371 x = gen_rtx_AND (mode, op_true, cmp);
18372 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
18374 x = gen_rtx_NOT (mode, cmp);
18375 x = gen_rtx_AND (mode, x, op_false);
18376 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
18378 x = gen_rtx_IOR (mode, t3, t2);
18379 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
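/* The general case above implements the usual mask-blend identity
   (for illustration):

       dest = (cmp & op_true) | (~cmp & op_false)

   which lowers to an and/andnot/or triple on plain SSE targets; the
   TARGET_XOP branch gets the same effect from a single vpcmov.  */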
18383 /* Expand a floating-point conditional move. Return true if successful. */
18386 ix86_expand_fp_movcc (rtx operands[])
18388 enum machine_mode mode = GET_MODE (operands[0]);
18389 enum rtx_code code = GET_CODE (operands[1]);
18390 rtx tmp, compare_op;
18391 rtx op0 = XEXP (operands[1], 0);
18392 rtx op1 = XEXP (operands[1], 1);
18394 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18396 enum machine_mode cmode;
18398 /* Since we've no cmove for sse registers, don't force bad register
18399 allocation just to gain access to it. Deny movcc when the
18400 comparison mode doesn't match the move mode. */
18401 cmode = GET_MODE (op0);
18402 if (cmode == VOIDmode)
18403 cmode = GET_MODE (op1);
18407 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
18408 if (code == UNKNOWN)
18411 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
18412 operands[2], operands[3]))
18415 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
18416 operands[2], operands[3]);
18417 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
18421 /* The floating point conditional move instructions don't directly
18422 support conditions resulting from a signed integer comparison. */
18424 compare_op = ix86_expand_compare (code, op0, op1);
18425 if (!fcmov_comparison_operator (compare_op, VOIDmode))
18427 tmp = gen_reg_rtx (QImode);
18428 ix86_expand_setcc (tmp, code, op0, op1);
18430 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
18433 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18434 gen_rtx_IF_THEN_ELSE (mode, compare_op,
18435 operands[2], operands[3])));
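/* Illustrative note: fcmov can only test CF/ZF/PF-style conditions,
   so e.g. "d = (i < j) ? x : y" with a signed integer comparison takes
   the fallback above - materialize the predicate into a QImode
   temporary with setcc, then fcmov on "tmp != 0".  */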
18440 /* Expand a floating-point vector conditional move; a vcond operation
18441 rather than a movcc operation. */
18444 ix86_expand_fp_vcond (rtx operands[])
18446 enum rtx_code code = GET_CODE (operands[3]);
18449 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
18450 &operands[4], &operands[5]);
18451 if (code == UNKNOWN)
18454 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
18455 operands[5], operands[1], operands[2]))
18458 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
18459 operands[1], operands[2]);
18460 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
18464 /* Expand a signed/unsigned integral vector conditional move. */
18467 ix86_expand_int_vcond (rtx operands[])
18469 enum machine_mode mode = GET_MODE (operands[0]);
18470 enum rtx_code code = GET_CODE (operands[3]);
18471 bool negate = false;
18474 cop0 = operands[4];
18475 cop1 = operands[5];
18477 /* XOP supports all of the comparisons on all vector int types. */
18480 /* Canonicalize the comparison to EQ, GT, GTU. */
18491 code = reverse_condition (code);
18497 code = reverse_condition (code);
18503 code = swap_condition (code);
18504 x = cop0, cop0 = cop1, cop1 = x;
18508 gcc_unreachable ();
18511 /* Only SSE4.1/SSE4.2 supports V2DImode. */
18512 if (mode == V2DImode)
18517 /* SSE4.1 supports EQ. */
18518 if (!TARGET_SSE4_1)
18524 /* SSE4.2 supports GT/GTU. */
18525 if (!TARGET_SSE4_2)
18530 gcc_unreachable ();
18534 /* Unsigned parallel compare is not supported by the hardware.
18535 Play some tricks to turn this into a signed comparison against 0. */
18539 cop0 = force_reg (mode, cop0);
18547 rtx (*gen_sub3) (rtx, rtx, rtx);
18549 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
18551 mask = ix86_build_signbit_mask (mode, true, false);
18552 gen_sub3 = (mode == V4SImode
18553 ? gen_subv4si3 : gen_subv2di3);
18554 t1 = gen_reg_rtx (mode);
18555 emit_insn (gen_sub3 (t1, cop0, mask));
18557 t2 = gen_reg_rtx (mode);
18558 emit_insn (gen_sub3 (t2, cop1, mask));
18568 /* Perform a parallel unsigned saturating subtraction. */
18569 x = gen_reg_rtx (mode);
18570 emit_insn (gen_rtx_SET (VOIDmode, x,
18571 gen_rtx_US_MINUS (mode, cop0, cop1)));
18574 cop1 = CONST0_RTX (mode);
18580 gcc_unreachable ();
18585 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
18586 operands[1+negate], operands[2-negate]);
18588 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
18589 operands[2-negate]);
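/* Illustrative sketch of the signed-bias trick used above for
   V4SImode (pcmpgtd is signed-only):

       x <u y   <==>   (x - 0x80000000) <s (y - 0x80000000)

   Subtracting the sign-bit mask from both operands (equivalent to
   flipping the sign bits) turns the unsigned compare into one the
   hardware supports; the saturating-subtract variant instead compares
   the result of the US_MINUS against zero.  */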
18593 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
18594 true if we should do zero extension, else sign extension. HIGH_P is
18595 true if we want the N/2 high elements, else the low elements. */
18598 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18600 enum machine_mode imode = GET_MODE (operands[1]);
18601 rtx (*unpack)(rtx, rtx, rtx);
18608 unpack = gen_vec_interleave_highv16qi;
18610 unpack = gen_vec_interleave_lowv16qi;
18614 unpack = gen_vec_interleave_highv8hi;
18616 unpack = gen_vec_interleave_lowv8hi;
18620 unpack = gen_vec_interleave_highv4si;
18622 unpack = gen_vec_interleave_lowv4si;
18625 gcc_unreachable ();
18628 dest = gen_lowpart (imode, operands[0]);
18631 se = force_reg (imode, CONST0_RTX (imode));
18633 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18634 operands[1], pc_rtx, pc_rtx);
18636 emit_insn (unpack (dest, operands[1], se));
18639 /* This function performs the same task as ix86_expand_sse_unpack,
18640 but with SSE4.1 instructions. */
18643 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18645 enum machine_mode imode = GET_MODE (operands[1]);
18646 rtx (*unpack)(rtx, rtx);
18653 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18655 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18659 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18661 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18665 unpack = gen_sse4_1_zero_extendv2siv2di2;
18667 unpack = gen_sse4_1_sign_extendv2siv2di2;
18670 gcc_unreachable ();
18673 dest = operands[0];
18676 /* Shift higher 8 bytes to lower 8 bytes. */
18677 src = gen_reg_rtx (imode);
18678 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18679 gen_lowpart (V1TImode, operands[1]),
18685 emit_insn (unpack (dest, src));
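/* Illustrative: with SSE4.1, zero-extending the low 8 bytes of a
   V16QImode source is a single pmovzxbw; for HIGH_P the input is
   first shifted down by 8 bytes (the V1TImode logical shift above),
   since pmovzx/pmovsx only read the low part of their operand.  */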
18688 /* Expand conditional increment or decrement using adc/sbb instructions.
18689 The default case using setcc followed by the conditional move can be
18690 done by generic code. */
18692 ix86_expand_int_addcc (rtx operands[])
18694 enum rtx_code code = GET_CODE (operands[1]);
18696 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18698 rtx val = const0_rtx;
18699 bool fpcmp = false;
18700 enum machine_mode mode;
18701 rtx op0 = XEXP (operands[1], 0);
18702 rtx op1 = XEXP (operands[1], 1);
18704 if (operands[3] != const1_rtx
18705 && operands[3] != constm1_rtx)
18707 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18709 code = GET_CODE (compare_op);
18711 flags = XEXP (compare_op, 0);
18713 if (GET_MODE (flags) == CCFPmode
18714 || GET_MODE (flags) == CCFPUmode)
18717 code = ix86_fp_compare_code_to_integer (code);
18724 PUT_CODE (compare_op,
18725 reverse_condition_maybe_unordered
18726 (GET_CODE (compare_op)));
18728 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18731 mode = GET_MODE (operands[0]);
18733 /* Construct either adc or sbb insn. */
18734 if ((code == LTU) == (operands[3] == constm1_rtx))
18739 insn = gen_subqi3_carry;
18742 insn = gen_subhi3_carry;
18745 insn = gen_subsi3_carry;
18748 insn = gen_subdi3_carry;
18751 gcc_unreachable ();
18759 insn = gen_addqi3_carry;
18762 insn = gen_addhi3_carry;
18765 insn = gen_addsi3_carry;
18768 insn = gen_adddi3_carry;
18771 gcc_unreachable ();
18774 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
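/* Illustrative result (not from the original source): for
   "x = x + (a < b)" with unsigned SImode operands the code above
   emits

       cmpl  %esi, %edi        # CF = (a < b)
       adcl  $0, x

   and the decrement flavor picks the sbb variant instead.  */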
18780 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
18781 but works for floating point parameters and non-offsettable memories.
18782 For pushes, it returns just stack offsets; the values will be saved
18783 in the right order.  At most four parts are generated. */
18786 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18791 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18793 size = (GET_MODE_SIZE (mode) + 4) / 8;
18795 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18796 gcc_assert (size >= 2 && size <= 4);
18798 /* Optimize constant pool reference to immediates. This is used by fp
18799 moves, which force all constants to memory to allow combining. */
18800 if (MEM_P (operand) && MEM_READONLY_P (operand))
18802 rtx tmp = maybe_get_pool_constant (operand);
18807 if (MEM_P (operand) && !offsettable_memref_p (operand))
18809 /* The only non-offsettable memories we handle are pushes. */
18810 int ok = push_operand (operand, VOIDmode);
18814 operand = copy_rtx (operand);
18815 PUT_MODE (operand, Pmode);
18816 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18820 if (GET_CODE (operand) == CONST_VECTOR)
18822 enum machine_mode imode = int_mode_for_mode (mode);
18823 /* Caution: if we looked through a constant pool memory above,
18824 the operand may actually have a different mode now. That's
18825 ok, since we want to pun this all the way back to an integer. */
18826 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18827 gcc_assert (operand != NULL);
18833 if (mode == DImode)
18834 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18839 if (REG_P (operand))
18841 gcc_assert (reload_completed);
18842 for (i = 0; i < size; i++)
18843 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18845 else if (offsettable_memref_p (operand))
18847 operand = adjust_address (operand, SImode, 0);
18848 parts[0] = operand;
18849 for (i = 1; i < size; i++)
18850 parts[i] = adjust_address (operand, SImode, 4 * i);
18852 else if (GET_CODE (operand) == CONST_DOUBLE)
18857 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18861 real_to_target (l, &r, mode);
18862 parts[3] = gen_int_mode (l[3], SImode);
18863 parts[2] = gen_int_mode (l[2], SImode);
18866 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18867 parts[2] = gen_int_mode (l[2], SImode);
18870 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18873 gcc_unreachable ();
18875 parts[1] = gen_int_mode (l[1], SImode);
18876 parts[0] = gen_int_mode (l[0], SImode);
18879 gcc_unreachable ();
18884 if (mode == TImode)
18885 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18886 if (mode == XFmode || mode == TFmode)
18888 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
18889 if (REG_P (operand))
18891 gcc_assert (reload_completed);
18892 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18893 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18895 else if (offsettable_memref_p (operand))
18897 operand = adjust_address (operand, DImode, 0);
18898 parts[0] = operand;
18899 parts[1] = adjust_address (operand, upper_mode, 8);
18901 else if (GET_CODE (operand) == CONST_DOUBLE)
18906 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18907 real_to_target (l, &r, mode);
18909 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18910 if (HOST_BITS_PER_WIDE_INT >= 64)
18913 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18914 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18917 parts[0] = immed_double_const (l[0], l[1], DImode);
18919 if (upper_mode == SImode)
18920 parts[1] = gen_int_mode (l[2], SImode);
18921 else if (HOST_BITS_PER_WIDE_INT >= 64)
18924 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18925 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18928 parts[1] = immed_double_const (l[2], l[3], DImode);
18931 gcc_unreachable ();
18938 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18939 Return false when normal moves are needed; true when all required
18940 insns have been emitted.  Operands 2-5 receive the destination parts
18941 in the correct order; operands 6-9 receive the source parts. */
18944 ix86_split_long_move (rtx operands[])
18949 int collisions = 0;
18950 enum machine_mode mode = GET_MODE (operands[0]);
18951 bool collisionparts[4];
18953 /* The DFmode expanders may ask us to move a double.
18954 For a 64bit target this is a single move. By hiding the fact
18955 here we simplify the i386.md splitters. */
18956 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18958 /* Optimize constant pool reference to immediates. This is used by
18959 fp moves, which force all constants to memory to allow combining. */
18961 if (MEM_P (operands[1])
18962 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18963 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18964 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18965 if (push_operand (operands[0], VOIDmode))
18967 operands[0] = copy_rtx (operands[0]);
18968 PUT_MODE (operands[0], Pmode);
18971 operands[0] = gen_lowpart (DImode, operands[0]);
18972 operands[1] = gen_lowpart (DImode, operands[1]);
18973 emit_move_insn (operands[0], operands[1]);
18977 /* The only non-offsettable memory we handle is push. */
18978 if (push_operand (operands[0], VOIDmode))
18981 gcc_assert (!MEM_P (operands[0])
18982 || offsettable_memref_p (operands[0]));
18984 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18985 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18987 /* When emitting a push, take care with source operands on the stack. */
18988 if (push && MEM_P (operands[1])
18989 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18991 rtx src_base = XEXP (part[1][nparts - 1], 0);
18993 /* Compensate for the stack decrement by 4. */
18994 if (!TARGET_64BIT && nparts == 3
18995 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18996 src_base = plus_constant (src_base, 4);
18998 /* src_base refers to the stack pointer and is
18999 automatically decreased by the emitted pushes. */
19000 for (i = 0; i < nparts; i++)
19001 part[1][i] = change_address (part[1][i],
19002 GET_MODE (part[1][i]), src_base);
19005 /* We need to do the copy in the right order in case an address register
19006 of the source overlaps the destination. */
19007 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
19011 for (i = 0; i < nparts; i++)
19014 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
19015 if (collisionparts[i])
19019 /* Collision in the middle part can be handled by reordering. */
19020 if (collisions == 1 && nparts == 3 && collisionparts [1])
19022 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19023 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19025 else if (collisions == 1
19027 && (collisionparts [1] || collisionparts [2]))
19029 if (collisionparts [1])
19031 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
19032 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
19036 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
19037 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
19041 /* If there are more collisions, we can't handle it by reordering.
19042 Do an lea to the last part and use only one colliding move. */
19043 else if (collisions > 1)
19049 base = part[0][nparts - 1];
19051 /* Handle the case when the last part isn't valid for lea.
19052 Happens in 64-bit mode storing the 12-byte XFmode. */
19053 if (GET_MODE (base) != Pmode)
19054 base = gen_rtx_REG (Pmode, REGNO (base));
19056 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
19057 part[1][0] = replace_equiv_address (part[1][0], base);
19058 for (i = 1; i < nparts; i++)
19060 tmp = plus_constant (base, UNITS_PER_WORD * i);
19061 part[1][i] = replace_equiv_address (part[1][i], tmp);
19072 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
19073 emit_insn (gen_addsi3 (stack_pointer_rtx,
19074 stack_pointer_rtx, GEN_INT (-4)));
19075 emit_move_insn (part[0][2], part[1][2]);
19077 else if (nparts == 4)
19079 emit_move_insn (part[0][3], part[1][3]);
19080 emit_move_insn (part[0][2], part[1][2]);
19085 /* In 64bit mode we don't have a 32bit push available. If this is a
19086 register, that is OK - we just use the larger counterpart. We also
19087 retype memory - this comes from an attempt to avoid a REX prefix
19088 on moving the second half of a TFmode value. */
19089 if (GET_MODE (part[1][1]) == SImode)
19091 switch (GET_CODE (part[1][1]))
19094 part[1][1] = adjust_address (part[1][1], DImode, 0);
19098 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
19102 gcc_unreachable ();
19105 if (GET_MODE (part[1][0]) == SImode)
19106 part[1][0] = part[1][1];
19109 emit_move_insn (part[0][1], part[1][1]);
19110 emit_move_insn (part[0][0], part[1][0]);
19114 /* Choose the correct order so as not to overwrite the source before it is copied. */
19115 if ((REG_P (part[0][0])
19116 && REG_P (part[1][1])
19117 && (REGNO (part[0][0]) == REGNO (part[1][1])
19119 && REGNO (part[0][0]) == REGNO (part[1][2]))
19121 && REGNO (part[0][0]) == REGNO (part[1][3]))))
19123 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
19125 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
19127 operands[2 + i] = part[0][j];
19128 operands[6 + i] = part[1][j];
19133 for (i = 0; i < nparts; i++)
19135 operands[2 + i] = part[0][i];
19136 operands[6 + i] = part[1][i];
19140 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
19141 if (optimize_insn_for_size_p ())
19143 for (j = 0; j < nparts - 1; j++)
19144 if (CONST_INT_P (operands[6 + j])
19145 && operands[6 + j] != const0_rtx
19146 && REG_P (operands[2 + j]))
19147 for (i = j; i < nparts - 1; i++)
19148 if (CONST_INT_P (operands[7 + i])
19149 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
19150 operands[7 + i] = operands[2 + j];
19153 for (i = 0; i < nparts; i++)
19154 emit_move_insn (operands[2 + i], operands[6 + i]);
19159 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
19160 left shift by a constant, either using a single shift or
19161 a sequence of add instructions. */
19164 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
19166 rtx (*insn)(rtx, rtx, rtx);
19169 || (count * ix86_cost->add <= ix86_cost->shift_const
19170 && !optimize_insn_for_size_p ()))
19172 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
19173 while (count-- > 0)
19174 emit_insn (insn (operand, operand, operand));
19178 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19179 emit_insn (insn (operand, operand, GEN_INT (count)));
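/* Illustrative choice made above: a left shift by 2 may come out as

       addl  %eax, %eax
       addl  %eax, %eax

   when two adds are no more expensive than one constant shift (and we
   are not optimizing for size); otherwise a single "sall $2" is
   emitted.  */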
19184 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
19186 rtx (*gen_ashl3)(rtx, rtx, rtx);
19187 rtx (*gen_shld)(rtx, rtx, rtx);
19188 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19190 rtx low[2], high[2];
19193 if (CONST_INT_P (operands[2]))
19195 split_double_mode (mode, operands, 2, low, high);
19196 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19198 if (count >= half_width)
19200 emit_move_insn (high[0], low[1]);
19201 emit_move_insn (low[0], const0_rtx);
19203 if (count > half_width)
19204 ix86_expand_ashl_const (high[0], count - half_width, mode);
19208 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19210 if (!rtx_equal_p (operands[0], operands[1]))
19211 emit_move_insn (operands[0], operands[1]);
19213 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
19214 ix86_expand_ashl_const (low[0], count, mode);
19219 split_double_mode (mode, operands, 1, low, high);
19221 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
19223 if (operands[1] == const1_rtx)
19225 /* Assuming we've chosen QImode-capable registers, 1 << N
19226 can be done with two 32/64-bit shifts, no branches, no cmoves. */
19227 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
19229 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
19231 ix86_expand_clear (low[0]);
19232 ix86_expand_clear (high[0]);
19233 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
19235 d = gen_lowpart (QImode, low[0]);
19236 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19237 s = gen_rtx_EQ (QImode, flags, const0_rtx);
19238 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19240 d = gen_lowpart (QImode, high[0]);
19241 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
19242 s = gen_rtx_NE (QImode, flags, const0_rtx);
19243 emit_insn (gen_rtx_SET (VOIDmode, d, s));
19246 /* Otherwise, we can get the same results by manually performing
19247 a bit extract operation on bit 5/6, and then performing the two
19248 shifts. The two methods of getting 0/1 into low/high are exactly
19249 the same size. Avoiding the shift in the bit extract case helps
19250 pentium4 a bit; no one else seems to care much either way. */
19253 enum machine_mode half_mode;
19254 rtx (*gen_lshr3)(rtx, rtx, rtx);
19255 rtx (*gen_and3)(rtx, rtx, rtx);
19256 rtx (*gen_xor3)(rtx, rtx, rtx);
19257 HOST_WIDE_INT bits;
19260 if (mode == DImode)
19262 half_mode = SImode;
19263 gen_lshr3 = gen_lshrsi3;
19264 gen_and3 = gen_andsi3;
19265 gen_xor3 = gen_xorsi3;
19270 half_mode = DImode;
19271 gen_lshr3 = gen_lshrdi3;
19272 gen_and3 = gen_anddi3;
19273 gen_xor3 = gen_xordi3;
19277 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
19278 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
19280 x = gen_lowpart (half_mode, operands[2]);
19281 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
19283 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
19284 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
19285 emit_move_insn (low[0], high[0]);
19286 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
19289 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19290 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
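/* Illustrative asm for the setcc flavor of "1 << n" above (DImode on
   a 32-bit target, QImode-capable registers assumed):

       xorl  %eax, %eax        # low  = 0
       xorl  %edx, %edx        # high = 0
       testb $32, %cl
       sete  %al               # low  = (n < 32)
       setne %dl               # high = (n >= 32)
       sall  %cl, %eax         # hardware masks the count mod 32,
       sall  %cl, %edx         # so exactly one half gets the 1 bit

   which is the two-shift, branchless sequence promised above.  */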
19294 if (operands[1] == constm1_rtx)
19296 /* For -1 << N, we can avoid the shld instruction, because we
19297 know that we're shifting 0...31/63 ones into a -1. */
19298 emit_move_insn (low[0], constm1_rtx);
19299 if (optimize_insn_for_size_p ())
19300 emit_move_insn (high[0], low[0]);
19302 emit_move_insn (high[0], constm1_rtx);
19306 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
19308 if (!rtx_equal_p (operands[0], operands[1]))
19309 emit_move_insn (operands[0], operands[1]);
19311 split_double_mode (mode, operands, 1, low, high);
19312 emit_insn (gen_shld (high[0], low[0], operands[2]));
19315 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
19317 if (TARGET_CMOVE && scratch)
19319 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19320 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19322 ix86_expand_clear (scratch);
19323 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
19327 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19328 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19330 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
19335 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
19337 rtx (*gen_ashr3)(rtx, rtx, rtx)
19338 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
19339 rtx (*gen_shrd)(rtx, rtx, rtx);
19340 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19342 rtx low[2], high[2];
19345 if (CONST_INT_P (operands[2]))
19347 split_double_mode (mode, operands, 2, low, high);
19348 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19350 if (count == GET_MODE_BITSIZE (mode) - 1)
19352 emit_move_insn (high[0], high[1]);
19353 emit_insn (gen_ashr3 (high[0], high[0],
19354 GEN_INT (half_width - 1)));
19355 emit_move_insn (low[0], high[0]);
19358 else if (count >= half_width)
19360 emit_move_insn (low[0], high[1]);
19361 emit_move_insn (high[0], low[0]);
19362 emit_insn (gen_ashr3 (high[0], high[0],
19363 GEN_INT (half_width - 1)));
19365 if (count > half_width)
19366 emit_insn (gen_ashr3 (low[0], low[0],
19367 GEN_INT (count - half_width)));
19371 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19373 if (!rtx_equal_p (operands[0], operands[1]))
19374 emit_move_insn (operands[0], operands[1]);
19376 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19377 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
19382 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19384 if (!rtx_equal_p (operands[0], operands[1]))
19385 emit_move_insn (operands[0], operands[1]);
19387 split_double_mode (mode, operands, 1, low, high);
19389 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19390 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
19392 if (TARGET_CMOVE && scratch)
19394 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19395 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19397 emit_move_insn (scratch, high[0]);
19398 emit_insn (gen_ashr3 (scratch, scratch,
19399 GEN_INT (half_width - 1)));
19400 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19405 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
19406 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
19408 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
19414 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
19416 rtx (*gen_lshr3)(rtx, rtx, rtx)
19417 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
19418 rtx (*gen_shrd)(rtx, rtx, rtx);
19419 int half_width = GET_MODE_BITSIZE (mode) >> 1;
19421 rtx low[2], high[2];
19424 if (CONST_INT_P (operands[2]))
19426 split_double_mode (mode, operands, 2, low, high);
19427 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
19429 if (count >= half_width)
19431 emit_move_insn (low[0], high[1]);
19432 ix86_expand_clear (high[0]);
19434 if (count > half_width)
19435 emit_insn (gen_lshr3 (low[0], low[0],
19436 GEN_INT (count - half_width)));
19440 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19442 if (!rtx_equal_p (operands[0], operands[1]))
19443 emit_move_insn (operands[0], operands[1]);
19445 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
19446 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
19451 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
19453 if (!rtx_equal_p (operands[0], operands[1]))
19454 emit_move_insn (operands[0], operands[1]);
19456 split_double_mode (mode, operands, 1, low, high);
19458 emit_insn (gen_shrd (low[0], high[0], operands[2]));
19459 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
19461 if (TARGET_CMOVE && scratch)
19463 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
19464 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
19466 ix86_expand_clear (scratch);
19467 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
19472 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
19473 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
19475 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
19480 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
19482 predict_jump (int prob)
19484 rtx insn = get_last_insn ();
19485 gcc_assert (JUMP_P (insn));
19486 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
19489 /* Helper function for the string operations below.  Test whether VARIABLE
19490 is aligned to VALUE bytes.  If so, jump to the returned label. */
19492 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
19494 rtx label = gen_label_rtx ();
19495 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
19496 if (GET_MODE (variable) == DImode)
19497 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
19499 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
19500 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
19503 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19505 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19509 /* Decrease COUNTREG by VALUE. */
19511 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
19513 rtx (*gen_add)(rtx, rtx, rtx)
19514 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
19516 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
19519 /* Zero extend the possibly SImode EXP to a Pmode register. */
19521 ix86_zero_extend_to_Pmode (rtx exp)
19524 if (GET_MODE (exp) == VOIDmode)
19525 return force_reg (Pmode, exp);
19526 if (GET_MODE (exp) == Pmode)
19527 return copy_to_mode_reg (Pmode, exp);
19528 r = gen_reg_rtx (Pmode);
19529 emit_insn (gen_zero_extendsidi2 (r, exp));
19533 /* Divide COUNTREG by SCALE. */
19535 scale_counter (rtx countreg, int scale)
19541 if (CONST_INT_P (countreg))
19542 return GEN_INT (INTVAL (countreg) / scale);
19543 gcc_assert (REG_P (countreg));
19545 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
19546 GEN_INT (exact_log2 (scale)),
19547 NULL, 1, OPTAB_DIRECT);
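/* Illustrative: for count = 37 and scale = 4, a constant count folds
   to GEN_INT (9) at expand time; a register count is instead shifted
   right by exact_log2 (4) = 2.  The remaining 37 % 4 bytes are the
   epilogue's problem, not this function's.  */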
19551 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
19552 DImode for constant loop counts. */
19554 static enum machine_mode
19555 counter_mode (rtx count_exp)
19557 if (GET_MODE (count_exp) != VOIDmode)
19558 return GET_MODE (count_exp);
19559 if (!CONST_INT_P (count_exp))
19561 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
19566 /* When SRCPTR is non-NULL, output a simple loop to move the memory
19567 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
19568 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
19569 the equivalent loop to set memory to VALUE (supposed to be in MODE).
19571 The size is rounded down to a whole number of chunks moved at once.
19572 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
19576 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
19577 rtx destptr, rtx srcptr, rtx value,
19578 rtx count, enum machine_mode mode, int unroll,
19581 rtx out_label, top_label, iter, tmp;
19582 enum machine_mode iter_mode = counter_mode (count);
19583 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
19584 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
19590 top_label = gen_label_rtx ();
19591 out_label = gen_label_rtx ();
19592 iter = gen_reg_rtx (iter_mode);
19594 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
19595 NULL, 1, OPTAB_DIRECT);
19596 /* Those two should combine. */
19597 if (piece_size == const1_rtx)
19599 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
19601 predict_jump (REG_BR_PROB_BASE * 10 / 100);
19603 emit_move_insn (iter, const0_rtx);
19605 emit_label (top_label);
19607 tmp = convert_modes (Pmode, iter_mode, iter, true);
19608 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
19609 destmem = change_address (destmem, mode, x_addr);
19613 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
19614 srcmem = change_address (srcmem, mode, y_addr);
19616 /* When unrolling for chips that reorder memory reads and writes,
19617 we can save registers by using a single temporary.
19618 Also, using 4 temporaries is overkill in 32bit mode. */
19619 if (!TARGET_64BIT && 0)
19621 for (i = 0; i < unroll; i++)
19626 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19628 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19630 emit_move_insn (destmem, srcmem);
19636 gcc_assert (unroll <= 4);
19637 for (i = 0; i < unroll; i++)
19639 tmpreg[i] = gen_reg_rtx (mode);
19643 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19645 emit_move_insn (tmpreg[i], srcmem);
19647 for (i = 0; i < unroll; i++)
19652 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19654 emit_move_insn (destmem, tmpreg[i]);
19659 for (i = 0; i < unroll; i++)
19663 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19664 emit_move_insn (destmem, value);
19667 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19668 true, OPTAB_LIB_WIDEN);
19670 emit_move_insn (iter, tmp);
19672 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19674 if (expected_size != -1)
19676 expected_size /= GET_MODE_SIZE (mode) * unroll;
19677 if (expected_size == 0)
19679 else if (expected_size > REG_BR_PROB_BASE)
19680 predict_jump (REG_BR_PROB_BASE - 1);
19682 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19685 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19686 iter = ix86_zero_extend_to_Pmode (iter);
19687 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19688 true, OPTAB_LIB_WIDEN);
19689 if (tmp != destptr)
19690 emit_move_insn (destptr, tmp);
19693 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19694 true, OPTAB_LIB_WIDEN);
19696 emit_move_insn (srcptr, tmp);
19698 emit_label (out_label);
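/* Shape of the loop emitted above, in C-like pseudo code
   (illustrative; the byte-sized variant also tests for a zero trip
   count up front):

       size = count & ~(chunk * unroll - 1);
       iter = 0;
     top:
       ... UNROLL chunk-sized moves (or stores) at dest+iter, src+iter ...
       iter += chunk * unroll;
       if (iter < size) goto top;
       dest += iter;  src += iter;     // pointers updated for the caller
     out:
   */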
19701 /* Output "rep; mov" instruction.
19702 Arguments have the same meaning as for the previous function. */
19704 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19705 rtx destptr, rtx srcptr,
19707 enum machine_mode mode)
19713 /* If the size is known, it is shorter to use rep movs. */
19714 if (mode == QImode && CONST_INT_P (count)
19715 && !(INTVAL (count) & 3))
19718 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19719 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19720 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19721 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19722 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19723 if (mode != QImode)
19725 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19726 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19727 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19728 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19729 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19730 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19734 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19735 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19737 if (CONST_INT_P (count))
19739 count = GEN_INT (INTVAL (count)
19740 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19741 destmem = shallow_copy_rtx (destmem);
19742 srcmem = shallow_copy_rtx (srcmem);
19743 set_mem_size (destmem, count);
19744 set_mem_size (srcmem, count);
19748 if (MEM_SIZE (destmem))
19749 set_mem_size (destmem, NULL_RTX);
19750 if (MEM_SIZE (srcmem))
19751 set_mem_size (srcmem, NULL_RTX);
19753 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19757 /* Output "rep; stos" instruction.
19758 Arguments have the same meaning as for the previous function. */
19760 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19761 rtx count, enum machine_mode mode,
19767 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19768 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19769 value = force_reg (mode, gen_lowpart (mode, value));
19770 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19771 if (mode != QImode)
19773 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19774 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19775 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19778 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19779 if (orig_value == const0_rtx && CONST_INT_P (count))
19781 count = GEN_INT (INTVAL (count)
19782 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19783 destmem = shallow_copy_rtx (destmem);
19784 set_mem_size (destmem, count);
19786 else if (MEM_SIZE (destmem))
19787 set_mem_size (destmem, NULL_RTX);
19788 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
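/* Illustrative expansion (not from the original source): clearing
   64 bytes through this path with SImode chunks comes out as

       movl  $16, %ecx
       xorl  %eax, %eax
       rep stosl

   where the ASHIFT/PLUS "destexp" built above describes the final
   value of the destination pointer for the RTL dataflow.  */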
19792 emit_strmov (rtx destmem, rtx srcmem,
19793 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19795 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19796 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19797 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19800 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19802 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19803 rtx destptr, rtx srcptr, rtx count, int max_size)
19806 if (CONST_INT_P (count))
19808 HOST_WIDE_INT countval = INTVAL (count);
19811 if ((countval & 0x10) && max_size > 16)
19815 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19816 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19819 gcc_unreachable ();
19822 if ((countval & 0x08) && max_size > 8)
19825 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19828 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19829 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19833 if ((countval & 0x04) && max_size > 4)
19835 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19838 if ((countval & 0x02) && max_size > 2)
19840 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19843 if ((countval & 0x01) && max_size > 1)
19845 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19852 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19853 count, 1, OPTAB_DIRECT);
19854 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19855 count, QImode, 1, 4);
19859 /* When there are stringops, we can cheaply increase dest and src pointers.
19860 Otherwise we save code size by maintaining offset (zero is readily
19861 available from the preceding rep operation) and using x86 addressing modes. */
19863 if (TARGET_SINGLE_STRINGOP)
19867 rtx label = ix86_expand_aligntest (count, 4, true);
19868 src = change_address (srcmem, SImode, srcptr);
19869 dest = change_address (destmem, SImode, destptr);
19870 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19871 emit_label (label);
19872 LABEL_NUSES (label) = 1;
19876 rtx label = ix86_expand_aligntest (count, 2, true);
19877 src = change_address (srcmem, HImode, srcptr);
19878 dest = change_address (destmem, HImode, destptr);
19879 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19880 emit_label (label);
19881 LABEL_NUSES (label) = 1;
19885 rtx label = ix86_expand_aligntest (count, 1, true);
19886 src = change_address (srcmem, QImode, srcptr);
19887 dest = change_address (destmem, QImode, destptr);
19888 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19889 emit_label (label);
19890 LABEL_NUSES (label) = 1;
19895 rtx offset = force_reg (Pmode, const0_rtx);
19900 rtx label = ix86_expand_aligntest (count, 4, true);
19901 src = change_address (srcmem, SImode, srcptr);
19902 dest = change_address (destmem, SImode, destptr);
19903 emit_move_insn (dest, src);
19904 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19905 true, OPTAB_LIB_WIDEN);
19907 emit_move_insn (offset, tmp);
19908 emit_label (label);
19909 LABEL_NUSES (label) = 1;
19913 rtx label = ix86_expand_aligntest (count, 2, true);
19914 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19915 src = change_address (srcmem, HImode, tmp);
19916 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19917 dest = change_address (destmem, HImode, tmp);
19918 emit_move_insn (dest, src);
19919 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19920 true, OPTAB_LIB_WIDEN);
19922 emit_move_insn (offset, tmp);
19923 emit_label (label);
19924 LABEL_NUSES (label) = 1;
19928 rtx label = ix86_expand_aligntest (count, 1, true);
19929 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19930 src = change_address (srcmem, QImode, tmp);
19931 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19932 dest = change_address (destmem, QImode, tmp);
19933 emit_move_insn (dest, src);
19934 emit_label (label);
19935 LABEL_NUSES (label) = 1;
19940 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19942 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19943 rtx count, int max_size)
19946 expand_simple_binop (counter_mode (count), AND, count,
19947 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19948 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19949 gen_lowpart (QImode, value), count, QImode,
19953 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19955 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19959 if (CONST_INT_P (count))
19961 HOST_WIDE_INT countval = INTVAL (count);
19964 if ((countval & 0x10) && max_size > 16)
19968 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19969 emit_insn (gen_strset (destptr, dest, value));
19970 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19971 emit_insn (gen_strset (destptr, dest, value));
19974 gcc_unreachable ();
19977 if ((countval & 0x08) && max_size > 8)
19981 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19982 emit_insn (gen_strset (destptr, dest, value));
19986 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19987 emit_insn (gen_strset (destptr, dest, value));
19988 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19989 emit_insn (gen_strset (destptr, dest, value));
19993 if ((countval & 0x04) && max_size > 4)
19995 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19996 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19999 if ((countval & 0x02) && max_size > 2)
20001 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
20002 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20005 if ((countval & 0x01) && max_size > 1)
20007 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
20008 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20015 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
20020 rtx label = ix86_expand_aligntest (count, 16, true);
20023 dest = change_address (destmem, DImode, destptr);
20024 emit_insn (gen_strset (destptr, dest, value));
20025 emit_insn (gen_strset (destptr, dest, value));
20029 dest = change_address (destmem, SImode, destptr);
20030 emit_insn (gen_strset (destptr, dest, value));
20031 emit_insn (gen_strset (destptr, dest, value));
20032 emit_insn (gen_strset (destptr, dest, value));
20033 emit_insn (gen_strset (destptr, dest, value));
20035 emit_label (label);
20036 LABEL_NUSES (label) = 1;
20040 rtx label = ix86_expand_aligntest (count, 8, true);
20043 dest = change_address (destmem, DImode, destptr);
20044 emit_insn (gen_strset (destptr, dest, value));
20048 dest = change_address (destmem, SImode, destptr);
20049 emit_insn (gen_strset (destptr, dest, value));
20050 emit_insn (gen_strset (destptr, dest, value));
20052 emit_label (label);
20053 LABEL_NUSES (label) = 1;
20057 rtx label = ix86_expand_aligntest (count, 4, true);
20058 dest = change_address (destmem, SImode, destptr);
20059 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
20060 emit_label (label);
20061 LABEL_NUSES (label) = 1;
20065 rtx label = ix86_expand_aligntest (count, 2, true);
20066 dest = change_address (destmem, HImode, destptr);
20067 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
20068 emit_label (label);
20069 LABEL_NUSES (label) = 1;
20073 rtx label = ix86_expand_aligntest (count, 1, true);
20074 dest = change_address (destmem, QImode, destptr);
20075 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
20076 emit_label (label);
20077 LABEL_NUSES (label) = 1;
20081 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
20082 to DESIRED_ALIGNMENT. */
20084 expand_movmem_prologue (rtx destmem, rtx srcmem,
20085 rtx destptr, rtx srcptr, rtx count,
20086 int align, int desired_alignment)
20088 if (align <= 1 && desired_alignment > 1)
20090 rtx label = ix86_expand_aligntest (destptr, 1, false);
20091 srcmem = change_address (srcmem, QImode, srcptr);
20092 destmem = change_address (destmem, QImode, destptr);
20093 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20094 ix86_adjust_counter (count, 1);
20095 emit_label (label);
20096 LABEL_NUSES (label) = 1;
20098 if (align <= 2 && desired_alignment > 2)
20100 rtx label = ix86_expand_aligntest (destptr, 2, false);
20101 srcmem = change_address (srcmem, HImode, srcptr);
20102 destmem = change_address (destmem, HImode, destptr);
20103 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20104 ix86_adjust_counter (count, 2);
20105 emit_label (label);
20106 LABEL_NUSES (label) = 1;
20108 if (align <= 4 && desired_alignment > 4)
20110 rtx label = ix86_expand_aligntest (destptr, 4, false);
20111 srcmem = change_address (srcmem, SImode, srcptr);
20112 destmem = change_address (destmem, SImode, destptr);
20113 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
20114 ix86_adjust_counter (count, 4);
20115 emit_label (label);
20116 LABEL_NUSES (label) = 1;
20118 gcc_assert (desired_alignment <= 8);
20121 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
20122 ALIGN_BYTES is how many bytes need to be copied. */
20124 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
20125 int desired_align, int align_bytes)
20128 rtx src_size, dst_size;
20130 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
20131 if (src_align_bytes >= 0)
20132 src_align_bytes = desired_align - src_align_bytes;
20133 src_size = MEM_SIZE (src);
20134 dst_size = MEM_SIZE (dst);
20135 if (align_bytes & 1)
20137 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20138 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
20140 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20142 if (align_bytes & 2)
20144 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20145 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
20146 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20147 set_mem_align (dst, 2 * BITS_PER_UNIT);
20148 if (src_align_bytes >= 0
20149 && (src_align_bytes & 1) == (align_bytes & 1)
20150 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
20151 set_mem_align (src, 2 * BITS_PER_UNIT);
20153 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20155 if (align_bytes & 4)
20157 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20158 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
20159 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20160 set_mem_align (dst, 4 * BITS_PER_UNIT);
20161 if (src_align_bytes >= 0)
20163 unsigned int src_align = 0;
20164 if ((src_align_bytes & 3) == (align_bytes & 3))
20166 else if ((src_align_bytes & 1) == (align_bytes & 1))
20168 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20169 set_mem_align (src, src_align * BITS_PER_UNIT);
20172 emit_insn (gen_strmov (destreg, dst, srcreg, src));
20174 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20175 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
20176 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20177 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20178 if (src_align_bytes >= 0)
20180 unsigned int src_align = 0;
20181 if ((src_align_bytes & 7) == (align_bytes & 7))
20183 else if ((src_align_bytes & 3) == (align_bytes & 3))
20185 else if ((src_align_bytes & 1) == (align_bytes & 1))
20187 if (src_align > (unsigned int) desired_align)
20188 src_align = desired_align;
20189 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
20190 set_mem_align (src, src_align * BITS_PER_UNIT);
20193 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20195 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
20200 /* Store enough at DEST to align DEST, known to be aligned by ALIGN,
20201 to DESIRED_ALIGNMENT. */
20203 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
20204 int align, int desired_alignment)
20206 if (align <= 1 && desired_alignment > 1)
20208 rtx label = ix86_expand_aligntest (destptr, 1, false);
20209 destmem = change_address (destmem, QImode, destptr);
20210 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
20211 ix86_adjust_counter (count, 1);
20212 emit_label (label);
20213 LABEL_NUSES (label) = 1;
20215 if (align <= 2 && desired_alignment > 2)
20217 rtx label = ix86_expand_aligntest (destptr, 2, false);
20218 destmem = change_address (destmem, HImode, destptr);
20219 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
20220 ix86_adjust_counter (count, 2);
20221 emit_label (label);
20222 LABEL_NUSES (label) = 1;
20224 if (align <= 4 && desired_alignment > 4)
20226 rtx label = ix86_expand_aligntest (destptr, 4, false);
20227 destmem = change_address (destmem, SImode, destptr);
20228 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
20229 ix86_adjust_counter (count, 4);
20230 emit_label (label);
20231 LABEL_NUSES (label) = 1;
20233 gcc_assert (desired_alignment <= 8);
20236 /* Store enough at DST to align DST, known to be aligned by ALIGN,
20237 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
20239 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
20240 int desired_align, int align_bytes)
20243 rtx dst_size = MEM_SIZE (dst);
20244 if (align_bytes & 1)
20246 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
20248 emit_insn (gen_strset (destreg, dst,
20249 gen_lowpart (QImode, value)));
20251 if (align_bytes & 2)
20253 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
20254 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
20255 set_mem_align (dst, 2 * BITS_PER_UNIT);
20257 emit_insn (gen_strset (destreg, dst,
20258 gen_lowpart (HImode, value)));
20260 if (align_bytes & 4)
20262 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
20263 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
20264 set_mem_align (dst, 4 * BITS_PER_UNIT);
20266 emit_insn (gen_strset (destreg, dst,
20267 gen_lowpart (SImode, value)));
20269 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
20270 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
20271 set_mem_align (dst, desired_align * BITS_PER_UNIT);
20273 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
20277 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
20278 static enum stringop_alg
20279 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
20280 int *dynamic_check)
20282 const struct stringop_algs * algs;
20283 bool optimize_for_speed;
20284 /* Algorithms using the rep prefix want at least edi and ecx;
20285 additionally, memset wants eax and memcpy wants esi. Don't
20286 consider such algorithms if the user has appropriated those
20287 registers for their own purposes. */
20288 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
20290 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
20292 #define ALG_USABLE_P(alg) (rep_prefix_usable \
20293 || (alg != rep_prefix_1_byte \
20294 && alg != rep_prefix_4_byte \
20295 && alg != rep_prefix_8_byte))
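/* E.g. (a hypothetical invocation): compiling with -ffixed-ecx makes
   fixed_regs[CX_REG] nonzero, so rep_prefix_usable is false and
   ALG_USABLE_P rejects every rep_prefix_* variant while still allowing
   loop, unrolled_loop and libcall.  */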
20296 const struct processor_costs *cost;
20298 /* Even if the string operation call is cold, we still might spend a lot
20299 of time processing large blocks. */
20300 if (optimize_function_for_size_p (cfun)
20301 || (optimize_insn_for_size_p ()
20302 && expected_size != -1 && expected_size < 256))
20303 optimize_for_speed = false;
20305 optimize_for_speed = true;
20307 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
20309 *dynamic_check = -1;
20311 algs = &cost->memset[TARGET_64BIT != 0];
20313 algs = &cost->memcpy[TARGET_64BIT != 0];
20314 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
20315 return stringop_alg;
20316 /* rep; movq or rep; movl is the smallest variant. */
20317 else if (!optimize_for_speed)
20319 if (!count || (count & 3))
20320 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
20322 return rep_prefix_usable ? rep_prefix_4_byte : loop;
20324 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
20326 else if (expected_size != -1 && expected_size < 4)
20327 return loop_1_byte;
20328 else if (expected_size != -1)
20331 enum stringop_alg alg = libcall;
20332 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20334 /* We get here if the algorithms that were not libcall-based
20335 were rep-prefix based and we are unable to use rep prefixes
20336 based on global register usage. Break out of the loop and
20337 use the heuristic below. */
20338 if (algs->size[i].max == 0)
20340 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
20342 enum stringop_alg candidate = algs->size[i].alg;
20344 if (candidate != libcall && ALG_USABLE_P (candidate))
20346 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
20347 last non-libcall inline algorithm. */
20348 if (TARGET_INLINE_ALL_STRINGOPS)
20350 /* When the current size is best copied by a libcall,
20351 but we are still forced to inline, run the heuristic below
20352 that picks code for medium-sized blocks. */
20353 if (alg != libcall)
20357 else if (ALG_USABLE_P (candidate))
20361 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
20363 /* When asked to inline the call anyway, try to pick a meaningful choice.
20364 We look for the maximal size of block that is faster to copy by hand and
20365 take blocks of at most that size, guessing that the average size will
20366 be roughly half of the block.
20368 If this turns out to be bad, we might simply specify the preferred
20369 choice in ix86_costs. */
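/* A hypothetical walk-through: if the largest non-libcall entry in the
   cost table covers blocks up to 1024 bytes, the recursive call below
   re-decides with expected size 1024 / 2 = 512, and with
   -minline-stringops-dynamically the runtime check diverts blocks
   larger than 1024 bytes to a library call.  */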
20370 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20371 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
20374 enum stringop_alg alg;
20376 bool any_alg_usable_p = true;
20378 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
20380 enum stringop_alg candidate = algs->size[i].alg;
20381 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
20383 if (candidate != libcall && candidate
20384 && ALG_USABLE_P (candidate))
20385 max = algs->size[i].max;
20387 /* If there aren't any usable algorithms, then recursing on
20388 smaller sizes isn't going to find anything. Just return the
20389 simple byte-at-a-time copy loop. */
20390 if (!any_alg_usable_p)
20392 /* Pick something reasonable. */
20393 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20394 *dynamic_check = 128;
20395 return loop_1_byte;
20399 alg = decide_alg (count, max / 2, memset, dynamic_check);
20400 gcc_assert (*dynamic_check == -1);
20401 gcc_assert (alg != libcall);
20402 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
20403 *dynamic_check = max;
20406 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
20407 #undef ALG_USABLE_P
20410 /* Decide on alignment. We know that the operand is already aligned to ALIGN
20411 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
20413 decide_alignment (int align,
20414 enum stringop_alg alg,
20417 int desired_align = 0;
20421 gcc_unreachable ();
20423 case unrolled_loop:
20424 desired_align = GET_MODE_SIZE (Pmode);
20426 case rep_prefix_8_byte:
20429 case rep_prefix_4_byte:
20430 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20431 copying a whole cache line at once. */
20432 if (TARGET_PENTIUMPRO)
20437 case rep_prefix_1_byte:
20438 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
20439 copying a whole cache line at once. */
20440 if (TARGET_PENTIUMPRO)
20454 if (desired_align < align)
20455 desired_align = align;
20456 if (expected_size != -1 && expected_size < 4)
20457 desired_align = align;
20458 return desired_align;
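/* For example, alg == unrolled_loop with 64-bit Pmode asks for
   desired_align == GET_MODE_SIZE (Pmode) == 8 above, while a known
   expected size below 4 bytes falls back to the incoming ALIGN.  */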
20461 /* Return the smallest power of 2 greater than VAL. */
20463 smallest_pow2_greater_than (int val)
20471 /* Expand string move (memcpy) operation. Use i386 string operations when
20472 profitable. expand_setmem contains similar code. The code depends upon
20473 architecture, block size and alignment, but always has the same overall structure:
20476 1) Prologue guard: a conditional that jumps to the epilogue for small
20477 blocks that can be handled by the epilogue alone. This is faster, but
20478 also needed for correctness, since the prologue assumes the block is larger
20479 than the desired alignment.
20481 Optional dynamic check for size and libcall for large
20482 blocks is emitted here too, with -minline-stringops-dynamically.
20484 2) Prologue: copy first few bytes in order to get destination aligned
20485 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
20486 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
20487 We emit either a jump tree on power of two sized blocks, or a byte loop.
20489 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
20490 with specified algorithm.
20492 4) Epilogue: code copying tail of the block that is too small to be
20493 handled by main body (or up to size guarded by prologue guard). */
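/* A rough sketch of the emitted shape (illustrative pseudo-C, not the
   literal RTL), for an 8-byte chunk size and desired alignment 8:

	if (count < epilogue_size_needed) goto epilogue;      -- 1)
	while (dst & 7) { *dst++ = *src++; count--; }         -- 2)
	for (; count >= 8; count -= 8, dst += 8, src += 8)    -- 3)
	  copy 8 bytes;
      epilogue:                                               -- 4)
	copy count & (epilogue_size_needed - 1) bytes.  */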
20496 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
20497 rtx expected_align_exp, rtx expected_size_exp)
20503 rtx jump_around_label = NULL;
20504 HOST_WIDE_INT align = 1;
20505 unsigned HOST_WIDE_INT count = 0;
20506 HOST_WIDE_INT expected_size = -1;
20507 int size_needed = 0, epilogue_size_needed;
20508 int desired_align = 0, align_bytes = 0;
20509 enum stringop_alg alg;
20511 bool need_zero_guard = false;
20513 if (CONST_INT_P (align_exp))
20514 align = INTVAL (align_exp);
20515 /* i386 can do misaligned access at reasonably increased cost. */
20516 if (CONST_INT_P (expected_align_exp)
20517 && INTVAL (expected_align_exp) > align)
20518 align = INTVAL (expected_align_exp);
20519 /* ALIGN is the minimum of destination and source alignment, but we care here
20520 just about destination alignment. */
20521 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
20522 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
20524 if (CONST_INT_P (count_exp))
20525 count = expected_size = INTVAL (count_exp);
20526 if (CONST_INT_P (expected_size_exp) && count == 0)
20527 expected_size = INTVAL (expected_size_exp);
20529 /* Make sure we don't need to care about overflow later on. */
20530 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20533 /* Step 0: Decide on preferred algorithm, desired alignment and
20534 size of chunks to be copied by main loop. */
20536 alg = decide_alg (count, expected_size, false, &dynamic_check);
20537 desired_align = decide_alignment (align, alg, expected_size);
20539 if (!TARGET_ALIGN_STRINGOPS)
20540 align = desired_align;
20542 if (alg == libcall)
20544 gcc_assert (alg != no_stringop);
20546 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
20547 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20548 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
20553 gcc_unreachable ();
20555 need_zero_guard = true;
20556 size_needed = GET_MODE_SIZE (Pmode);
20558 case unrolled_loop:
20559 need_zero_guard = true;
20560 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
20562 case rep_prefix_8_byte:
20565 case rep_prefix_4_byte:
20568 case rep_prefix_1_byte:
20572 need_zero_guard = true;
20577 epilogue_size_needed = size_needed;
20579 /* Step 1: Prologue guard. */
20581 /* Alignment code needs count to be in register. */
20582 if (CONST_INT_P (count_exp) && desired_align > align)
20584 if (INTVAL (count_exp) > desired_align
20585 && INTVAL (count_exp) > size_needed)
20588 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20589 if (align_bytes <= 0)
20592 align_bytes = desired_align - align_bytes;
20594 if (align_bytes == 0)
20595 count_exp = force_reg (counter_mode (count_exp), count_exp);
20597 gcc_assert (desired_align >= 1 && align >= 1);
20599 /* Ensure that alignment prologue won't copy past end of block. */
20600 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20602 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20603 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20604 Make sure it is a power of 2. */
20605 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
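/* E.g. size_needed == 8, align == 1 and desired_align == 8 give
   MAX (8 - 1, 8 - 1) == 7, rounded up to the next power of two, 8,
   so the epilogue handles count & 7 trailing bytes.  */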
20609 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20611 /* If main algorithm works on QImode, no epilogue is needed.
20612 For small sizes just don't align anything. */
20613 if (size_needed == 1)
20614 desired_align = align;
20621 label = gen_label_rtx ();
20622 emit_cmp_and_jump_insns (count_exp,
20623 GEN_INT (epilogue_size_needed),
20624 LTU, 0, counter_mode (count_exp), 1, label);
20625 if (expected_size == -1 || expected_size < epilogue_size_needed)
20626 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20628 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20632 /* Emit code to decide at runtime whether a library call or inline code should be used. */
20634 if (dynamic_check != -1)
20636 if (CONST_INT_P (count_exp))
20638 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20640 emit_block_move_via_libcall (dst, src, count_exp, false);
20641 count_exp = const0_rtx;
20647 rtx hot_label = gen_label_rtx ();
20648 jump_around_label = gen_label_rtx ();
20649 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20650 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20651 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20652 emit_block_move_via_libcall (dst, src, count_exp, false);
20653 emit_jump (jump_around_label);
20654 emit_label (hot_label);
20658 /* Step 2: Alignment prologue. */
20660 if (desired_align > align)
20662 if (align_bytes == 0)
20664 /* Except for the first move in the epilogue, we no longer know
20665 the constant offset in the aliasing info. It doesn't seem worth
20666 the pain to maintain it for the first move, so throw away the info early. */
20668 src = change_address (src, BLKmode, srcreg);
20669 dst = change_address (dst, BLKmode, destreg);
20670 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20675 /* If we know how many bytes need to be copied before dst is
20676 sufficiently aligned, maintain aliasing info accurately. */
20677 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20678 desired_align, align_bytes);
20679 count_exp = plus_constant (count_exp, -align_bytes);
20680 count -= align_bytes;
20682 if (need_zero_guard
20683 && (count < (unsigned HOST_WIDE_INT) size_needed
20684 || (align_bytes == 0
20685 && count < ((unsigned HOST_WIDE_INT) size_needed
20686 + desired_align - align))))
20688 /* It is possible that we copied enough so the main loop will not execute. */
20690 gcc_assert (size_needed > 1);
20691 if (label == NULL_RTX)
20692 label = gen_label_rtx ();
20693 emit_cmp_and_jump_insns (count_exp,
20694 GEN_INT (size_needed),
20695 LTU, 0, counter_mode (count_exp), 1, label);
20696 if (expected_size == -1
20697 || expected_size < (desired_align - align) / 2 + size_needed)
20698 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20700 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20703 if (label && size_needed == 1)
20705 emit_label (label);
20706 LABEL_NUSES (label) = 1;
20708 epilogue_size_needed = 1;
20710 else if (label == NULL_RTX)
20711 epilogue_size_needed = size_needed;
20713 /* Step 3: Main loop. */
20719 gcc_unreachable ();
20721 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20722 count_exp, QImode, 1, expected_size);
20725 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20726 count_exp, Pmode, 1, expected_size);
20728 case unrolled_loop:
20729 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20730 registers for 4 temporaries anyway. */
20731 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20732 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20735 case rep_prefix_8_byte:
20736 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20739 case rep_prefix_4_byte:
20740 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20743 case rep_prefix_1_byte:
20744 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20748 /* Properly adjust the offsets of the src and dest memory for aliasing. */
20749 if (CONST_INT_P (count_exp))
20751 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20752 (count / size_needed) * size_needed);
20753 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20754 (count / size_needed) * size_needed);
20758 src = change_address (src, BLKmode, srcreg);
20759 dst = change_address (dst, BLKmode, destreg);
20762 /* Step 4: Epilogue to copy the remaining bytes. */
20766 /* When the main loop is done, COUNT_EXP might hold the original count,
20767 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20768 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20769 bytes. Compensate if needed. */
20771 if (size_needed < epilogue_size_needed)
20774 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20775 GEN_INT (size_needed - 1), count_exp, 1,
20777 if (tmp != count_exp)
20778 emit_move_insn (count_exp, tmp);
20780 emit_label (label);
20781 LABEL_NUSES (label) = 1;
20784 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20785 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20786 epilogue_size_needed);
20787 if (jump_around_label)
20788 emit_label (jump_around_label);
20792 /* Helper function for memset. For QImode value 0xXY produce
20793 0xXYXYXYXY of the width specified by MODE. This is essentially
20794 a * 0x10101010, but we can do slightly better than
20795 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
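/* E.g. promoting 0x41 to SImode by shifts emits, roughly:
	reg = 0x41
	reg |= reg << 8		-> 0x00004141
	reg |= reg << 16	-> 0x41414141  */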
20798 promote_duplicated_reg (enum machine_mode mode, rtx val)
20800 enum machine_mode valmode = GET_MODE (val);
20802 int nops = mode == DImode ? 3 : 2;
20804 gcc_assert (mode == SImode || mode == DImode);
20805 if (val == const0_rtx)
20806 return copy_to_mode_reg (mode, const0_rtx);
20807 if (CONST_INT_P (val))
20809 HOST_WIDE_INT v = INTVAL (val) & 255;
20813 if (mode == DImode)
20814 v |= (v << 16) << 16;
20815 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20818 if (valmode == VOIDmode)
20820 if (valmode != QImode)
20821 val = gen_lowpart (QImode, val);
20822 if (mode == QImode)
20824 if (!TARGET_PARTIAL_REG_STALL)
20826 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20827 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20828 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20829 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20831 rtx reg = convert_modes (mode, QImode, val, true);
20832 tmp = promote_duplicated_reg (mode, const1_rtx);
20833 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20838 rtx reg = convert_modes (mode, QImode, val, true);
20840 if (!TARGET_PARTIAL_REG_STALL)
20841 if (mode == SImode)
20842 emit_insn (gen_movsi_insv_1 (reg, reg));
20844 emit_insn (gen_movdi_insv_1 (reg, reg));
20847 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20848 NULL, 1, OPTAB_DIRECT);
20850 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20852 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20853 NULL, 1, OPTAB_DIRECT);
20854 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20855 if (mode == SImode)
20857 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20858 NULL, 1, OPTAB_DIRECT);
20859 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20864 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
20865 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
20866 alignment from ALIGN to DESIRED_ALIGN. */
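/* For instance, a 64-bit memset whose main loop stores DImode chunks
   (size_needed == 8) promotes VAL to DImode below, while one that only
   needs 2-byte stores gets away with an HImode promotion.  */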
20868 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20873 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20874 promoted_val = promote_duplicated_reg (DImode, val);
20875 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20876 promoted_val = promote_duplicated_reg (SImode, val);
20877 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20878 promoted_val = promote_duplicated_reg (HImode, val);
20880 promoted_val = val;
20882 return promoted_val;
20885 /* Expand string set operation (memset/bzero). Use i386 string operations when
20886 profitable. See expand_movmem comment for explanation of individual
20887 steps performed. */
20889 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20890 rtx expected_align_exp, rtx expected_size_exp)
20895 rtx jump_around_label = NULL;
20896 HOST_WIDE_INT align = 1;
20897 unsigned HOST_WIDE_INT count = 0;
20898 HOST_WIDE_INT expected_size = -1;
20899 int size_needed = 0, epilogue_size_needed;
20900 int desired_align = 0, align_bytes = 0;
20901 enum stringop_alg alg;
20902 rtx promoted_val = NULL;
20903 bool force_loopy_epilogue = false;
20905 bool need_zero_guard = false;
20907 if (CONST_INT_P (align_exp))
20908 align = INTVAL (align_exp);
20909 /* i386 can do misaligned access at reasonably increased cost. */
20910 if (CONST_INT_P (expected_align_exp)
20911 && INTVAL (expected_align_exp) > align)
20912 align = INTVAL (expected_align_exp);
20913 if (CONST_INT_P (count_exp))
20914 count = expected_size = INTVAL (count_exp);
20915 if (CONST_INT_P (expected_size_exp) && count == 0)
20916 expected_size = INTVAL (expected_size_exp);
20918 /* Make sure we don't need to care about overflow later on. */
20919 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20922 /* Step 0: Decide on preferred algorithm, desired alignment and
20923 size of chunks to be copied by main loop. */
20925 alg = decide_alg (count, expected_size, true, &dynamic_check);
20926 desired_align = decide_alignment (align, alg, expected_size);
20928 if (!TARGET_ALIGN_STRINGOPS)
20929 align = desired_align;
20931 if (alg == libcall)
20933 gcc_assert (alg != no_stringop);
20935 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20936 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20941 gcc_unreachable ();
20943 need_zero_guard = true;
20944 size_needed = GET_MODE_SIZE (Pmode);
20946 case unrolled_loop:
20947 need_zero_guard = true;
20948 size_needed = GET_MODE_SIZE (Pmode) * 4;
20950 case rep_prefix_8_byte:
20953 case rep_prefix_4_byte:
20956 case rep_prefix_1_byte:
20960 need_zero_guard = true;
20964 epilogue_size_needed = size_needed;
20966 /* Step 1: Prologue guard. */
20968 /* Alignment code needs count to be in register. */
20969 if (CONST_INT_P (count_exp) && desired_align > align)
20971 if (INTVAL (count_exp) > desired_align
20972 && INTVAL (count_exp) > size_needed)
20975 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20976 if (align_bytes <= 0)
20979 align_bytes = desired_align - align_bytes;
20981 if (align_bytes == 0)
20983 enum machine_mode mode = SImode;
20984 if (TARGET_64BIT && (count & ~0xffffffff))
20986 count_exp = force_reg (mode, count_exp);
20989 /* Do the cheap promotion to allow better CSE across the
20990 main loop and epilogue (i.e., one load of the big constant in
20991 front of all code). */
20992 if (CONST_INT_P (val_exp))
20993 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20994 desired_align, align);
20995 /* Ensure that alignment prologue won't copy past end of block. */
20996 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20998 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20999 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
21000 Make sure it is power of 2. */
21001 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
21003 /* To improve performance of small blocks, we jump around the VAL
21004 promoting code. This means that if the promoted VAL is not constant,
21005 we might not use it in the epilogue and have to use the byte loop variant. */
21007 if (epilogue_size_needed > 2 && !promoted_val)
21008 force_loopy_epilogue = true;
21011 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
21013 /* If main algorithm works on QImode, no epilogue is needed.
21014 For small sizes just don't align anything. */
21015 if (size_needed == 1)
21016 desired_align = align;
21023 label = gen_label_rtx ();
21024 emit_cmp_and_jump_insns (count_exp,
21025 GEN_INT (epilogue_size_needed),
21026 LTU, 0, counter_mode (count_exp), 1, label);
21027 if (expected_size == -1 || expected_size <= epilogue_size_needed)
21028 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21030 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21033 if (dynamic_check != -1)
21035 rtx hot_label = gen_label_rtx ();
21036 jump_around_label = gen_label_rtx ();
21037 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
21038 LEU, 0, counter_mode (count_exp), 1, hot_label);
21039 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21040 set_storage_via_libcall (dst, count_exp, val_exp, false);
21041 emit_jump (jump_around_label);
21042 emit_label (hot_label);
21045 /* Step 2: Alignment prologue. */
21047 /* Do the expensive promotion once we have branched off the small blocks. */
21049 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
21050 desired_align, align);
21051 gcc_assert (desired_align >= 1 && align >= 1);
21053 if (desired_align > align)
21055 if (align_bytes == 0)
21057 /* Except for the first move in the epilogue, we no longer know
21058 the constant offset in the aliasing info. It doesn't seem worth
21059 the pain to maintain it for the first move, so throw away the info early. */
21061 dst = change_address (dst, BLKmode, destreg);
21062 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
21067 /* If we know how many bytes need to be stored before dst is
21068 sufficiently aligned, maintain aliasing info accurately. */
21069 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
21070 desired_align, align_bytes);
21071 count_exp = plus_constant (count_exp, -align_bytes);
21072 count -= align_bytes;
21074 if (need_zero_guard
21075 && (count < (unsigned HOST_WIDE_INT) size_needed
21076 || (align_bytes == 0
21077 && count < ((unsigned HOST_WIDE_INT) size_needed
21078 + desired_align - align))))
21080 /* It is possible that we copied enough so the main loop will not execute. */
21082 gcc_assert (size_needed > 1);
21083 if (label == NULL_RTX)
21084 label = gen_label_rtx ();
21085 emit_cmp_and_jump_insns (count_exp,
21086 GEN_INT (size_needed),
21087 LTU, 0, counter_mode (count_exp), 1, label);
21088 if (expected_size == -1
21089 || expected_size < (desired_align - align) / 2 + size_needed)
21090 predict_jump (REG_BR_PROB_BASE * 20 / 100);
21092 predict_jump (REG_BR_PROB_BASE * 60 / 100);
21095 if (label && size_needed == 1)
21097 emit_label (label);
21098 LABEL_NUSES (label) = 1;
21100 promoted_val = val_exp;
21101 epilogue_size_needed = 1;
21103 else if (label == NULL_RTX)
21104 epilogue_size_needed = size_needed;
21106 /* Step 3: Main loop. */
21112 gcc_unreachable ();
21114 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21115 count_exp, QImode, 1, expected_size);
21118 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21119 count_exp, Pmode, 1, expected_size);
21121 case unrolled_loop:
21122 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
21123 count_exp, Pmode, 4, expected_size);
21125 case rep_prefix_8_byte:
21126 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21129 case rep_prefix_4_byte:
21130 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21133 case rep_prefix_1_byte:
21134 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
21138 /* Properly adjust the offset of the dest memory for aliasing. */
21139 if (CONST_INT_P (count_exp))
21140 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
21141 (count / size_needed) * size_needed);
21143 dst = change_address (dst, BLKmode, destreg);
21145 /* Step 4: Epilogue to copy the remaining bytes. */
21149 /* When the main loop is done, COUNT_EXP might hold the original count,
21150 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
21151 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
21152 bytes. Compensate if needed. */
21154 if (size_needed < epilogue_size_needed)
21157 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
21158 GEN_INT (size_needed - 1), count_exp, 1,
21160 if (tmp != count_exp)
21161 emit_move_insn (count_exp, tmp);
21163 emit_label (label);
21164 LABEL_NUSES (label) = 1;
21167 if (count_exp != const0_rtx && epilogue_size_needed > 1)
21169 if (force_loopy_epilogue)
21170 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
21171 epilogue_size_needed);
21173 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
21174 epilogue_size_needed);
21176 if (jump_around_label)
21177 emit_label (jump_around_label);
21181 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb
21184 out = result, initialized with the start address
21185 align_rtx = alignment of the address.
21186 scratch = scratch register, initialized with the start address when
21187 not aligned, otherwise undefined
21189 This is just the body. It needs the initializations mentioned above and
21190 some address computing at the end. These things are done in i386.md. */
21193 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
21197 rtx align_2_label = NULL_RTX;
21198 rtx align_3_label = NULL_RTX;
21199 rtx align_4_label = gen_label_rtx ();
21200 rtx end_0_label = gen_label_rtx ();
21202 rtx tmpreg = gen_reg_rtx (SImode);
21203 rtx scratch = gen_reg_rtx (SImode);
21207 if (CONST_INT_P (align_rtx))
21208 align = INTVAL (align_rtx);
21210 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
21212 /* Is there a known alignment and is it less than 4? */
21215 rtx scratch1 = gen_reg_rtx (Pmode);
21216 emit_move_insn (scratch1, out);
21217 /* Is there a known alignment and is it not 2? */
21220 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
21221 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
21223 /* Leave just the 3 lower bits. */
21224 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
21225 NULL_RTX, 0, OPTAB_WIDEN);
21227 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21228 Pmode, 1, align_4_label);
21229 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
21230 Pmode, 1, align_2_label);
21231 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
21232 Pmode, 1, align_3_label);
21236 /* Since the alignment is 2, we have to check 2 or 0 bytes;
21237 check whether it is aligned to 4 bytes. */
21239 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
21240 NULL_RTX, 0, OPTAB_WIDEN);
21242 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
21243 Pmode, 1, align_4_label);
21246 mem = change_address (src, QImode, out);
21248 /* Now compare the bytes. */
21250 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
21251 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
21252 QImode, 1, end_0_label);
21254 /* Increment the address. */
21255 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21257 /* Not needed with an alignment of 2 */
21260 emit_label (align_2_label);
21262 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21265 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21267 emit_label (align_3_label);
21270 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
21273 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
21276 /* Generate loop to check 4 bytes at a time. It is not a good idea to
21277 align this loop; it only makes the program huge and does not help it converge. */
21279 emit_label (align_4_label);
21281 mem = change_address (src, SImode, out);
21282 emit_move_insn (scratch, mem);
21283 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
21285 /* This formula yields a nonzero result iff one of the bytes is zero.
21286 This saves three branches inside the loop and many cycles. */
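/* Worked example for scratch = 0x41004242 (which contains a zero byte):
	scratch + 0xfefefeff	= 0x3fff4141
	& ~scratch (0xbeffbdbd)	= 0x3eff0101
	& 0x80808080		= 0x00800000  (nonzero => a zero byte).  */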
21288 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
21289 emit_insn (gen_one_cmplsi2 (scratch, scratch));
21290 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
21291 emit_insn (gen_andsi3 (tmpreg, tmpreg,
21292 gen_int_mode (0x80808080, SImode)));
21293 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
21298 rtx reg = gen_reg_rtx (SImode);
21299 rtx reg2 = gen_reg_rtx (Pmode);
21300 emit_move_insn (reg, tmpreg);
21301 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
21303 /* If zero is not in the first two bytes, move two bytes forward. */
21304 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21305 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21306 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21307 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
21308 gen_rtx_IF_THEN_ELSE (SImode, tmp,
21311 /* Emit lea manually to avoid clobbering of flags. */
21312 emit_insn (gen_rtx_SET (SImode, reg2,
21313 gen_rtx_PLUS (Pmode, out, const2_rtx)));
21315 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21316 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
21317 emit_insn (gen_rtx_SET (VOIDmode, out,
21318 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
21324 rtx end_2_label = gen_label_rtx ();
21325 /* Is zero in the first two bytes? */
21327 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
21328 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21329 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
21330 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21331 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
21333 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21334 JUMP_LABEL (tmp) = end_2_label;
21336 /* Not in the first two. Move two bytes forward. */
21337 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
21338 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
21340 emit_label (end_2_label);
21344 /* Avoid branch in fixing the byte. */
21345 tmpreg = gen_lowpart (QImode, tmpreg);
21346 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
21347 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
21348 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
21349 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
21351 emit_label (end_0_label);
21354 /* Expand strlen. */
21357 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
21359 rtx addr, scratch1, scratch2, scratch3, scratch4;
21361 /* The generic case of the strlen expander is long. Avoid
21362 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
21364 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21365 && !TARGET_INLINE_ALL_STRINGOPS
21366 && !optimize_insn_for_size_p ()
21367 && (!CONST_INT_P (align) || INTVAL (align) < 4))
21370 addr = force_reg (Pmode, XEXP (src, 0));
21371 scratch1 = gen_reg_rtx (Pmode);
21373 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
21374 && !optimize_insn_for_size_p ())
21376 /* Well it seems that some optimizer does not combine a call like
21377 foo(strlen(bar), strlen(bar));
21378 when the move and the subtraction are done here. It does calculate
21379 the length just once when these instructions are done inside of
21380 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
21381 often used and I use one fewer register for the lifetime of
21382 output_strlen_unroll() this is better. */
21384 emit_move_insn (out, addr);
21386 ix86_expand_strlensi_unroll_1 (out, src, align);
21388 /* strlensi_unroll_1 returns the address of the zero at the end of
21389 the string, like memchr(), so compute the length by subtracting
21390 the start address. */
21391 emit_insn (ix86_gen_sub3 (out, out, addr));
21397 /* Can't use this if the user has appropriated eax, ecx, or edi. */
21398 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
21401 scratch2 = gen_reg_rtx (Pmode);
21402 scratch3 = gen_reg_rtx (Pmode);
21403 scratch4 = force_reg (Pmode, constm1_rtx);
21405 emit_move_insn (scratch3, addr);
21406 eoschar = force_reg (QImode, eoschar);
21408 src = replace_equiv_address_nv (src, scratch3);
21410 /* If .md starts supporting :P, this can be done in .md. */
21411 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
21412 scratch4), UNSPEC_SCAS);
21413 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
21414 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
21415 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
21420 /* For a given symbol (function) construct code to compute the address of its PLT
21421 entry in the large x86-64 PIC model. */
21423 construct_plt_address (rtx symbol)
21425 rtx tmp = gen_reg_rtx (Pmode);
21426 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
21428 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
21429 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
21431 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
21432 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
21437 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
21439 rtx pop, int sibcall)
21441 rtx use = NULL, call;
21443 if (pop == const0_rtx)
21445 gcc_assert (!TARGET_64BIT || !pop);
21447 if (TARGET_MACHO && !TARGET_64BIT)
21450 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
21451 fnaddr = machopic_indirect_call_target (fnaddr);
21456 /* Static functions and indirect calls don't need the pic register. */
21457 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
21458 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21459 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
21460 use_reg (&use, pic_offset_table_rtx);
21463 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
21465 rtx al = gen_rtx_REG (QImode, AX_REG);
21466 emit_move_insn (al, callarg2);
21467 use_reg (&use, al);
21470 if (ix86_cmodel == CM_LARGE_PIC
21472 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
21473 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
21474 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
21476 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
21477 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
21479 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
21480 fnaddr = gen_rtx_MEM (QImode, fnaddr);
21483 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
21485 call = gen_rtx_SET (VOIDmode, retval, call);
21488 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
21489 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
21490 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
21493 && ix86_cfun_abi () == MS_ABI
21494 && (!callarg2 || INTVAL (callarg2) != -2))
21496 /* We need to represent that SI and DI registers are clobbered by SYSV calls. */
21498 static int clobbered_registers[] = {
21499 XMM6_REG, XMM7_REG, XMM8_REG,
21500 XMM9_REG, XMM10_REG, XMM11_REG,
21501 XMM12_REG, XMM13_REG, XMM14_REG,
21502 XMM15_REG, SI_REG, DI_REG
21505 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
21506 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
21507 UNSPEC_MS_TO_SYSV_CALL);
21511 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
21512 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
21515 (SSE_REGNO_P (clobbered_registers[i])
21517 clobbered_registers[i]));
21519 call = gen_rtx_PARALLEL (VOIDmode,
21520 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
21524 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
21525 if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
21530 cfun->machine->use_vzeroupper_p = 1;
21531 if (cfun->machine->callee_pass_avx256_p)
21533 if (cfun->machine->callee_return_avx256_p)
21534 avx256 = callee_return_pass_avx256;
21536 avx256 = callee_pass_avx256;
21538 else if (cfun->machine->callee_return_avx256_p)
21539 avx256 = callee_return_avx256;
21541 avx256 = call_no_avx256;
21543 unspec = gen_rtx_UNSPEC (VOIDmode,
21544 gen_rtvec (1, GEN_INT (avx256)),
21545 UNSPEC_CALL_NEEDS_VZEROUPPER);
21546 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, unspec));
21549 call = emit_call_insn (call);
21551 CALL_INSN_FUNCTION_USAGE (call) = use;
21557 ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
21559 rtx call = XVECEXP (PATTERN (insn), 0, 0);
21560 emit_insn (gen_avx_vzeroupper (vzeroupper));
21561 emit_call_insn (call);
21564 /* Output the assembly for a call instruction. */
21567 ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
21569 bool direct_p = constant_call_address_operand (call_op, Pmode);
21570 bool seh_nop_p = false;
21572 gcc_assert (addr_op == 0 || addr_op == 1);
21574 if (SIBLING_CALL_P (insn))
21577 return addr_op ? "jmp\t%P1" : "jmp\t%P0";
21578 /* SEH epilogue detection requires the indirect branch case
21579 to include REX.W. */
21580 else if (TARGET_SEH)
21581 return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
21583 return addr_op ? "jmp\t%A1" : "jmp\t%A0";
21586 /* SEH unwinding can require an extra nop to be emitted in several
21587 circumstances. Determine if we have one of those. */
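/* E.g. when the call is the last real insn before the epilogue note,
   seh_nop_p becomes true below and the output grows a trailing nop,
   so the unwinder does not look into the next function.  */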
21592 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
21594 /* If we get to another real insn, we don't need the nop. */
21598 /* If we get to the epilogue note, prevent a catch region from
21599 being adjacent to the standard epilogue sequence. If non-
21600 call exceptions are enabled, we'll have done this during epilogue emission. */
21601 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
21602 && !flag_non_call_exceptions
21603 && !can_throw_internal (insn))
21610 /* If we didn't find a real insn following the call, prevent the
21611 unwinder from looking into the next function. */
21619 return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
21621 return addr_op ? "call\t%P1" : "call\t%P0";
21626 return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
21628 return addr_op ? "call\t%A1" : "call\t%A0";
21632 /* Clear stack slot assignments remembered from previous functions.
21633 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
21636 static struct machine_function *
21637 ix86_init_machine_status (void)
21639 struct machine_function *f;
21641 f = ggc_alloc_cleared_machine_function ();
21642 f->use_fast_prologue_epilogue_nregs = -1;
21643 f->tls_descriptor_call_expanded_p = 0;
21644 f->call_abi = ix86_abi;
21649 /* Return a MEM corresponding to a stack slot with mode MODE.
21650 Allocate a new slot if necessary.
21652 The RTL for a function can have several slots available: N is
21653 which slot to use. */
21656 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
21658 struct stack_local_entry *s;
21660 gcc_assert (n < MAX_386_STACK_LOCALS);
21662 /* Virtual slot is valid only before vregs are instantiated. */
21663 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
21665 for (s = ix86_stack_locals; s; s = s->next)
21666 if (s->mode == mode && s->n == n)
21667 return copy_rtx (s->rtl);
21669 s = ggc_alloc_stack_local_entry ();
21672 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
21674 s->next = ix86_stack_locals;
21675 ix86_stack_locals = s;
21679 /* Construct the SYMBOL_REF for the tls_get_addr function. */
21681 static GTY(()) rtx ix86_tls_symbol;
21683 ix86_tls_get_addr (void)
21686 if (!ix86_tls_symbol)
21688 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
21689 (TARGET_ANY_GNU_TLS
21691 ? "___tls_get_addr"
21692 : "__tls_get_addr");
21695 return ix86_tls_symbol;
21698 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
21700 static GTY(()) rtx ix86_tls_module_base_symbol;
21702 ix86_tls_module_base (void)
21705 if (!ix86_tls_module_base_symbol)
21707 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
21708 "_TLS_MODULE_BASE_");
21709 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
21710 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
21713 return ix86_tls_module_base_symbol;
21716 /* Calculate the length of the memory address in the instruction
21717 encoding. Does not include the one-byte modrm, opcode, or prefix. */
21720 memory_address_length (rtx addr)
21722 struct ix86_address parts;
21723 rtx base, index, disp;
21727 if (GET_CODE (addr) == PRE_DEC
21728 || GET_CODE (addr) == POST_INC
21729 || GET_CODE (addr) == PRE_MODIFY
21730 || GET_CODE (addr) == POST_MODIFY)
21733 ok = ix86_decompose_address (addr, &parts);
21736 if (parts.base && GET_CODE (parts.base) == SUBREG)
21737 parts.base = SUBREG_REG (parts.base);
21738 if (parts.index && GET_CODE (parts.index) == SUBREG)
21739 parts.index = SUBREG_REG (parts.index);
21742 index = parts.index;
/* Rule of thumb:
21747 - esp as the base always wants an index,
21748 - ebp as the base always wants a displacement,
21749 - r12 as the base always wants an index,
21750 - r13 as the base always wants a displacement. */
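/* Illustration: (%eax) uses the shortest form and contributes 0 bytes
   beyond the modrm, while (%esp) needs a SIB byte and (%ebp) a disp8,
   each contributing 1 byte.  */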
21752 /* Register Indirect. */
21753 if (base && !index && !disp)
21755 /* esp (for its index) and ebp (for its displacement) need
21756 the two-byte modrm form. Similarly for r12 and r13 in 64-bit code. */
if (REG_P (addr)
21759 && (addr == arg_pointer_rtx
21760 || addr == frame_pointer_rtx
21761 || REGNO (addr) == SP_REG
21762 || REGNO (addr) == BP_REG
21763 || REGNO (addr) == R12_REG
21764 || REGNO (addr) == R13_REG))
21768 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21769 is not disp32, but disp32(%rip), so for disp32
21770 SIB byte is needed, unless print_operand_address
21771 optimizes it into disp32(%rip) or (%rip) is implied
21773 else if (disp && !base && !index)
21780 if (GET_CODE (disp) == CONST)
21781 symbol = XEXP (disp, 0);
21782 if (GET_CODE (symbol) == PLUS
21783 && CONST_INT_P (XEXP (symbol, 1)))
21784 symbol = XEXP (symbol, 0);
21786 if (GET_CODE (symbol) != LABEL_REF
21787 && (GET_CODE (symbol) != SYMBOL_REF
21788 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21789 && (GET_CODE (symbol) != UNSPEC
21790 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21791 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21798 /* Find the length of the displacement constant. */
21801 if (base && satisfies_constraint_K (disp))
21806 /* ebp always wants a displacement. Similarly r13. */
21807 else if (base && REG_P (base)
21808 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21811 /* An index requires the two-byte modrm form.... */
21813 /* ...like esp (or r12), which always wants an index. */
21814 || base == arg_pointer_rtx
21815 || base == frame_pointer_rtx
21816 || (base && REG_P (base)
21817 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21834 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21835 is set, expect that the insn has an 8-bit immediate alternative. */
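/* E.g. with SHORTFORM set, addl $100, %eax fits the sign-extended imm8
   alternative and gets length 1, while addl $300, %eax needs the full
   imm32 and gets length 4.  */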
21837 ix86_attr_length_immediate_default (rtx insn, int shortform)
21841 extract_insn_cached (insn);
21842 for (i = recog_data.n_operands - 1; i >= 0; --i)
21843 if (CONSTANT_P (recog_data.operand[i]))
21845 enum attr_mode mode = get_attr_mode (insn);
21848 if (shortform && CONST_INT_P (recog_data.operand[i]))
21850 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21857 ival = trunc_int_for_mode (ival, HImode);
21860 ival = trunc_int_for_mode (ival, SImode);
21865 if (IN_RANGE (ival, -128, 127))
21882 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
21887 fatal_insn ("unknown insn mode", insn);
21892 /* Compute default value for "length_address" attribute. */
21894 ix86_attr_length_address_default (rtx insn)
21898 if (get_attr_type (insn) == TYPE_LEA)
21900 rtx set = PATTERN (insn), addr;
21902 if (GET_CODE (set) == PARALLEL)
21903 set = XVECEXP (set, 0, 0);
21905 gcc_assert (GET_CODE (set) == SET);
21907 addr = SET_SRC (set);
21908 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21910 if (GET_CODE (addr) == ZERO_EXTEND)
21911 addr = XEXP (addr, 0);
21912 if (GET_CODE (addr) == SUBREG)
21913 addr = SUBREG_REG (addr);
21916 return memory_address_length (addr);
21919 extract_insn_cached (insn);
21920 for (i = recog_data.n_operands - 1; i >= 0; --i)
21921 if (MEM_P (recog_data.operand[i]))
21923 constrain_operands_cached (reload_completed);
21924 if (which_alternative != -1)
21926 const char *constraints = recog_data.constraints[i];
21927 int alt = which_alternative;
21929 while (*constraints == '=' || *constraints == '+')
21932 while (*constraints++ != ',')
21934 /* Skip ignored operands. */
21935 if (*constraints == 'X')
21938 return memory_address_length (XEXP (recog_data.operand[i], 0));
21943 /* Compute default value for "length_vex" attribute. It includes
21944 2 or 3 byte VEX prefix and 1 opcode byte. */
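/* E.g. a 0f-opcode AVX insn with no REX.W/X/B bits needs only the
   2-byte VEX prefix (2 + 1 = 3 bytes counted here); a DImode general
   register operand or a memory operand mentioning %r8-%r15 forces the
   3-byte prefix (3 + 1 = 4).  */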
21947 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21952 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
21953 requires the 3-byte VEX prefix. */
21954 if (!has_0f_opcode || has_vex_w)
21957 /* We can always use 2 byte VEX prefix in 32bit. */
21961 extract_insn_cached (insn);
21963 for (i = recog_data.n_operands - 1; i >= 0; --i)
21964 if (REG_P (recog_data.operand[i]))
21966 /* REX.W bit uses 3 byte VEX prefix. */
21967 if (GET_MODE (recog_data.operand[i]) == DImode
21968 && GENERAL_REG_P (recog_data.operand[i]))
21973 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21974 if (MEM_P (recog_data.operand[i])
21975 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21982 /* Return the maximum number of instructions a cpu can issue. */
21985 ix86_issue_rate (void)
21989 case PROCESSOR_PENTIUM:
21990 case PROCESSOR_ATOM:
21994 case PROCESSOR_PENTIUMPRO:
21995 case PROCESSOR_PENTIUM4:
21996 case PROCESSOR_ATHLON:
21998 case PROCESSOR_AMDFAM10:
21999 case PROCESSOR_NOCONA:
22000 case PROCESSOR_GENERIC32:
22001 case PROCESSOR_GENERIC64:
22002 case PROCESSOR_BDVER1:
22005 case PROCESSOR_CORE2:
22013 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
22014 by DEP_INSN and nothing else set by DEP_INSN. */
22017 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
22021 /* Simplify the test for uninteresting insns. */
22022 if (insn_type != TYPE_SETCC
22023 && insn_type != TYPE_ICMOV
22024 && insn_type != TYPE_FCMOV
22025 && insn_type != TYPE_IBR)
22028 if ((set = single_set (dep_insn)) != 0)
22030 set = SET_DEST (set);
22033 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
22034 && XVECLEN (PATTERN (dep_insn), 0) == 2
22035 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
22036 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
22038 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
22039 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
22044 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
22047 /* This test is true if the dependent insn reads the flags but
22048 not any other potentially set register. */
22049 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
22052 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
22058 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
22062 ix86_agi_dependent (rtx set_insn, rtx use_insn)
22065 extract_insn_cached (use_insn);
22066 for (i = recog_data.n_operands - 1; i >= 0; --i)
22067 if (MEM_P (recog_data.operand[i]))
22069 rtx addr = XEXP (recog_data.operand[i], 0);
22070 return modified_in_p (addr, set_insn) != 0;
22076 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
22078 enum attr_type insn_type, dep_insn_type;
22079 enum attr_memory memory;
22081 int dep_insn_code_number;
22083 /* Anti and output dependencies have zero cost on all CPUs. */
22084 if (REG_NOTE_KIND (link) != 0)
22087 dep_insn_code_number = recog_memoized (dep_insn);
22089 /* If we can't recognize the insns, we can't really do anything. */
22090 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
22093 insn_type = get_attr_type (insn);
22094 dep_insn_type = get_attr_type (dep_insn);
22098 case PROCESSOR_PENTIUM:
22099 /* Address Generation Interlock adds a cycle of latency. */
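/* For example, a load such as "mov (%ebx), %eax" issued immediately after
   an insn that writes %ebx must wait an extra cycle while the address is
   generated; the cases below account for that stall.  */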
22100 if (insn_type == TYPE_LEA)
22102 rtx addr = PATTERN (insn);
22104 if (GET_CODE (addr) == PARALLEL)
22105 addr = XVECEXP (addr, 0, 0);
22107 gcc_assert (GET_CODE (addr) == SET);
22109 addr = SET_SRC (addr);
22110 if (modified_in_p (addr, dep_insn))
22113 else if (ix86_agi_dependent (dep_insn, insn))
22116 /* ??? Compares pair with jump/setcc. */
22117 if (ix86_flags_dependent (insn, dep_insn, insn_type))
22120 /* Floating point stores require the value to be ready one cycle earlier. */
22121 if (insn_type == TYPE_FMOV
22122 && get_attr_memory (insn) == MEMORY_STORE
22123 && !ix86_agi_dependent (dep_insn, insn))
22127 case PROCESSOR_PENTIUMPRO:
22128 memory = get_attr_memory (insn);
22130 /* INT->FP conversion is expensive. */
22131 if (get_attr_fp_int_src (dep_insn))
22134 /* There is one cycle of extra latency between an FP op and a store. */
22135 if (insn_type == TYPE_FMOV
22136 && (set = single_set (dep_insn)) != NULL_RTX
22137 && (set2 = single_set (insn)) != NULL_RTX
22138 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
22139 && MEM_P (SET_DEST (set2)))
22142 /* Show the ability of the reorder buffer to hide the latency of a load
22143 by executing it in parallel with the previous instruction, when the
22144 previous instruction is not needed to compute the address. */
22145 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22146 && !ix86_agi_dependent (dep_insn, insn))
22148 /* Claim moves to take one cycle, as the core can issue one load
22149 at a time and the next load can start a cycle later. */
22150 if (dep_insn_type == TYPE_IMOV
22151 || dep_insn_type == TYPE_FMOV)
22159 memory = get_attr_memory (insn);
22161 /* The esp dependency is resolved before the instruction is really
22162 finished. */
22163 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
22164 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
22167 /* INT->FP conversion is expensive. */
22168 if (get_attr_fp_int_src (dep_insn))
22171 /* Show the ability of the reorder buffer to hide the latency of a load
22172 by executing it in parallel with the previous instruction, when the
22173 previous instruction is not needed to compute the address. */
22174 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22175 && !ix86_agi_dependent (dep_insn, insn))
22177 /* Claim moves to take one cycle, as the core can issue one load
22178 at a time and the next load can start a cycle later. */
22179 if (dep_insn_type == TYPE_IMOV
22180 || dep_insn_type == TYPE_FMOV)
22189 case PROCESSOR_ATHLON:
22191 case PROCESSOR_AMDFAM10:
22192 case PROCESSOR_BDVER1:
22193 case PROCESSOR_ATOM:
22194 case PROCESSOR_GENERIC32:
22195 case PROCESSOR_GENERIC64:
22196 memory = get_attr_memory (insn);
22198 /* Show the ability of the reorder buffer to hide the latency of a load
22199 by executing it in parallel with the previous instruction, when the
22200 previous instruction is not needed to compute the address. */
22201 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
22202 && !ix86_agi_dependent (dep_insn, insn))
22204 enum attr_unit unit = get_attr_unit (insn);
22207 /* Because of the difference between the length of the integer and
22208 floating point unit pipeline preparation stages, memory operands
22209 for floating point are cheaper.
22210
22211 ??? For Athlon the difference is most probably 2. */
22212 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
22215 loadcost = TARGET_ATHLON ? 2 : 0;
22217 if (cost >= loadcost)
22230 /* How many alternative schedules to try. This should be as wide as the
22231 scheduling freedom in the DFA, but no wider. Making this value too
22232 large results in extra work for the scheduler. */
22235 ia32_multipass_dfa_lookahead (void)
22239 case PROCESSOR_PENTIUM:
22242 case PROCESSOR_PENTIUMPRO:
22246 case PROCESSOR_CORE2:
22247 case PROCESSOR_COREI7_32:
22248 case PROCESSOR_COREI7_64:
22249 /* Generally, we want haifa-sched:max_issue() to look ahead as far
22250 as the number of instructions that can be executed in one cycle,
22251 i.e., issue_rate. I wonder why tuning for many CPUs does not do this. */
22252 return ix86_issue_rate ();
22261 /* Model the decoder of Core 2/i7.
22262 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
22263 track the instruction fetch block boundaries and make sure that long
22264 (9+ bytes) instructions are assigned to D0. */
22266 /* Maximum length of an insn that can be handled by
22267 a secondary decoder unit. '8' for Core 2/i7. */
22268 static int core2i7_secondary_decoder_max_insn_size;
22270 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
22271 '16' for Core 2/i7. */
22272 static int core2i7_ifetch_block_size;
22274 /* Maximum number of instructions decoder can handle per cycle.
22275 '6' for Core 2/i7. */
22276 static int core2i7_ifetch_block_max_insns;
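/* A worked example of the model: with a 16 byte ifetch block, three 5 byte
   insns consume 15 bytes, so a fourth 5 byte insn no longer fits and is
   masked out of the ready list until the next cycle, even though the
   6-insn decoder limit has not been reached yet.  */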
22278 typedef struct ix86_first_cycle_multipass_data_ *
22279 ix86_first_cycle_multipass_data_t;
22280 typedef const struct ix86_first_cycle_multipass_data_ *
22281 const_ix86_first_cycle_multipass_data_t;
22283 /* A variable to store target state across calls to max_issue within
22284 one scheduling round. */
22285 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
22286 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
22288 /* Initialize DATA. */
22290 core2i7_first_cycle_multipass_init (void *_data)
22292 ix86_first_cycle_multipass_data_t data
22293 = (ix86_first_cycle_multipass_data_t) _data;
22295 data->ifetch_block_len = 0;
22296 data->ifetch_block_n_insns = 0;
22297 data->ready_try_change = NULL;
22298 data->ready_try_change_size = 0;
22301 /* Advancing the cycle; reset ifetch block counts. */
22303 core2i7_dfa_post_advance_cycle (void)
22305 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
22307 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22309 data->ifetch_block_len = 0;
22310 data->ifetch_block_n_insns = 0;
22313 static int min_insn_size (rtx);
22315 /* Filter out insns from ready_try that the core will not be able to issue
22316 on the current cycle due to decoder restrictions. */
22318 core2i7_first_cycle_multipass_filter_ready_try
22319 (const_ix86_first_cycle_multipass_data_t data,
22320 char *ready_try, int n_ready, bool first_cycle_insn_p)
22327 if (ready_try[n_ready])
22330 insn = get_ready_element (n_ready);
22331 insn_size = min_insn_size (insn);
22333 if (/* If this insn is too long for a secondary decoder ... */
22334 (!first_cycle_insn_p
22335 && insn_size > core2i7_secondary_decoder_max_insn_size)
22336 /* ... or it would not fit into the ifetch block ... */
22337 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
22338 /* ... or the decoder is full already ... */
22339 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
22340 /* ... mask the insn out. */
22342 ready_try[n_ready] = 1;
22344 if (data->ready_try_change)
22345 SET_BIT (data->ready_try_change, n_ready);
22350 /* Prepare for a new round of multipass lookahead scheduling. */
22352 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
22353 bool first_cycle_insn_p)
22355 ix86_first_cycle_multipass_data_t data
22356 = (ix86_first_cycle_multipass_data_t) _data;
22357 const_ix86_first_cycle_multipass_data_t prev_data
22358 = ix86_first_cycle_multipass_data;
22360 /* Restore the state from the end of the previous round. */
22361 data->ifetch_block_len = prev_data->ifetch_block_len;
22362 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
22364 /* Filter instructions that cannot be issued on current cycle due to
22365 decoder restrictions. */
22366 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22367 first_cycle_insn_p);
22370 /* INSN is being issued in the current solution. Account for its impact on
22371 the decoder model. */
22373 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
22374 rtx insn, const void *_prev_data)
22376 ix86_first_cycle_multipass_data_t data
22377 = (ix86_first_cycle_multipass_data_t) _data;
22378 const_ix86_first_cycle_multipass_data_t prev_data
22379 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
22381 int insn_size = min_insn_size (insn);
22383 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
22384 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
22385 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
22386 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
22388 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
22389 if (!data->ready_try_change)
22391 data->ready_try_change = sbitmap_alloc (n_ready);
22392 data->ready_try_change_size = n_ready;
22394 else if (data->ready_try_change_size < n_ready)
22396 data->ready_try_change = sbitmap_resize (data->ready_try_change,
22398 data->ready_try_change_size = n_ready;
22400 sbitmap_zero (data->ready_try_change);
22402 /* Filter out insns from ready_try that the core will not be able to issue
22403 on the current cycle due to decoder restrictions. */
22404 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
22408 /* Revert the effect on ready_try. */
22410 core2i7_first_cycle_multipass_backtrack (const void *_data,
22412 int n_ready ATTRIBUTE_UNUSED)
22414 const_ix86_first_cycle_multipass_data_t data
22415 = (const_ix86_first_cycle_multipass_data_t) _data;
22416 unsigned int i = 0;
22417 sbitmap_iterator sbi;
22419 gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
22420 EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
22426 /* Save the result of multipass lookahead scheduling for the next round. */
22428 core2i7_first_cycle_multipass_end (const void *_data)
22430 const_ix86_first_cycle_multipass_data_t data
22431 = (const_ix86_first_cycle_multipass_data_t) _data;
22432 ix86_first_cycle_multipass_data_t next_data
22433 = ix86_first_cycle_multipass_data;
22437 next_data->ifetch_block_len = data->ifetch_block_len;
22438 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
22442 /* Deallocate target data. */
22444 core2i7_first_cycle_multipass_fini (void *_data)
22446 ix86_first_cycle_multipass_data_t data
22447 = (ix86_first_cycle_multipass_data_t) _data;
22449 if (data->ready_try_change)
22451 sbitmap_free (data->ready_try_change);
22452 data->ready_try_change = NULL;
22453 data->ready_try_change_size = 0;
22457 /* Prepare for scheduling pass. */
22459 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
22460 int verbose ATTRIBUTE_UNUSED,
22461 int max_uid ATTRIBUTE_UNUSED)
22463 /* Install scheduling hooks for the current CPU. Some of these hooks are used
22464 in time-critical parts of the scheduler, so we only set them up when
22465 they are actually used. */
22468 case PROCESSOR_CORE2:
22469 case PROCESSOR_COREI7_32:
22470 case PROCESSOR_COREI7_64:
22471 targetm.sched.dfa_post_advance_cycle
22472 = core2i7_dfa_post_advance_cycle;
22473 targetm.sched.first_cycle_multipass_init
22474 = core2i7_first_cycle_multipass_init;
22475 targetm.sched.first_cycle_multipass_begin
22476 = core2i7_first_cycle_multipass_begin;
22477 targetm.sched.first_cycle_multipass_issue
22478 = core2i7_first_cycle_multipass_issue;
22479 targetm.sched.first_cycle_multipass_backtrack
22480 = core2i7_first_cycle_multipass_backtrack;
22481 targetm.sched.first_cycle_multipass_end
22482 = core2i7_first_cycle_multipass_end;
22483 targetm.sched.first_cycle_multipass_fini
22484 = core2i7_first_cycle_multipass_fini;
22486 /* Set decoder parameters. */
22487 core2i7_secondary_decoder_max_insn_size = 8;
22488 core2i7_ifetch_block_size = 16;
22489 core2i7_ifetch_block_max_insns = 6;
22493 targetm.sched.dfa_post_advance_cycle = NULL;
22494 targetm.sched.first_cycle_multipass_init = NULL;
22495 targetm.sched.first_cycle_multipass_begin = NULL;
22496 targetm.sched.first_cycle_multipass_issue = NULL;
22497 targetm.sched.first_cycle_multipass_backtrack = NULL;
22498 targetm.sched.first_cycle_multipass_end = NULL;
22499 targetm.sched.first_cycle_multipass_fini = NULL;
22505 /* Compute the alignment given to a constant that is being placed in memory.
22506 EXP is the constant and ALIGN is the alignment that the object would
22507 ordinarily have.
22508 The value of this function is used instead of that alignment to align
22509 the object. */
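/* For example, a DFmode constant that would ordinarily get 32-bit alignment
   is aligned to 64 bits instead, so that double loads from the constant
   pool are naturally aligned.  */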
22512 ix86_constant_alignment (tree exp, int align)
22514 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
22515 || TREE_CODE (exp) == INTEGER_CST)
22517 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
22519 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
22522 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
22523 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
22524 return BITS_PER_WORD;
22529 /* Compute the alignment for a static variable.
22530 TYPE is the data type, and ALIGN is the alignment that
22531 the object would ordinarily have. The value of this function is used
22532 instead of that alignment to align the object. */
22535 ix86_data_alignment (tree type, int align)
22537 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
22539 if (AGGREGATE_TYPE_P (type)
22540 && TYPE_SIZE (type)
22541 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22542 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
22543 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
22544 && align < max_align)
22547 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22548 to a 16-byte boundary. */
22551 if (AGGREGATE_TYPE_P (type)
22552 && TYPE_SIZE (type)
22553 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22554 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
22555 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22559 if (TREE_CODE (type) == ARRAY_TYPE)
22561 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22563 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22566 else if (TREE_CODE (type) == COMPLEX_TYPE)
22569 if (TYPE_MODE (type) == DCmode && align < 64)
22571 if ((TYPE_MODE (type) == XCmode
22572 || TYPE_MODE (type) == TCmode) && align < 128)
22575 else if ((TREE_CODE (type) == RECORD_TYPE
22576 || TREE_CODE (type) == UNION_TYPE
22577 || TREE_CODE (type) == QUAL_UNION_TYPE)
22578 && TYPE_FIELDS (type))
22580 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22582 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22585 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22586 || TREE_CODE (type) == INTEGER_TYPE)
22588 if (TYPE_MODE (type) == DFmode && align < 64)
22590 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22597 /* Compute the alignment for a local variable or a stack slot. EXP is
22598 the data type or decl itself, MODE is the widest mode available and
22599 ALIGN is the alignment that the object would ordinarily have. The
22600 value of this macro is used instead of that alignment to align the
22601 object. */
22604 ix86_local_alignment (tree exp, enum machine_mode mode,
22605 unsigned int align)
22609 if (exp && DECL_P (exp))
22611 type = TREE_TYPE (exp);
22620 if (use_avx256_p (mode, type))
22621 cfun->machine->use_avx256_p = true;
22623 /* Don't do dynamic stack realignment for long long objects with
22624 -mpreferred-stack-boundary=2. */
22627 && ix86_preferred_stack_boundary < 64
22628 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
22629 && (!type || !TYPE_USER_ALIGN (type))
22630 && (!decl || !DECL_USER_ALIGN (decl)))
22633 /* If TYPE is NULL, we are allocating a stack slot for caller-save
22634 register in MODE. We will return the largest alignment of XF
22635 and DF. */
22638 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
22639 align = GET_MODE_ALIGNMENT (DFmode);
22643 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
22644 to a 16-byte boundary. The exact wording is:
22646 An array uses the same alignment as its elements, except that a local or
22647 global array variable of length at least 16 bytes or
22648 a C99 variable-length array variable always has alignment of at least 16 bytes.
22650 This was added to allow use of aligned SSE instructions on arrays. This
22651 rule is meant for static storage (where the compiler cannot do the
22652 analysis by itself). We follow it for automatic variables only when
22653 convenient. We fully control everything in the function being compiled,
22654 and functions from other units cannot rely on the alignment.
22656 Exclude the va_list type. It is the common case of a local array where
22657 we cannot benefit from the alignment. */
22658 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
22661 if (AGGREGATE_TYPE_P (type)
22662 && (TYPE_MAIN_VARIANT (type)
22663 != TYPE_MAIN_VARIANT (va_list_type_node))
22664 && TYPE_SIZE (type)
22665 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
22666 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
22667 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
22670 if (TREE_CODE (type) == ARRAY_TYPE)
22672 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
22674 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
22677 else if (TREE_CODE (type) == COMPLEX_TYPE)
22679 if (TYPE_MODE (type) == DCmode && align < 64)
22681 if ((TYPE_MODE (type) == XCmode
22682 || TYPE_MODE (type) == TCmode) && align < 128)
22685 else if ((TREE_CODE (type) == RECORD_TYPE
22686 || TREE_CODE (type) == UNION_TYPE
22687 || TREE_CODE (type) == QUAL_UNION_TYPE)
22688 && TYPE_FIELDS (type))
22690 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
22692 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
22695 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
22696 || TREE_CODE (type) == INTEGER_TYPE)
22699 if (TYPE_MODE (type) == DFmode && align < 64)
22701 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
22707 /* Compute the minimum required alignment for dynamic stack realignment
22708 purposes for a local variable, parameter or a stack slot. EXP is
22709 the data type or decl itself, MODE is its mode and ALIGN is the
22710 alignment that the object would ordinarily have. */
22713 ix86_minimum_alignment (tree exp, enum machine_mode mode,
22714 unsigned int align)
22718 if (exp && DECL_P (exp))
22720 type = TREE_TYPE (exp);
22729 if (use_avx256_p (mode, type))
22730 cfun->machine->use_avx256_p = true;
22732 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
22735 /* Don't do dynamic stack realignment for long long objects with
22736 -mpreferred-stack-boundary=2. */
22737 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
22738 && (!type || !TYPE_USER_ALIGN (type))
22739 && (!decl || !DECL_USER_ALIGN (decl)))
22745 /* Find a location for the static chain incoming to a nested function.
22746 This is a register, unless all free registers are used by arguments. */
22749 ix86_static_chain (const_tree fndecl, bool incoming_p)
22753 if (!DECL_STATIC_CHAIN (fndecl))
22758 /* We always use R10 in 64-bit mode. */
22764 /* By default in 32-bit mode we use ECX to pass the static chain. */
22767 fntype = TREE_TYPE (fndecl);
22768 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
22770 /* Fastcall functions use ecx/edx for arguments, which leaves
22771 us with EAX for the static chain. */
22774 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
22776 /* Thiscall functions use ecx for arguments, which leaves
22777 us with EAX for the static chain. */
22780 else if (ix86_function_regparm (fntype, fndecl) == 3)
22782 /* For regparm 3, we have no free call-clobbered registers in
22783 which to store the static chain. In order to implement this,
22784 we have the trampoline push the static chain to the stack.
22785 However, we can't push a value below the return address when
22786 we call the nested function directly, so we have to use an
22787 alternate entry point. For this we use ESI, and have the
22788 alternate entry point push ESI, so that things appear the
22789 same once we're executing the nested function. */
22792 if (fndecl == current_function_decl)
22793 ix86_static_chain_on_stack = true;
22794 return gen_frame_mem (SImode,
22795 plus_constant (arg_pointer_rtx, -8));
22801 return gen_rtx_REG (Pmode, regno);
22804 /* Emit RTL insns to initialize the variable parts of a trampoline.
22805 FNDECL is the decl of the target address; M_TRAMP is a MEM for
22806 the trampoline, and CHAIN_VALUE is an RTX for the static chain
22807 to be passed to the target function. */
22810 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
22814 fnaddr = XEXP (DECL_RTL (fndecl), 0);
22821 /* Depending on the static chain location, either load a register
22822 with a constant, or push the constant to the stack. All of the
22823 instructions are the same size. */
22824 chain = ix86_static_chain (fndecl, true);
22827 if (REGNO (chain) == CX_REG)
22829 else if (REGNO (chain) == AX_REG)
22832 gcc_unreachable ();
22837 mem = adjust_address (m_tramp, QImode, 0);
22838 emit_move_insn (mem, gen_int_mode (opcode, QImode));
22840 mem = adjust_address (m_tramp, SImode, 1);
22841 emit_move_insn (mem, chain_value);
22843 /* Compute offset from the end of the jmp to the target function.
22844 In the case in which the trampoline stores the static chain on
22845 the stack, we need to skip the first insn which pushes the
22846 (call-saved) register static chain; this push is 1 byte. */
22847 disp = expand_binop (SImode, sub_optab, fnaddr,
22848 plus_constant (XEXP (m_tramp, 0),
22849 MEM_P (chain) ? 9 : 10),
22850 NULL_RTX, 1, OPTAB_DIRECT);
22852 mem = adjust_address (m_tramp, QImode, 5);
22853 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
22855 mem = adjust_address (m_tramp, SImode, 6);
22856 emit_move_insn (mem, disp);
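/* The 32-bit trampoline emitted above is 10 bytes:
     offset 0: one opcode byte (a mov of the chain into %ecx or %eax,
               or a push for the on-stack static chain case)
     offset 1: imm32  the static chain value
     offset 5: 0xe9   jmp rel32
     offset 6: rel32  displacement to the target function  */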
22862 /* Load the function address into r11. Try to load the address using
22863 the shorter movl instead of movabs. We may want to support
22864 movq for kernel mode, but the kernel does not use trampolines at
22865 the moment. */
22866 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
22868 fnaddr = copy_to_mode_reg (DImode, fnaddr);
22870 mem = adjust_address (m_tramp, HImode, offset);
22871 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
22873 mem = adjust_address (m_tramp, SImode, offset + 2);
22874 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
22879 mem = adjust_address (m_tramp, HImode, offset);
22880 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
22882 mem = adjust_address (m_tramp, DImode, offset + 2);
22883 emit_move_insn (mem, fnaddr);
22887 /* Load static chain using movabs to r10. */
22888 mem = adjust_address (m_tramp, HImode, offset);
22889 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
22891 mem = adjust_address (m_tramp, DImode, offset + 2);
22892 emit_move_insn (mem, chain_value);
22895 /* Jump to r11; the last (unused) byte is a nop, only there to
22896 pad the write out to a single 32-bit store. */
22897 mem = adjust_address (m_tramp, SImode, offset);
22898 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
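/* Byte layout of the 64-bit trampoline emitted above (note the
   little-endian stores, e.g. 0xbb41 becomes bytes 41 bb):
     41 bb imm32   movl   $fnaddr, %r11d  (when fnaddr zero-extends)
   or
     49 bb imm64   movabs $fnaddr, %r11
   then
     49 ba imm64   movabs $chain, %r10
     49 ff e3      jmp    *%r11
     90            nop  (pads the final write to 4 bytes)  */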
22901 gcc_assert (offset <= TRAMPOLINE_SIZE);
22904 #ifdef ENABLE_EXECUTE_STACK
22905 #ifdef CHECK_EXECUTE_STACK_ENABLED
22906 if (CHECK_EXECUTE_STACK_ENABLED)
22908 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
22909 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
22913 /* The following file contains several enumerations and data structures
22914 built from the definitions in i386-builtin-types.def. */
22916 #include "i386-builtin-types.inc"
22918 /* Table for the ix86 builtin non-function types. */
22919 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
22921 /* Retrieve an element from the above table, building some of
22922 the types lazily. */
22925 ix86_get_builtin_type (enum ix86_builtin_type tcode)
22927 unsigned int index;
22930 gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));
22932 type = ix86_builtin_type_tab[(int) tcode];
22936 gcc_assert (tcode > IX86_BT_LAST_PRIM);
22937 if (tcode <= IX86_BT_LAST_VECT)
22939 enum machine_mode mode;
22941 index = tcode - IX86_BT_LAST_PRIM - 1;
22942 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
22943 mode = ix86_builtin_type_vect_mode[index];
22945 type = build_vector_type_for_mode (itype, mode);
22951 index = tcode - IX86_BT_LAST_VECT - 1;
22952 if (tcode <= IX86_BT_LAST_PTR)
22953 quals = TYPE_UNQUALIFIED;
22955 quals = TYPE_QUAL_CONST;
22957 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
22958 if (quals != TYPE_UNQUALIFIED)
22959 itype = build_qualified_type (itype, quals);
22961 type = build_pointer_type (itype);
22964 ix86_builtin_type_tab[(int) tcode] = type;
22968 /* Table for the ix86 builtin function types. */
22969 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
22971 /* Retrieve an element from the above table, building some of
22972 the types lazily. */
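/* For example, the first request for a FUNC type code walks its argument
   codes backwards through ix86_builtin_func_args, consing each argument
   type onto a list ending in void_list_node, calls build_function_type,
   and caches the result so that later lookups are a plain array read.  */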
22975 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
22979 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
22981 type = ix86_builtin_func_type_tab[(int) tcode];
22985 if (tcode <= IX86_BT_LAST_FUNC)
22987 unsigned start = ix86_builtin_func_start[(int) tcode];
22988 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
22989 tree rtype, atype, args = void_list_node;
22992 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
22993 for (i = after - 1; i > start; --i)
22995 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
22996 args = tree_cons (NULL, atype, args);
22999 type = build_function_type (rtype, args);
23003 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
23004 enum ix86_builtin_func_type icode;
23006 icode = ix86_builtin_func_alias_base[index];
23007 type = ix86_get_builtin_func_type (icode);
23010 ix86_builtin_func_type_tab[(int) tcode] = type;
23015 /* Codes for all the SSE/MMX builtins. */
23018 IX86_BUILTIN_ADDPS,
23019 IX86_BUILTIN_ADDSS,
23020 IX86_BUILTIN_DIVPS,
23021 IX86_BUILTIN_DIVSS,
23022 IX86_BUILTIN_MULPS,
23023 IX86_BUILTIN_MULSS,
23024 IX86_BUILTIN_SUBPS,
23025 IX86_BUILTIN_SUBSS,
23027 IX86_BUILTIN_CMPEQPS,
23028 IX86_BUILTIN_CMPLTPS,
23029 IX86_BUILTIN_CMPLEPS,
23030 IX86_BUILTIN_CMPGTPS,
23031 IX86_BUILTIN_CMPGEPS,
23032 IX86_BUILTIN_CMPNEQPS,
23033 IX86_BUILTIN_CMPNLTPS,
23034 IX86_BUILTIN_CMPNLEPS,
23035 IX86_BUILTIN_CMPNGTPS,
23036 IX86_BUILTIN_CMPNGEPS,
23037 IX86_BUILTIN_CMPORDPS,
23038 IX86_BUILTIN_CMPUNORDPS,
23039 IX86_BUILTIN_CMPEQSS,
23040 IX86_BUILTIN_CMPLTSS,
23041 IX86_BUILTIN_CMPLESS,
23042 IX86_BUILTIN_CMPNEQSS,
23043 IX86_BUILTIN_CMPNLTSS,
23044 IX86_BUILTIN_CMPNLESS,
23045 IX86_BUILTIN_CMPNGTSS,
23046 IX86_BUILTIN_CMPNGESS,
23047 IX86_BUILTIN_CMPORDSS,
23048 IX86_BUILTIN_CMPUNORDSS,
23050 IX86_BUILTIN_COMIEQSS,
23051 IX86_BUILTIN_COMILTSS,
23052 IX86_BUILTIN_COMILESS,
23053 IX86_BUILTIN_COMIGTSS,
23054 IX86_BUILTIN_COMIGESS,
23055 IX86_BUILTIN_COMINEQSS,
23056 IX86_BUILTIN_UCOMIEQSS,
23057 IX86_BUILTIN_UCOMILTSS,
23058 IX86_BUILTIN_UCOMILESS,
23059 IX86_BUILTIN_UCOMIGTSS,
23060 IX86_BUILTIN_UCOMIGESS,
23061 IX86_BUILTIN_UCOMINEQSS,
23063 IX86_BUILTIN_CVTPI2PS,
23064 IX86_BUILTIN_CVTPS2PI,
23065 IX86_BUILTIN_CVTSI2SS,
23066 IX86_BUILTIN_CVTSI642SS,
23067 IX86_BUILTIN_CVTSS2SI,
23068 IX86_BUILTIN_CVTSS2SI64,
23069 IX86_BUILTIN_CVTTPS2PI,
23070 IX86_BUILTIN_CVTTSS2SI,
23071 IX86_BUILTIN_CVTTSS2SI64,
23073 IX86_BUILTIN_MAXPS,
23074 IX86_BUILTIN_MAXSS,
23075 IX86_BUILTIN_MINPS,
23076 IX86_BUILTIN_MINSS,
23078 IX86_BUILTIN_LOADUPS,
23079 IX86_BUILTIN_STOREUPS,
23080 IX86_BUILTIN_MOVSS,
23082 IX86_BUILTIN_MOVHLPS,
23083 IX86_BUILTIN_MOVLHPS,
23084 IX86_BUILTIN_LOADHPS,
23085 IX86_BUILTIN_LOADLPS,
23086 IX86_BUILTIN_STOREHPS,
23087 IX86_BUILTIN_STORELPS,
23089 IX86_BUILTIN_MASKMOVQ,
23090 IX86_BUILTIN_MOVMSKPS,
23091 IX86_BUILTIN_PMOVMSKB,
23093 IX86_BUILTIN_MOVNTPS,
23094 IX86_BUILTIN_MOVNTQ,
23096 IX86_BUILTIN_LOADDQU,
23097 IX86_BUILTIN_STOREDQU,
23099 IX86_BUILTIN_PACKSSWB,
23100 IX86_BUILTIN_PACKSSDW,
23101 IX86_BUILTIN_PACKUSWB,
23103 IX86_BUILTIN_PADDB,
23104 IX86_BUILTIN_PADDW,
23105 IX86_BUILTIN_PADDD,
23106 IX86_BUILTIN_PADDQ,
23107 IX86_BUILTIN_PADDSB,
23108 IX86_BUILTIN_PADDSW,
23109 IX86_BUILTIN_PADDUSB,
23110 IX86_BUILTIN_PADDUSW,
23111 IX86_BUILTIN_PSUBB,
23112 IX86_BUILTIN_PSUBW,
23113 IX86_BUILTIN_PSUBD,
23114 IX86_BUILTIN_PSUBQ,
23115 IX86_BUILTIN_PSUBSB,
23116 IX86_BUILTIN_PSUBSW,
23117 IX86_BUILTIN_PSUBUSB,
23118 IX86_BUILTIN_PSUBUSW,
23121 IX86_BUILTIN_PANDN,
23125 IX86_BUILTIN_PAVGB,
23126 IX86_BUILTIN_PAVGW,
23128 IX86_BUILTIN_PCMPEQB,
23129 IX86_BUILTIN_PCMPEQW,
23130 IX86_BUILTIN_PCMPEQD,
23131 IX86_BUILTIN_PCMPGTB,
23132 IX86_BUILTIN_PCMPGTW,
23133 IX86_BUILTIN_PCMPGTD,
23135 IX86_BUILTIN_PMADDWD,
23137 IX86_BUILTIN_PMAXSW,
23138 IX86_BUILTIN_PMAXUB,
23139 IX86_BUILTIN_PMINSW,
23140 IX86_BUILTIN_PMINUB,
23142 IX86_BUILTIN_PMULHUW,
23143 IX86_BUILTIN_PMULHW,
23144 IX86_BUILTIN_PMULLW,
23146 IX86_BUILTIN_PSADBW,
23147 IX86_BUILTIN_PSHUFW,
23149 IX86_BUILTIN_PSLLW,
23150 IX86_BUILTIN_PSLLD,
23151 IX86_BUILTIN_PSLLQ,
23152 IX86_BUILTIN_PSRAW,
23153 IX86_BUILTIN_PSRAD,
23154 IX86_BUILTIN_PSRLW,
23155 IX86_BUILTIN_PSRLD,
23156 IX86_BUILTIN_PSRLQ,
23157 IX86_BUILTIN_PSLLWI,
23158 IX86_BUILTIN_PSLLDI,
23159 IX86_BUILTIN_PSLLQI,
23160 IX86_BUILTIN_PSRAWI,
23161 IX86_BUILTIN_PSRADI,
23162 IX86_BUILTIN_PSRLWI,
23163 IX86_BUILTIN_PSRLDI,
23164 IX86_BUILTIN_PSRLQI,
23166 IX86_BUILTIN_PUNPCKHBW,
23167 IX86_BUILTIN_PUNPCKHWD,
23168 IX86_BUILTIN_PUNPCKHDQ,
23169 IX86_BUILTIN_PUNPCKLBW,
23170 IX86_BUILTIN_PUNPCKLWD,
23171 IX86_BUILTIN_PUNPCKLDQ,
23173 IX86_BUILTIN_SHUFPS,
23175 IX86_BUILTIN_RCPPS,
23176 IX86_BUILTIN_RCPSS,
23177 IX86_BUILTIN_RSQRTPS,
23178 IX86_BUILTIN_RSQRTPS_NR,
23179 IX86_BUILTIN_RSQRTSS,
23180 IX86_BUILTIN_RSQRTF,
23181 IX86_BUILTIN_SQRTPS,
23182 IX86_BUILTIN_SQRTPS_NR,
23183 IX86_BUILTIN_SQRTSS,
23185 IX86_BUILTIN_UNPCKHPS,
23186 IX86_BUILTIN_UNPCKLPS,
23188 IX86_BUILTIN_ANDPS,
23189 IX86_BUILTIN_ANDNPS,
23191 IX86_BUILTIN_XORPS,
23194 IX86_BUILTIN_LDMXCSR,
23195 IX86_BUILTIN_STMXCSR,
23196 IX86_BUILTIN_SFENCE,
23198 /* 3DNow! Original */
23199 IX86_BUILTIN_FEMMS,
23200 IX86_BUILTIN_PAVGUSB,
23201 IX86_BUILTIN_PF2ID,
23202 IX86_BUILTIN_PFACC,
23203 IX86_BUILTIN_PFADD,
23204 IX86_BUILTIN_PFCMPEQ,
23205 IX86_BUILTIN_PFCMPGE,
23206 IX86_BUILTIN_PFCMPGT,
23207 IX86_BUILTIN_PFMAX,
23208 IX86_BUILTIN_PFMIN,
23209 IX86_BUILTIN_PFMUL,
23210 IX86_BUILTIN_PFRCP,
23211 IX86_BUILTIN_PFRCPIT1,
23212 IX86_BUILTIN_PFRCPIT2,
23213 IX86_BUILTIN_PFRSQIT1,
23214 IX86_BUILTIN_PFRSQRT,
23215 IX86_BUILTIN_PFSUB,
23216 IX86_BUILTIN_PFSUBR,
23217 IX86_BUILTIN_PI2FD,
23218 IX86_BUILTIN_PMULHRW,
23220 /* 3DNow! Athlon Extensions */
23221 IX86_BUILTIN_PF2IW,
23222 IX86_BUILTIN_PFNACC,
23223 IX86_BUILTIN_PFPNACC,
23224 IX86_BUILTIN_PI2FW,
23225 IX86_BUILTIN_PSWAPDSI,
23226 IX86_BUILTIN_PSWAPDSF,
23229 IX86_BUILTIN_ADDPD,
23230 IX86_BUILTIN_ADDSD,
23231 IX86_BUILTIN_DIVPD,
23232 IX86_BUILTIN_DIVSD,
23233 IX86_BUILTIN_MULPD,
23234 IX86_BUILTIN_MULSD,
23235 IX86_BUILTIN_SUBPD,
23236 IX86_BUILTIN_SUBSD,
23238 IX86_BUILTIN_CMPEQPD,
23239 IX86_BUILTIN_CMPLTPD,
23240 IX86_BUILTIN_CMPLEPD,
23241 IX86_BUILTIN_CMPGTPD,
23242 IX86_BUILTIN_CMPGEPD,
23243 IX86_BUILTIN_CMPNEQPD,
23244 IX86_BUILTIN_CMPNLTPD,
23245 IX86_BUILTIN_CMPNLEPD,
23246 IX86_BUILTIN_CMPNGTPD,
23247 IX86_BUILTIN_CMPNGEPD,
23248 IX86_BUILTIN_CMPORDPD,
23249 IX86_BUILTIN_CMPUNORDPD,
23250 IX86_BUILTIN_CMPEQSD,
23251 IX86_BUILTIN_CMPLTSD,
23252 IX86_BUILTIN_CMPLESD,
23253 IX86_BUILTIN_CMPNEQSD,
23254 IX86_BUILTIN_CMPNLTSD,
23255 IX86_BUILTIN_CMPNLESD,
23256 IX86_BUILTIN_CMPORDSD,
23257 IX86_BUILTIN_CMPUNORDSD,
23259 IX86_BUILTIN_COMIEQSD,
23260 IX86_BUILTIN_COMILTSD,
23261 IX86_BUILTIN_COMILESD,
23262 IX86_BUILTIN_COMIGTSD,
23263 IX86_BUILTIN_COMIGESD,
23264 IX86_BUILTIN_COMINEQSD,
23265 IX86_BUILTIN_UCOMIEQSD,
23266 IX86_BUILTIN_UCOMILTSD,
23267 IX86_BUILTIN_UCOMILESD,
23268 IX86_BUILTIN_UCOMIGTSD,
23269 IX86_BUILTIN_UCOMIGESD,
23270 IX86_BUILTIN_UCOMINEQSD,
23272 IX86_BUILTIN_MAXPD,
23273 IX86_BUILTIN_MAXSD,
23274 IX86_BUILTIN_MINPD,
23275 IX86_BUILTIN_MINSD,
23277 IX86_BUILTIN_ANDPD,
23278 IX86_BUILTIN_ANDNPD,
23280 IX86_BUILTIN_XORPD,
23282 IX86_BUILTIN_SQRTPD,
23283 IX86_BUILTIN_SQRTSD,
23285 IX86_BUILTIN_UNPCKHPD,
23286 IX86_BUILTIN_UNPCKLPD,
23288 IX86_BUILTIN_SHUFPD,
23290 IX86_BUILTIN_LOADUPD,
23291 IX86_BUILTIN_STOREUPD,
23292 IX86_BUILTIN_MOVSD,
23294 IX86_BUILTIN_LOADHPD,
23295 IX86_BUILTIN_LOADLPD,
23297 IX86_BUILTIN_CVTDQ2PD,
23298 IX86_BUILTIN_CVTDQ2PS,
23300 IX86_BUILTIN_CVTPD2DQ,
23301 IX86_BUILTIN_CVTPD2PI,
23302 IX86_BUILTIN_CVTPD2PS,
23303 IX86_BUILTIN_CVTTPD2DQ,
23304 IX86_BUILTIN_CVTTPD2PI,
23306 IX86_BUILTIN_CVTPI2PD,
23307 IX86_BUILTIN_CVTSI2SD,
23308 IX86_BUILTIN_CVTSI642SD,
23310 IX86_BUILTIN_CVTSD2SI,
23311 IX86_BUILTIN_CVTSD2SI64,
23312 IX86_BUILTIN_CVTSD2SS,
23313 IX86_BUILTIN_CVTSS2SD,
23314 IX86_BUILTIN_CVTTSD2SI,
23315 IX86_BUILTIN_CVTTSD2SI64,
23317 IX86_BUILTIN_CVTPS2DQ,
23318 IX86_BUILTIN_CVTPS2PD,
23319 IX86_BUILTIN_CVTTPS2DQ,
23321 IX86_BUILTIN_MOVNTI,
23322 IX86_BUILTIN_MOVNTPD,
23323 IX86_BUILTIN_MOVNTDQ,
23325 IX86_BUILTIN_MOVQ128,
23328 IX86_BUILTIN_MASKMOVDQU,
23329 IX86_BUILTIN_MOVMSKPD,
23330 IX86_BUILTIN_PMOVMSKB128,
23332 IX86_BUILTIN_PACKSSWB128,
23333 IX86_BUILTIN_PACKSSDW128,
23334 IX86_BUILTIN_PACKUSWB128,
23336 IX86_BUILTIN_PADDB128,
23337 IX86_BUILTIN_PADDW128,
23338 IX86_BUILTIN_PADDD128,
23339 IX86_BUILTIN_PADDQ128,
23340 IX86_BUILTIN_PADDSB128,
23341 IX86_BUILTIN_PADDSW128,
23342 IX86_BUILTIN_PADDUSB128,
23343 IX86_BUILTIN_PADDUSW128,
23344 IX86_BUILTIN_PSUBB128,
23345 IX86_BUILTIN_PSUBW128,
23346 IX86_BUILTIN_PSUBD128,
23347 IX86_BUILTIN_PSUBQ128,
23348 IX86_BUILTIN_PSUBSB128,
23349 IX86_BUILTIN_PSUBSW128,
23350 IX86_BUILTIN_PSUBUSB128,
23351 IX86_BUILTIN_PSUBUSW128,
23353 IX86_BUILTIN_PAND128,
23354 IX86_BUILTIN_PANDN128,
23355 IX86_BUILTIN_POR128,
23356 IX86_BUILTIN_PXOR128,
23358 IX86_BUILTIN_PAVGB128,
23359 IX86_BUILTIN_PAVGW128,
23361 IX86_BUILTIN_PCMPEQB128,
23362 IX86_BUILTIN_PCMPEQW128,
23363 IX86_BUILTIN_PCMPEQD128,
23364 IX86_BUILTIN_PCMPGTB128,
23365 IX86_BUILTIN_PCMPGTW128,
23366 IX86_BUILTIN_PCMPGTD128,
23368 IX86_BUILTIN_PMADDWD128,
23370 IX86_BUILTIN_PMAXSW128,
23371 IX86_BUILTIN_PMAXUB128,
23372 IX86_BUILTIN_PMINSW128,
23373 IX86_BUILTIN_PMINUB128,
23375 IX86_BUILTIN_PMULUDQ,
23376 IX86_BUILTIN_PMULUDQ128,
23377 IX86_BUILTIN_PMULHUW128,
23378 IX86_BUILTIN_PMULHW128,
23379 IX86_BUILTIN_PMULLW128,
23381 IX86_BUILTIN_PSADBW128,
23382 IX86_BUILTIN_PSHUFHW,
23383 IX86_BUILTIN_PSHUFLW,
23384 IX86_BUILTIN_PSHUFD,
23386 IX86_BUILTIN_PSLLDQI128,
23387 IX86_BUILTIN_PSLLWI128,
23388 IX86_BUILTIN_PSLLDI128,
23389 IX86_BUILTIN_PSLLQI128,
23390 IX86_BUILTIN_PSRAWI128,
23391 IX86_BUILTIN_PSRADI128,
23392 IX86_BUILTIN_PSRLDQI128,
23393 IX86_BUILTIN_PSRLWI128,
23394 IX86_BUILTIN_PSRLDI128,
23395 IX86_BUILTIN_PSRLQI128,
23397 IX86_BUILTIN_PSLLDQ128,
23398 IX86_BUILTIN_PSLLW128,
23399 IX86_BUILTIN_PSLLD128,
23400 IX86_BUILTIN_PSLLQ128,
23401 IX86_BUILTIN_PSRAW128,
23402 IX86_BUILTIN_PSRAD128,
23403 IX86_BUILTIN_PSRLW128,
23404 IX86_BUILTIN_PSRLD128,
23405 IX86_BUILTIN_PSRLQ128,
23407 IX86_BUILTIN_PUNPCKHBW128,
23408 IX86_BUILTIN_PUNPCKHWD128,
23409 IX86_BUILTIN_PUNPCKHDQ128,
23410 IX86_BUILTIN_PUNPCKHQDQ128,
23411 IX86_BUILTIN_PUNPCKLBW128,
23412 IX86_BUILTIN_PUNPCKLWD128,
23413 IX86_BUILTIN_PUNPCKLDQ128,
23414 IX86_BUILTIN_PUNPCKLQDQ128,
23416 IX86_BUILTIN_CLFLUSH,
23417 IX86_BUILTIN_MFENCE,
23418 IX86_BUILTIN_LFENCE,
23420 IX86_BUILTIN_BSRSI,
23421 IX86_BUILTIN_BSRDI,
23422 IX86_BUILTIN_RDPMC,
23423 IX86_BUILTIN_RDTSC,
23424 IX86_BUILTIN_RDTSCP,
23425 IX86_BUILTIN_ROLQI,
23426 IX86_BUILTIN_ROLHI,
23427 IX86_BUILTIN_RORQI,
23428 IX86_BUILTIN_RORHI,
23431 IX86_BUILTIN_ADDSUBPS,
23432 IX86_BUILTIN_HADDPS,
23433 IX86_BUILTIN_HSUBPS,
23434 IX86_BUILTIN_MOVSHDUP,
23435 IX86_BUILTIN_MOVSLDUP,
23436 IX86_BUILTIN_ADDSUBPD,
23437 IX86_BUILTIN_HADDPD,
23438 IX86_BUILTIN_HSUBPD,
23439 IX86_BUILTIN_LDDQU,
23441 IX86_BUILTIN_MONITOR,
23442 IX86_BUILTIN_MWAIT,
23445 IX86_BUILTIN_PHADDW,
23446 IX86_BUILTIN_PHADDD,
23447 IX86_BUILTIN_PHADDSW,
23448 IX86_BUILTIN_PHSUBW,
23449 IX86_BUILTIN_PHSUBD,
23450 IX86_BUILTIN_PHSUBSW,
23451 IX86_BUILTIN_PMADDUBSW,
23452 IX86_BUILTIN_PMULHRSW,
23453 IX86_BUILTIN_PSHUFB,
23454 IX86_BUILTIN_PSIGNB,
23455 IX86_BUILTIN_PSIGNW,
23456 IX86_BUILTIN_PSIGND,
23457 IX86_BUILTIN_PALIGNR,
23458 IX86_BUILTIN_PABSB,
23459 IX86_BUILTIN_PABSW,
23460 IX86_BUILTIN_PABSD,
23462 IX86_BUILTIN_PHADDW128,
23463 IX86_BUILTIN_PHADDD128,
23464 IX86_BUILTIN_PHADDSW128,
23465 IX86_BUILTIN_PHSUBW128,
23466 IX86_BUILTIN_PHSUBD128,
23467 IX86_BUILTIN_PHSUBSW128,
23468 IX86_BUILTIN_PMADDUBSW128,
23469 IX86_BUILTIN_PMULHRSW128,
23470 IX86_BUILTIN_PSHUFB128,
23471 IX86_BUILTIN_PSIGNB128,
23472 IX86_BUILTIN_PSIGNW128,
23473 IX86_BUILTIN_PSIGND128,
23474 IX86_BUILTIN_PALIGNR128,
23475 IX86_BUILTIN_PABSB128,
23476 IX86_BUILTIN_PABSW128,
23477 IX86_BUILTIN_PABSD128,
23479 /* AMDFAM10 - SSE4A New Instructions. */
23480 IX86_BUILTIN_MOVNTSD,
23481 IX86_BUILTIN_MOVNTSS,
23482 IX86_BUILTIN_EXTRQI,
23483 IX86_BUILTIN_EXTRQ,
23484 IX86_BUILTIN_INSERTQI,
23485 IX86_BUILTIN_INSERTQ,
23488 IX86_BUILTIN_BLENDPD,
23489 IX86_BUILTIN_BLENDPS,
23490 IX86_BUILTIN_BLENDVPD,
23491 IX86_BUILTIN_BLENDVPS,
23492 IX86_BUILTIN_PBLENDVB128,
23493 IX86_BUILTIN_PBLENDW128,
23498 IX86_BUILTIN_INSERTPS128,
23500 IX86_BUILTIN_MOVNTDQA,
23501 IX86_BUILTIN_MPSADBW128,
23502 IX86_BUILTIN_PACKUSDW128,
23503 IX86_BUILTIN_PCMPEQQ,
23504 IX86_BUILTIN_PHMINPOSUW128,
23506 IX86_BUILTIN_PMAXSB128,
23507 IX86_BUILTIN_PMAXSD128,
23508 IX86_BUILTIN_PMAXUD128,
23509 IX86_BUILTIN_PMAXUW128,
23511 IX86_BUILTIN_PMINSB128,
23512 IX86_BUILTIN_PMINSD128,
23513 IX86_BUILTIN_PMINUD128,
23514 IX86_BUILTIN_PMINUW128,
23516 IX86_BUILTIN_PMOVSXBW128,
23517 IX86_BUILTIN_PMOVSXBD128,
23518 IX86_BUILTIN_PMOVSXBQ128,
23519 IX86_BUILTIN_PMOVSXWD128,
23520 IX86_BUILTIN_PMOVSXWQ128,
23521 IX86_BUILTIN_PMOVSXDQ128,
23523 IX86_BUILTIN_PMOVZXBW128,
23524 IX86_BUILTIN_PMOVZXBD128,
23525 IX86_BUILTIN_PMOVZXBQ128,
23526 IX86_BUILTIN_PMOVZXWD128,
23527 IX86_BUILTIN_PMOVZXWQ128,
23528 IX86_BUILTIN_PMOVZXDQ128,
23530 IX86_BUILTIN_PMULDQ128,
23531 IX86_BUILTIN_PMULLD128,
23533 IX86_BUILTIN_ROUNDPD,
23534 IX86_BUILTIN_ROUNDPS,
23535 IX86_BUILTIN_ROUNDSD,
23536 IX86_BUILTIN_ROUNDSS,
23538 IX86_BUILTIN_PTESTZ,
23539 IX86_BUILTIN_PTESTC,
23540 IX86_BUILTIN_PTESTNZC,
23542 IX86_BUILTIN_VEC_INIT_V2SI,
23543 IX86_BUILTIN_VEC_INIT_V4HI,
23544 IX86_BUILTIN_VEC_INIT_V8QI,
23545 IX86_BUILTIN_VEC_EXT_V2DF,
23546 IX86_BUILTIN_VEC_EXT_V2DI,
23547 IX86_BUILTIN_VEC_EXT_V4SF,
23548 IX86_BUILTIN_VEC_EXT_V4SI,
23549 IX86_BUILTIN_VEC_EXT_V8HI,
23550 IX86_BUILTIN_VEC_EXT_V2SI,
23551 IX86_BUILTIN_VEC_EXT_V4HI,
23552 IX86_BUILTIN_VEC_EXT_V16QI,
23553 IX86_BUILTIN_VEC_SET_V2DI,
23554 IX86_BUILTIN_VEC_SET_V4SF,
23555 IX86_BUILTIN_VEC_SET_V4SI,
23556 IX86_BUILTIN_VEC_SET_V8HI,
23557 IX86_BUILTIN_VEC_SET_V4HI,
23558 IX86_BUILTIN_VEC_SET_V16QI,
23560 IX86_BUILTIN_VEC_PACK_SFIX,
23563 IX86_BUILTIN_CRC32QI,
23564 IX86_BUILTIN_CRC32HI,
23565 IX86_BUILTIN_CRC32SI,
23566 IX86_BUILTIN_CRC32DI,
23568 IX86_BUILTIN_PCMPESTRI128,
23569 IX86_BUILTIN_PCMPESTRM128,
23570 IX86_BUILTIN_PCMPESTRA128,
23571 IX86_BUILTIN_PCMPESTRC128,
23572 IX86_BUILTIN_PCMPESTRO128,
23573 IX86_BUILTIN_PCMPESTRS128,
23574 IX86_BUILTIN_PCMPESTRZ128,
23575 IX86_BUILTIN_PCMPISTRI128,
23576 IX86_BUILTIN_PCMPISTRM128,
23577 IX86_BUILTIN_PCMPISTRA128,
23578 IX86_BUILTIN_PCMPISTRC128,
23579 IX86_BUILTIN_PCMPISTRO128,
23580 IX86_BUILTIN_PCMPISTRS128,
23581 IX86_BUILTIN_PCMPISTRZ128,
23583 IX86_BUILTIN_PCMPGTQ,
23585 /* AES instructions */
23586 IX86_BUILTIN_AESENC128,
23587 IX86_BUILTIN_AESENCLAST128,
23588 IX86_BUILTIN_AESDEC128,
23589 IX86_BUILTIN_AESDECLAST128,
23590 IX86_BUILTIN_AESIMC128,
23591 IX86_BUILTIN_AESKEYGENASSIST128,
23593 /* PCLMUL instruction */
23594 IX86_BUILTIN_PCLMULQDQ128,
23597 IX86_BUILTIN_ADDPD256,
23598 IX86_BUILTIN_ADDPS256,
23599 IX86_BUILTIN_ADDSUBPD256,
23600 IX86_BUILTIN_ADDSUBPS256,
23601 IX86_BUILTIN_ANDPD256,
23602 IX86_BUILTIN_ANDPS256,
23603 IX86_BUILTIN_ANDNPD256,
23604 IX86_BUILTIN_ANDNPS256,
23605 IX86_BUILTIN_BLENDPD256,
23606 IX86_BUILTIN_BLENDPS256,
23607 IX86_BUILTIN_BLENDVPD256,
23608 IX86_BUILTIN_BLENDVPS256,
23609 IX86_BUILTIN_DIVPD256,
23610 IX86_BUILTIN_DIVPS256,
23611 IX86_BUILTIN_DPPS256,
23612 IX86_BUILTIN_HADDPD256,
23613 IX86_BUILTIN_HADDPS256,
23614 IX86_BUILTIN_HSUBPD256,
23615 IX86_BUILTIN_HSUBPS256,
23616 IX86_BUILTIN_MAXPD256,
23617 IX86_BUILTIN_MAXPS256,
23618 IX86_BUILTIN_MINPD256,
23619 IX86_BUILTIN_MINPS256,
23620 IX86_BUILTIN_MULPD256,
23621 IX86_BUILTIN_MULPS256,
23622 IX86_BUILTIN_ORPD256,
23623 IX86_BUILTIN_ORPS256,
23624 IX86_BUILTIN_SHUFPD256,
23625 IX86_BUILTIN_SHUFPS256,
23626 IX86_BUILTIN_SUBPD256,
23627 IX86_BUILTIN_SUBPS256,
23628 IX86_BUILTIN_XORPD256,
23629 IX86_BUILTIN_XORPS256,
23630 IX86_BUILTIN_CMPSD,
23631 IX86_BUILTIN_CMPSS,
23632 IX86_BUILTIN_CMPPD,
23633 IX86_BUILTIN_CMPPS,
23634 IX86_BUILTIN_CMPPD256,
23635 IX86_BUILTIN_CMPPS256,
23636 IX86_BUILTIN_CVTDQ2PD256,
23637 IX86_BUILTIN_CVTDQ2PS256,
23638 IX86_BUILTIN_CVTPD2PS256,
23639 IX86_BUILTIN_CVTPS2DQ256,
23640 IX86_BUILTIN_CVTPS2PD256,
23641 IX86_BUILTIN_CVTTPD2DQ256,
23642 IX86_BUILTIN_CVTPD2DQ256,
23643 IX86_BUILTIN_CVTTPS2DQ256,
23644 IX86_BUILTIN_EXTRACTF128PD256,
23645 IX86_BUILTIN_EXTRACTF128PS256,
23646 IX86_BUILTIN_EXTRACTF128SI256,
23647 IX86_BUILTIN_VZEROALL,
23648 IX86_BUILTIN_VZEROUPPER,
23649 IX86_BUILTIN_VPERMILVARPD,
23650 IX86_BUILTIN_VPERMILVARPS,
23651 IX86_BUILTIN_VPERMILVARPD256,
23652 IX86_BUILTIN_VPERMILVARPS256,
23653 IX86_BUILTIN_VPERMILPD,
23654 IX86_BUILTIN_VPERMILPS,
23655 IX86_BUILTIN_VPERMILPD256,
23656 IX86_BUILTIN_VPERMILPS256,
23657 IX86_BUILTIN_VPERMIL2PD,
23658 IX86_BUILTIN_VPERMIL2PS,
23659 IX86_BUILTIN_VPERMIL2PD256,
23660 IX86_BUILTIN_VPERMIL2PS256,
23661 IX86_BUILTIN_VPERM2F128PD256,
23662 IX86_BUILTIN_VPERM2F128PS256,
23663 IX86_BUILTIN_VPERM2F128SI256,
23664 IX86_BUILTIN_VBROADCASTSS,
23665 IX86_BUILTIN_VBROADCASTSD256,
23666 IX86_BUILTIN_VBROADCASTSS256,
23667 IX86_BUILTIN_VBROADCASTPD256,
23668 IX86_BUILTIN_VBROADCASTPS256,
23669 IX86_BUILTIN_VINSERTF128PD256,
23670 IX86_BUILTIN_VINSERTF128PS256,
23671 IX86_BUILTIN_VINSERTF128SI256,
23672 IX86_BUILTIN_LOADUPD256,
23673 IX86_BUILTIN_LOADUPS256,
23674 IX86_BUILTIN_STOREUPD256,
23675 IX86_BUILTIN_STOREUPS256,
23676 IX86_BUILTIN_LDDQU256,
23677 IX86_BUILTIN_MOVNTDQ256,
23678 IX86_BUILTIN_MOVNTPD256,
23679 IX86_BUILTIN_MOVNTPS256,
23680 IX86_BUILTIN_LOADDQU256,
23681 IX86_BUILTIN_STOREDQU256,
23682 IX86_BUILTIN_MASKLOADPD,
23683 IX86_BUILTIN_MASKLOADPS,
23684 IX86_BUILTIN_MASKSTOREPD,
23685 IX86_BUILTIN_MASKSTOREPS,
23686 IX86_BUILTIN_MASKLOADPD256,
23687 IX86_BUILTIN_MASKLOADPS256,
23688 IX86_BUILTIN_MASKSTOREPD256,
23689 IX86_BUILTIN_MASKSTOREPS256,
23690 IX86_BUILTIN_MOVSHDUP256,
23691 IX86_BUILTIN_MOVSLDUP256,
23692 IX86_BUILTIN_MOVDDUP256,
23694 IX86_BUILTIN_SQRTPD256,
23695 IX86_BUILTIN_SQRTPS256,
23696 IX86_BUILTIN_SQRTPS_NR256,
23697 IX86_BUILTIN_RSQRTPS256,
23698 IX86_BUILTIN_RSQRTPS_NR256,
23700 IX86_BUILTIN_RCPPS256,
23702 IX86_BUILTIN_ROUNDPD256,
23703 IX86_BUILTIN_ROUNDPS256,
23705 IX86_BUILTIN_UNPCKHPD256,
23706 IX86_BUILTIN_UNPCKLPD256,
23707 IX86_BUILTIN_UNPCKHPS256,
23708 IX86_BUILTIN_UNPCKLPS256,
23710 IX86_BUILTIN_SI256_SI,
23711 IX86_BUILTIN_PS256_PS,
23712 IX86_BUILTIN_PD256_PD,
23713 IX86_BUILTIN_SI_SI256,
23714 IX86_BUILTIN_PS_PS256,
23715 IX86_BUILTIN_PD_PD256,
23717 IX86_BUILTIN_VTESTZPD,
23718 IX86_BUILTIN_VTESTCPD,
23719 IX86_BUILTIN_VTESTNZCPD,
23720 IX86_BUILTIN_VTESTZPS,
23721 IX86_BUILTIN_VTESTCPS,
23722 IX86_BUILTIN_VTESTNZCPS,
23723 IX86_BUILTIN_VTESTZPD256,
23724 IX86_BUILTIN_VTESTCPD256,
23725 IX86_BUILTIN_VTESTNZCPD256,
23726 IX86_BUILTIN_VTESTZPS256,
23727 IX86_BUILTIN_VTESTCPS256,
23728 IX86_BUILTIN_VTESTNZCPS256,
23729 IX86_BUILTIN_PTESTZ256,
23730 IX86_BUILTIN_PTESTC256,
23731 IX86_BUILTIN_PTESTNZC256,
23733 IX86_BUILTIN_MOVMSKPD256,
23734 IX86_BUILTIN_MOVMSKPS256,
23736 /* TFmode support builtins. */
23738 IX86_BUILTIN_HUGE_VALQ,
23739 IX86_BUILTIN_FABSQ,
23740 IX86_BUILTIN_COPYSIGNQ,
23742 /* Vectorizer support builtins. */
23743 IX86_BUILTIN_CPYSGNPS,
23744 IX86_BUILTIN_CPYSGNPD,
23745 IX86_BUILTIN_CPYSGNPS256,
23746 IX86_BUILTIN_CPYSGNPD256,
23748 IX86_BUILTIN_CVTUDQ2PS,
23750 IX86_BUILTIN_VEC_PERM_V2DF,
23751 IX86_BUILTIN_VEC_PERM_V4SF,
23752 IX86_BUILTIN_VEC_PERM_V2DI,
23753 IX86_BUILTIN_VEC_PERM_V4SI,
23754 IX86_BUILTIN_VEC_PERM_V8HI,
23755 IX86_BUILTIN_VEC_PERM_V16QI,
23756 IX86_BUILTIN_VEC_PERM_V2DI_U,
23757 IX86_BUILTIN_VEC_PERM_V4SI_U,
23758 IX86_BUILTIN_VEC_PERM_V8HI_U,
23759 IX86_BUILTIN_VEC_PERM_V16QI_U,
23760 IX86_BUILTIN_VEC_PERM_V4DF,
23761 IX86_BUILTIN_VEC_PERM_V8SF,
23763 /* FMA4 and XOP instructions. */
23764 IX86_BUILTIN_VFMADDSS,
23765 IX86_BUILTIN_VFMADDSD,
23766 IX86_BUILTIN_VFMADDPS,
23767 IX86_BUILTIN_VFMADDPD,
23768 IX86_BUILTIN_VFMADDPS256,
23769 IX86_BUILTIN_VFMADDPD256,
23770 IX86_BUILTIN_VFMADDSUBPS,
23771 IX86_BUILTIN_VFMADDSUBPD,
23772 IX86_BUILTIN_VFMADDSUBPS256,
23773 IX86_BUILTIN_VFMADDSUBPD256,
23775 IX86_BUILTIN_VPCMOV,
23776 IX86_BUILTIN_VPCMOV_V2DI,
23777 IX86_BUILTIN_VPCMOV_V4SI,
23778 IX86_BUILTIN_VPCMOV_V8HI,
23779 IX86_BUILTIN_VPCMOV_V16QI,
23780 IX86_BUILTIN_VPCMOV_V4SF,
23781 IX86_BUILTIN_VPCMOV_V2DF,
23782 IX86_BUILTIN_VPCMOV256,
23783 IX86_BUILTIN_VPCMOV_V4DI256,
23784 IX86_BUILTIN_VPCMOV_V8SI256,
23785 IX86_BUILTIN_VPCMOV_V16HI256,
23786 IX86_BUILTIN_VPCMOV_V32QI256,
23787 IX86_BUILTIN_VPCMOV_V8SF256,
23788 IX86_BUILTIN_VPCMOV_V4DF256,
23790 IX86_BUILTIN_VPPERM,
23792 IX86_BUILTIN_VPMACSSWW,
23793 IX86_BUILTIN_VPMACSWW,
23794 IX86_BUILTIN_VPMACSSWD,
23795 IX86_BUILTIN_VPMACSWD,
23796 IX86_BUILTIN_VPMACSSDD,
23797 IX86_BUILTIN_VPMACSDD,
23798 IX86_BUILTIN_VPMACSSDQL,
23799 IX86_BUILTIN_VPMACSSDQH,
23800 IX86_BUILTIN_VPMACSDQL,
23801 IX86_BUILTIN_VPMACSDQH,
23802 IX86_BUILTIN_VPMADCSSWD,
23803 IX86_BUILTIN_VPMADCSWD,
23805 IX86_BUILTIN_VPHADDBW,
23806 IX86_BUILTIN_VPHADDBD,
23807 IX86_BUILTIN_VPHADDBQ,
23808 IX86_BUILTIN_VPHADDWD,
23809 IX86_BUILTIN_VPHADDWQ,
23810 IX86_BUILTIN_VPHADDDQ,
23811 IX86_BUILTIN_VPHADDUBW,
23812 IX86_BUILTIN_VPHADDUBD,
23813 IX86_BUILTIN_VPHADDUBQ,
23814 IX86_BUILTIN_VPHADDUWD,
23815 IX86_BUILTIN_VPHADDUWQ,
23816 IX86_BUILTIN_VPHADDUDQ,
23817 IX86_BUILTIN_VPHSUBBW,
23818 IX86_BUILTIN_VPHSUBWD,
23819 IX86_BUILTIN_VPHSUBDQ,
23821 IX86_BUILTIN_VPROTB,
23822 IX86_BUILTIN_VPROTW,
23823 IX86_BUILTIN_VPROTD,
23824 IX86_BUILTIN_VPROTQ,
23825 IX86_BUILTIN_VPROTB_IMM,
23826 IX86_BUILTIN_VPROTW_IMM,
23827 IX86_BUILTIN_VPROTD_IMM,
23828 IX86_BUILTIN_VPROTQ_IMM,
23830 IX86_BUILTIN_VPSHLB,
23831 IX86_BUILTIN_VPSHLW,
23832 IX86_BUILTIN_VPSHLD,
23833 IX86_BUILTIN_VPSHLQ,
23834 IX86_BUILTIN_VPSHAB,
23835 IX86_BUILTIN_VPSHAW,
23836 IX86_BUILTIN_VPSHAD,
23837 IX86_BUILTIN_VPSHAQ,
23839 IX86_BUILTIN_VFRCZSS,
23840 IX86_BUILTIN_VFRCZSD,
23841 IX86_BUILTIN_VFRCZPS,
23842 IX86_BUILTIN_VFRCZPD,
23843 IX86_BUILTIN_VFRCZPS256,
23844 IX86_BUILTIN_VFRCZPD256,
23846 IX86_BUILTIN_VPCOMEQUB,
23847 IX86_BUILTIN_VPCOMNEUB,
23848 IX86_BUILTIN_VPCOMLTUB,
23849 IX86_BUILTIN_VPCOMLEUB,
23850 IX86_BUILTIN_VPCOMGTUB,
23851 IX86_BUILTIN_VPCOMGEUB,
23852 IX86_BUILTIN_VPCOMFALSEUB,
23853 IX86_BUILTIN_VPCOMTRUEUB,
23855 IX86_BUILTIN_VPCOMEQUW,
23856 IX86_BUILTIN_VPCOMNEUW,
23857 IX86_BUILTIN_VPCOMLTUW,
23858 IX86_BUILTIN_VPCOMLEUW,
23859 IX86_BUILTIN_VPCOMGTUW,
23860 IX86_BUILTIN_VPCOMGEUW,
23861 IX86_BUILTIN_VPCOMFALSEUW,
23862 IX86_BUILTIN_VPCOMTRUEUW,
23864 IX86_BUILTIN_VPCOMEQUD,
23865 IX86_BUILTIN_VPCOMNEUD,
23866 IX86_BUILTIN_VPCOMLTUD,
23867 IX86_BUILTIN_VPCOMLEUD,
23868 IX86_BUILTIN_VPCOMGTUD,
23869 IX86_BUILTIN_VPCOMGEUD,
23870 IX86_BUILTIN_VPCOMFALSEUD,
23871 IX86_BUILTIN_VPCOMTRUEUD,
23873 IX86_BUILTIN_VPCOMEQUQ,
23874 IX86_BUILTIN_VPCOMNEUQ,
23875 IX86_BUILTIN_VPCOMLTUQ,
23876 IX86_BUILTIN_VPCOMLEUQ,
23877 IX86_BUILTIN_VPCOMGTUQ,
23878 IX86_BUILTIN_VPCOMGEUQ,
23879 IX86_BUILTIN_VPCOMFALSEUQ,
23880 IX86_BUILTIN_VPCOMTRUEUQ,
23882 IX86_BUILTIN_VPCOMEQB,
23883 IX86_BUILTIN_VPCOMNEB,
23884 IX86_BUILTIN_VPCOMLTB,
23885 IX86_BUILTIN_VPCOMLEB,
23886 IX86_BUILTIN_VPCOMGTB,
23887 IX86_BUILTIN_VPCOMGEB,
23888 IX86_BUILTIN_VPCOMFALSEB,
23889 IX86_BUILTIN_VPCOMTRUEB,
23891 IX86_BUILTIN_VPCOMEQW,
23892 IX86_BUILTIN_VPCOMNEW,
23893 IX86_BUILTIN_VPCOMLTW,
23894 IX86_BUILTIN_VPCOMLEW,
23895 IX86_BUILTIN_VPCOMGTW,
23896 IX86_BUILTIN_VPCOMGEW,
23897 IX86_BUILTIN_VPCOMFALSEW,
23898 IX86_BUILTIN_VPCOMTRUEW,
23900 IX86_BUILTIN_VPCOMEQD,
23901 IX86_BUILTIN_VPCOMNED,
23902 IX86_BUILTIN_VPCOMLTD,
23903 IX86_BUILTIN_VPCOMLED,
23904 IX86_BUILTIN_VPCOMGTD,
23905 IX86_BUILTIN_VPCOMGED,
23906 IX86_BUILTIN_VPCOMFALSED,
23907 IX86_BUILTIN_VPCOMTRUED,
23909 IX86_BUILTIN_VPCOMEQQ,
23910 IX86_BUILTIN_VPCOMNEQ,
23911 IX86_BUILTIN_VPCOMLTQ,
23912 IX86_BUILTIN_VPCOMLEQ,
23913 IX86_BUILTIN_VPCOMGTQ,
23914 IX86_BUILTIN_VPCOMGEQ,
23915 IX86_BUILTIN_VPCOMFALSEQ,
23916 IX86_BUILTIN_VPCOMTRUEQ,
23918 /* LWP instructions. */
23919 IX86_BUILTIN_LLWPCB,
23920 IX86_BUILTIN_SLWPCB,
23921 IX86_BUILTIN_LWPVAL32,
23922 IX86_BUILTIN_LWPVAL64,
23923 IX86_BUILTIN_LWPINS32,
23924 IX86_BUILTIN_LWPINS64,
23928 /* FSGSBASE instructions. */
23929 IX86_BUILTIN_RDFSBASE32,
23930 IX86_BUILTIN_RDFSBASE64,
23931 IX86_BUILTIN_RDGSBASE32,
23932 IX86_BUILTIN_RDGSBASE64,
23933 IX86_BUILTIN_WRFSBASE32,
23934 IX86_BUILTIN_WRFSBASE64,
23935 IX86_BUILTIN_WRGSBASE32,
23936 IX86_BUILTIN_WRGSBASE64,
23938 /* RDRND instructions. */
23939 IX86_BUILTIN_RDRAND16,
23940 IX86_BUILTIN_RDRAND32,
23941 IX86_BUILTIN_RDRAND64,
23943 /* F16C instructions. */
23944 IX86_BUILTIN_CVTPH2PS,
23945 IX86_BUILTIN_CVTPH2PS256,
23946 IX86_BUILTIN_CVTPS2PH,
23947 IX86_BUILTIN_CVTPS2PH256,
23952 /* Table for the ix86 builtin decls. */
23953 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
23955 /* Table of all of the builtin functions that are possible with different ISAs
23956 but are waiting to be built until a function is declared to use that
23957 ISA. */
23958 struct builtin_isa {
23959 const char *name; /* function name */
23960 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
23961 int isa; /* isa_flags this builtin is defined for */
23962 bool const_p; /* true if the declaration is constant */
23963 bool set_and_not_built_p;
23966 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
23969 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
23970 MASK of which isa_flags to use in the ix86_builtins_isa array. Store the
23971 function decl in the ix86_builtins array. Return the function decl or
23972 NULL_TREE if the builtin was not added.
23974 If the front end has a special hook for builtin functions, delay adding
23975 builtin functions that aren't in the current ISA until the ISA is changed
23976 with function specific optimization. Doing so can save about 300K for the
23977 default compiler. When the builtin is expanded, check at that time whether
23978 it is valid.
23980 If the front end doesn't have a special hook, record all builtins, even if
23981 they are not in the current ISA, in case the user uses function specific
23982 options for a different ISA, so that we don't get scope errors if a
23983 builtin is added in the middle of a function scope. */
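/* An illustrative call (the bulk of the registrations later in this file
   go through the bdesc_* tables and helper loops rather than direct calls
   like this one):

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                  V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   Depending on whether SSE is enabled in the current ISA, this either
   registers the builtin immediately or records it in ix86_builtins_isa
   for deferred creation by ix86_add_new_builtins.  */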
23986 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
23987 enum ix86_builtins code)
23989 tree decl = NULL_TREE;
23991 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
23993 ix86_builtins_isa[(int) code].isa = mask;
23995 mask &= ~OPTION_MASK_ISA_64BIT;
23997 || (mask & ix86_isa_flags) != 0
23998 || (lang_hooks.builtin_function
23999 == lang_hooks.builtin_function_ext_scope))
24002 tree type = ix86_get_builtin_func_type (tcode);
24003 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
24005 ix86_builtins[(int) code] = decl;
24006 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
24010 ix86_builtins[(int) code] = NULL_TREE;
24011 ix86_builtins_isa[(int) code].tcode = tcode;
24012 ix86_builtins_isa[(int) code].name = name;
24013 ix86_builtins_isa[(int) code].const_p = false;
24014 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
24021 /* Like def_builtin, but also marks the function decl "const". */
24024 def_builtin_const (int mask, const char *name,
24025 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
24027 tree decl = def_builtin (mask, name, tcode, code);
24029 TREE_READONLY (decl) = 1;
24031 ix86_builtins_isa[(int) code].const_p = true;
24036 /* Add any new builtin functions for a given ISA that may not have been
24037 declared. This saves a bit of space compared to adding all of the
24038 declarations to the tree, even if we didn't use them. */
24041 ix86_add_new_builtins (int isa)
24045 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
24047 if ((ix86_builtins_isa[i].isa & isa) != 0
24048 && ix86_builtins_isa[i].set_and_not_built_p)
24052 /* Don't define the builtin again. */
24053 ix86_builtins_isa[i].set_and_not_built_p = false;
24055 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
24056 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
24057 type, i, BUILT_IN_MD, NULL,
24060 ix86_builtins[i] = decl;
24061 if (ix86_builtins_isa[i].const_p)
24062 TREE_READONLY (decl) = 1;
24067 /* Bits for builtin_description.flag. */
24069 /* Set when we don't support the comparison natively, and should
24070 swap_comparison in order to support it. */
24071 #define BUILTIN_DESC_SWAP_OPERANDS 1
24073 struct builtin_description
24075 const unsigned int mask;
24076 const enum insn_code icode;
24077 const char *const name;
24078 const enum ix86_builtins code;
24079 const enum rtx_code comparison;
24083 static const struct builtin_description bdesc_comi[] =
24085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
24086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
24087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
24088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
24089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
24090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
24091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
24092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
24093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
24094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
24095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
24096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
24097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
24098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
24099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
24100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
24101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
24102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
24103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
24104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
24105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
24106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
24107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
24108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
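/* In the two pcmp[ei]str tables above, the flag field does not hold
   flag bits but a CC mode: the plain ...i128/...m128 entries return
   the index or mask result itself, while the a/c/o/s/z variants test
   a single EFLAGS bit of the same instruction (CCAmode for "above",
   CCCmode for the carry flag, and so on).  */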
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
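  /* A note on the mask column (my reading of the convention, not
     spelled out here): a mask of ~OPTION_MASK_ISA_64BIT, as in the
     two entries above, marks a builtin that needs no particular ISA
     and works in 32-bit mode as well, whereas masks or'ed with
     OPTION_MASK_ISA_64BIT (such as the FSGSBASE entries below) are
     defined for 64-bit targets only.  */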
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
24242 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
24243 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
24244 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
24245 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24246 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24247 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
24248 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
24251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24261 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
24285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
24287 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
24289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
24291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
24293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
24295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
24305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
24309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
24310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
24311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
24312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
24315 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24316 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24317 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24318 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24320 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24321 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24322 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24323 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24324 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24325 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
24326 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24327 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24328 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24329 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24330 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24331 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24332 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24333 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24334 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24337 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
24338 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
24339 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
24340 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
24341 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24342 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
24345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
24346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24347 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24349 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
24351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24353 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
24355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
24356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
24358 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
24360 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24361 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24362 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24374 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
24380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
24382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
24383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
24384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
24386 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
24387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
24388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
24390 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
24392 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24393 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24395 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24397 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24399 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24400 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24402 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24407 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24408 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
24410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
24411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
24412 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
24414 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
24416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
24420 /* SSE MMX or 3Dnow!A */
24421 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24422 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24423 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24425 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24426 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24427 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
24428 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
24430 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
24431 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
24433 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
24436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
24438 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
24439 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
24440 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
24441 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
24442 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
24443 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
24444 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
24445 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
24446 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
24447 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
24448 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
24449 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
24451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
24452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
24453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
24454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
24455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
24458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
24461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
24462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
24464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
24466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
24468 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24469 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
24471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
24473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
24475 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24476 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24477 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24478 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
24489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
24495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
24497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
24498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
24499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
24501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
24502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
24503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
24505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24506 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24510 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24512 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24513 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24515 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24518 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24519 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
24521 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
24523 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24524 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24525 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24526 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24527 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24528 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24529 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24530 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24541 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24542 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
24544 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24546 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24547 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24559 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24560 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24561 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24564 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24565 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24566 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24567 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24568 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
24569 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24570 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
24571 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
24573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
24575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
24577 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
24578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
24580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
24581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
24583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
24585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
24586 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
24587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
24588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
24590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24591 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24592 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24593 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24594 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24595 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24596 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
24599 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24600 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24601 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
24602 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24603 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24604 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
24606 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
24607 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
24608 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
24609 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
24611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
24612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
24615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
24617 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
24618 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
24620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
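
  /* In the three ptest entries above, the rtx_code field selects which
     flag of PTEST the builtin tests once expanded: EQ selects ZF
     (ptestz), LTU selects CF (ptestc) and GTU selects ZF == 0 && CF == 0
     (ptestnzc).  The vtest and 256-bit ptest entries further down use
     the same convention.  */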

  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I    V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1   V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I    V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1   V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF          V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF          V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2         V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2         V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI          V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI          V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI       V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI          V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI       V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI          V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2         V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2         V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2         V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2         V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF          V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF          V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI          V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI          V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI          V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI          V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM      V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM      V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM      V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM      V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP      V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP      V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP      V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP      V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF       V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF       V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF       V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF       V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF       V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF       V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF          V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF          V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2         V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2         V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI          V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI          V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI          V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI          V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI       V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI       V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI       V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI       V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI       V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI       V8HI_FTYPE_V16QI
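
/* Each MULTI_ARG_* macro is shorthand for one ix86_builtin_func_type
   enumerator: the digit gives the operand count and the suffix encodes
   the vector modes, so MULTI_ARG_3_SF is a three-operand V4SF function
   and MULTI_ARG_1_QI_HI maps a V16QI operand to a V8HI result.  */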

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
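
  /* PCOM_FALSE and PCOM_TRUE are not genuine rtx comparison codes (hence
     the casts above); they act as pseudo-codes that the builtin expander
     recognizes to emit the always-false and always-true forms of vpcom.  */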

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};

/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
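
  /* Table rows whose name field is 0 (for instance the AES and PCLMUL
     entries in bdesc_args) are skipped by the loops above; they are
     registered explicitly further down.  */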

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
25193 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
25194 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
25195 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
25196 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
25198 /* SSE or 3DNow!A */
25199 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25200 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
25201 IX86_BUILTIN_MASKMOVQ);
25204 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
25205 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
25207 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
25208 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
25209 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
25210 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
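  /* Illustrative sketch, not part of GCC: the defs above are what the
     <emmintrin.h> wrappers ultimately expand to, e.g.

	#include <emmintrin.h>
	void publish (volatile int *flag, void *line)
	{
	  _mm_clflush (line);	// __builtin_ia32_clflush
	  _mm_mfence ();	// __builtin_ia32_mfence
	  *flag = 1;
	}

     The intrinsic names are standard; the exact expansion path through
     these builtins is assumed here.  */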
25213 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
25214 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
25215 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
25216 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
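  /* Illustrative sketch, not part of GCC: the SSE3 monitor/mwait pair as
     reached through <pmmintrin.h>:

	#include <pmmintrin.h>
	void wait_on (volatile int *addr)
	{
	  _mm_monitor ((const void *) addr, 0, 0);  // __builtin_ia32_monitor
	  if (*addr == 0)
	    _mm_mwait (0, 0);			    // __builtin_ia32_mwait
	}
  */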
25219 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
25220 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
25221 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
25222 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
25223 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
25224 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
25225 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
25226 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
25227 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
25228 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
25229 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
25230 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
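  /* Illustrative sketch, not part of GCC: one AES encryption round via the
     <wmmintrin.h> wrapper for the builtin above (compile with -maes):

	#include <wmmintrin.h>
	__m128i aes_round (__m128i state, __m128i round_key)
	{
	  return _mm_aesenc_si128 (state, round_key);
	}
  */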
25233 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
25234 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
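  /* Illustrative sketch, not part of GCC: a carry-less multiply of the low
     64-bit halves, as used in GHASH/CRC kernels (compile with -mpclmul);
     the third operand must be an immediate:

	#include <wmmintrin.h>
	__m128i clmul_lo (__m128i a, __m128i b)
	{
	  return _mm_clmulepi64_si128 (a, b, 0x00);
	}
  */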
25236 /* MMX access to the vec_init patterns. */
25237 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
25238 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
25240 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
25241 V4HI_FTYPE_HI_HI_HI_HI,
25242 IX86_BUILTIN_VEC_INIT_V4HI);
25244 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
25245 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
25246 IX86_BUILTIN_VEC_INIT_V8QI);
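  /* Illustrative sketch, not part of GCC: <mmintrin.h> builds __m64 values
     through these vec_init builtins instead of vector literals, along the
     lines of (the argument order is an assumption for illustration):

	#include <mmintrin.h>
	__m64 pair (int hi, int lo)
	{
	  return (__m64) __builtin_ia32_vec_init_v2si (lo, hi);
	}
  */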
25248 /* Access to the vec_extract patterns. */
25249 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
25250 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
25251 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
25252 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
25253 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
25254 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
25255 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
25256 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
25257 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
25258 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
25260 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25261 "__builtin_ia32_vec_ext_v4hi",
25262 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
25264 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
25265 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
25267 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
25268 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
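  /* Illustrative sketch, not part of GCC: extracting a lane through one of
     the vec_ext builtins above; the selector must be an in-range constant
     (see get_element_number below):

	#include <xmmintrin.h>
	float first_lane (__m128 v)
	{
	  return __builtin_ia32_vec_ext_v4sf ((__v4sf) v, 0);
	}
  */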
25270 /* Access to the vec_set patterns. */
25271 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
25272 "__builtin_ia32_vec_set_v2di",
25273 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
25275 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
25276 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
25278 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
25279 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
25281 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
25282 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
25284 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
25285 "__builtin_ia32_vec_set_v4hi",
25286 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
25288 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
25289 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
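  /* Illustrative sketch, not part of GCC: inserting a 16-bit lane through
     the SSE2-level vec_set builtin above:

	#include <emmintrin.h>
	__m128i set_lane3 (__m128i v, short x)
	{
	  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) v, x, 3);
	}
  */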
25291   /* Add FMA4 and XOP multi-argument builtin instructions.  */
25292 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25297 ftype = (enum ix86_builtin_func_type) d->flag;
25298 def_builtin_const (d->mask, d->name, ftype, d->code);
25302 /* Internal method for ix86_init_builtins. */
25305 ix86_init_builtins_va_builtins_abi (void)
25307 tree ms_va_ref, sysv_va_ref;
25308 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
25309 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
25310 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
25311 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
25315 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
25316 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
25317 ms_va_ref = build_reference_type (ms_va_list_type_node);
25319 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
25322 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25323 fnvoid_va_start_ms =
25324 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
25325 fnvoid_va_end_sysv =
25326 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
25327 fnvoid_va_start_sysv =
25328 build_varargs_function_type_list (void_type_node, sysv_va_ref,
25330 fnvoid_va_copy_ms =
25331 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
25333 fnvoid_va_copy_sysv =
25334 build_function_type_list (void_type_node, sysv_va_ref,
25335 sysv_va_ref, NULL_TREE);
25337 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
25338 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
25339 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
25340 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
25341 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
25342 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
25343 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
25344 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25345 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
25346 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25347 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
25348 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
25352 ix86_init_builtin_types (void)
25354 tree float128_type_node, float80_type_node;
25356 /* The __float80 type. */
25357 float80_type_node = long_double_type_node;
25358 if (TYPE_MODE (float80_type_node) != XFmode)
25360 /* The __float80 type. */
25361 float80_type_node = make_node (REAL_TYPE);
25363 TYPE_PRECISION (float80_type_node) = 80;
25364 layout_type (float80_type_node);
25366 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
25368 /* The __float128 type. */
25369 float128_type_node = make_node (REAL_TYPE);
25370 TYPE_PRECISION (float128_type_node) = 128;
25371 layout_type (float128_type_node);
25372 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
25374 /* This macro is built by i386-builtin-types.awk. */
25375 DEFINE_BUILTIN_PRIMITIVE_TYPES;
25379 ix86_init_builtins (void)
25383 ix86_init_builtin_types ();
25385 /* TFmode support builtins. */
25386 def_builtin_const (0, "__builtin_infq",
25387 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
25388 def_builtin_const (0, "__builtin_huge_valq",
25389 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
25391   /* We will expand them to a normal call if SSE2 isn't available,
25392      since they are used by libgcc.  */
25393 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
25394 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
25395 BUILT_IN_MD, "__fabstf2", NULL_TREE);
25396 TREE_READONLY (t) = 1;
25397 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
25399 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
25400 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
25401 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
25402 TREE_READONLY (t) = 1;
25403 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
25405 ix86_init_mmx_sse_builtins ();
25408 ix86_init_builtins_va_builtins_abi ();
25410 #ifdef SUBTARGET_INIT_BUILTINS
25411 SUBTARGET_INIT_BUILTINS;
25415 /* Return the ix86 builtin for CODE. */
25418 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
25420 if (code >= IX86_BUILTIN_MAX)
25421 return error_mark_node;
25423 return ix86_builtins[code];
25426 /* Errors in the source file can cause expand_expr to return const0_rtx
25427 where we expect a vector. To avoid crashing, use one of the vector
25428 clear instructions. */
25430 safe_vector_operand (rtx x, enum machine_mode mode)
25432 if (x == const0_rtx)
25433 x = CONST0_RTX (mode);
25437 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
25440 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
25443 tree arg0 = CALL_EXPR_ARG (exp, 0);
25444 tree arg1 = CALL_EXPR_ARG (exp, 1);
25445 rtx op0 = expand_normal (arg0);
25446 rtx op1 = expand_normal (arg1);
25447 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25448 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25449 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25451 if (VECTOR_MODE_P (mode0))
25452 op0 = safe_vector_operand (op0, mode0);
25453 if (VECTOR_MODE_P (mode1))
25454 op1 = safe_vector_operand (op1, mode1);
25456 if (optimize || !target
25457 || GET_MODE (target) != tmode
25458 || !insn_data[icode].operand[0].predicate (target, tmode))
25459 target = gen_reg_rtx (tmode);
25461 if (GET_MODE (op1) == SImode && mode1 == TImode)
25463 rtx x = gen_reg_rtx (V4SImode);
25464 emit_insn (gen_sse2_loadd (x, op1));
25465 op1 = gen_lowpart (TImode, x);
25468 if (!insn_data[icode].operand[1].predicate (op0, mode0))
25469 op0 = copy_to_mode_reg (mode0, op0);
25470 if (!insn_data[icode].operand[2].predicate (op1, mode1))
25471 op1 = copy_to_mode_reg (mode1, op1);
25473 pat = GEN_FCN (icode) (target, op0, op1);
25482 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
25485 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
25486 enum ix86_builtin_func_type m_type,
25487 enum rtx_code sub_code)
25492 bool comparison_p = false;
25494 bool last_arg_constant = false;
25495 int num_memory = 0;
25498 enum machine_mode mode;
25501 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25505 case MULTI_ARG_4_DF2_DI_I:
25506 case MULTI_ARG_4_DF2_DI_I1:
25507 case MULTI_ARG_4_SF2_SI_I:
25508 case MULTI_ARG_4_SF2_SI_I1:
25510 last_arg_constant = true;
25513 case MULTI_ARG_3_SF:
25514 case MULTI_ARG_3_DF:
25515 case MULTI_ARG_3_SF2:
25516 case MULTI_ARG_3_DF2:
25517 case MULTI_ARG_3_DI:
25518 case MULTI_ARG_3_SI:
25519 case MULTI_ARG_3_SI_DI:
25520 case MULTI_ARG_3_HI:
25521 case MULTI_ARG_3_HI_SI:
25522 case MULTI_ARG_3_QI:
25523 case MULTI_ARG_3_DI2:
25524 case MULTI_ARG_3_SI2:
25525 case MULTI_ARG_3_HI2:
25526 case MULTI_ARG_3_QI2:
25530 case MULTI_ARG_2_SF:
25531 case MULTI_ARG_2_DF:
25532 case MULTI_ARG_2_DI:
25533 case MULTI_ARG_2_SI:
25534 case MULTI_ARG_2_HI:
25535 case MULTI_ARG_2_QI:
25539 case MULTI_ARG_2_DI_IMM:
25540 case MULTI_ARG_2_SI_IMM:
25541 case MULTI_ARG_2_HI_IMM:
25542 case MULTI_ARG_2_QI_IMM:
25544 last_arg_constant = true;
25547 case MULTI_ARG_1_SF:
25548 case MULTI_ARG_1_DF:
25549 case MULTI_ARG_1_SF2:
25550 case MULTI_ARG_1_DF2:
25551 case MULTI_ARG_1_DI:
25552 case MULTI_ARG_1_SI:
25553 case MULTI_ARG_1_HI:
25554 case MULTI_ARG_1_QI:
25555 case MULTI_ARG_1_SI_DI:
25556 case MULTI_ARG_1_HI_DI:
25557 case MULTI_ARG_1_HI_SI:
25558 case MULTI_ARG_1_QI_DI:
25559 case MULTI_ARG_1_QI_SI:
25560 case MULTI_ARG_1_QI_HI:
25564 case MULTI_ARG_2_DI_CMP:
25565 case MULTI_ARG_2_SI_CMP:
25566 case MULTI_ARG_2_HI_CMP:
25567 case MULTI_ARG_2_QI_CMP:
25569 comparison_p = true;
25572 case MULTI_ARG_2_SF_TF:
25573 case MULTI_ARG_2_DF_TF:
25574 case MULTI_ARG_2_DI_TF:
25575 case MULTI_ARG_2_SI_TF:
25576 case MULTI_ARG_2_HI_TF:
25577 case MULTI_ARG_2_QI_TF:
25583 gcc_unreachable ();
25586 if (optimize || !target
25587 || GET_MODE (target) != tmode
25588 || !insn_data[icode].operand[0].predicate (target, tmode))
25589 target = gen_reg_rtx (tmode);
25591 gcc_assert (nargs <= 4);
25593 for (i = 0; i < nargs; i++)
25595 tree arg = CALL_EXPR_ARG (exp, i);
25596 rtx op = expand_normal (arg);
25597 int adjust = (comparison_p) ? 1 : 0;
25598 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
25600 if (last_arg_constant && i == nargs-1)
25602 if (!CONST_INT_P (op))
25604	      error ("the last argument must be an immediate");
25605 return gen_reg_rtx (tmode);
25610 if (VECTOR_MODE_P (mode))
25611 op = safe_vector_operand (op, mode);
25613	  /* If we aren't optimizing, only allow one memory operand to be generated.  */
25615 if (memory_operand (op, mode))
25618 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
25621 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
25623 op = force_reg (mode, op);
25627 args[i].mode = mode;
25633 pat = GEN_FCN (icode) (target, args[0].op);
25638 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
25639 GEN_INT ((int)sub_code));
25640 else if (! comparison_p)
25641 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25644 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
25648 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
25653 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25657 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
25661 gcc_unreachable ();
25671 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
25672 insns with vec_merge. */
25675 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
25679 tree arg0 = CALL_EXPR_ARG (exp, 0);
25680 rtx op1, op0 = expand_normal (arg0);
25681 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25682 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25684 if (optimize || !target
25685 || GET_MODE (target) != tmode
25686 || !insn_data[icode].operand[0].predicate (target, tmode))
25687 target = gen_reg_rtx (tmode);
25689 if (VECTOR_MODE_P (mode0))
25690 op0 = safe_vector_operand (op0, mode0);
25692 if ((optimize && !register_operand (op0, mode0))
25693 || !insn_data[icode].operand[1].predicate (op0, mode0))
25694 op0 = copy_to_mode_reg (mode0, op0);
25697 if (!insn_data[icode].operand[2].predicate (op1, mode0))
25698 op1 = copy_to_mode_reg (mode0, op1);
25700 pat = GEN_FCN (icode) (target, op0, op1);
25707 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
25710 ix86_expand_sse_compare (const struct builtin_description *d,
25711 tree exp, rtx target, bool swap)
25714 tree arg0 = CALL_EXPR_ARG (exp, 0);
25715 tree arg1 = CALL_EXPR_ARG (exp, 1);
25716 rtx op0 = expand_normal (arg0);
25717 rtx op1 = expand_normal (arg1);
25719 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
25720 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
25721 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
25722 enum rtx_code comparison = d->comparison;
25724 if (VECTOR_MODE_P (mode0))
25725 op0 = safe_vector_operand (op0, mode0);
25726 if (VECTOR_MODE_P (mode1))
25727 op1 = safe_vector_operand (op1, mode1);
25729   /* Swap operands if we have a comparison that isn't available in hardware.  */
25733 rtx tmp = gen_reg_rtx (mode1);
25734 emit_move_insn (tmp, op1);
25739 if (optimize || !target
25740 || GET_MODE (target) != tmode
25741 || !insn_data[d->icode].operand[0].predicate (target, tmode))
25742 target = gen_reg_rtx (tmode);
25744 if ((optimize && !register_operand (op0, mode0))
25745 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
25746 op0 = copy_to_mode_reg (mode0, op0);
25747 if ((optimize && !register_operand (op1, mode1))
25748 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
25749 op1 = copy_to_mode_reg (mode1, op1);
25751 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
25752 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
25759 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
25762 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
25766 tree arg0 = CALL_EXPR_ARG (exp, 0);
25767 tree arg1 = CALL_EXPR_ARG (exp, 1);
25768 rtx op0 = expand_normal (arg0);
25769 rtx op1 = expand_normal (arg1);
25770 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25771 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25772 enum rtx_code comparison = d->comparison;
25774 if (VECTOR_MODE_P (mode0))
25775 op0 = safe_vector_operand (op0, mode0);
25776 if (VECTOR_MODE_P (mode1))
25777 op1 = safe_vector_operand (op1, mode1);
25779   /* Swap operands if we have a comparison that isn't available in hardware.  */
25781 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
25788 target = gen_reg_rtx (SImode);
25789 emit_move_insn (target, const0_rtx);
25790 target = gen_rtx_SUBREG (QImode, target, 0);
25792 if ((optimize && !register_operand (op0, mode0))
25793 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25794 op0 = copy_to_mode_reg (mode0, op0);
25795 if ((optimize && !register_operand (op1, mode1))
25796 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25797 op1 = copy_to_mode_reg (mode1, op1);
25799 pat = GEN_FCN (d->icode) (op0, op1);
25803 emit_insn (gen_rtx_SET (VOIDmode,
25804 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25805 gen_rtx_fmt_ee (comparison, QImode,
25809 return SUBREG_REG (target);
25812 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
25815 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
25819 tree arg0 = CALL_EXPR_ARG (exp, 0);
25820 tree arg1 = CALL_EXPR_ARG (exp, 1);
25821 rtx op0 = expand_normal (arg0);
25822 rtx op1 = expand_normal (arg1);
25823 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
25824 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
25825 enum rtx_code comparison = d->comparison;
25827 if (VECTOR_MODE_P (mode0))
25828 op0 = safe_vector_operand (op0, mode0);
25829 if (VECTOR_MODE_P (mode1))
25830 op1 = safe_vector_operand (op1, mode1);
25832 target = gen_reg_rtx (SImode);
25833 emit_move_insn (target, const0_rtx);
25834 target = gen_rtx_SUBREG (QImode, target, 0);
25836 if ((optimize && !register_operand (op0, mode0))
25837 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
25838 op0 = copy_to_mode_reg (mode0, op0);
25839 if ((optimize && !register_operand (op1, mode1))
25840 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
25841 op1 = copy_to_mode_reg (mode1, op1);
25843 pat = GEN_FCN (d->icode) (op0, op1);
25847 emit_insn (gen_rtx_SET (VOIDmode,
25848 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25849 gen_rtx_fmt_ee (comparison, QImode,
25853 return SUBREG_REG (target);
25856 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
25859 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
25860 tree exp, rtx target)
25863 tree arg0 = CALL_EXPR_ARG (exp, 0);
25864 tree arg1 = CALL_EXPR_ARG (exp, 1);
25865 tree arg2 = CALL_EXPR_ARG (exp, 2);
25866 tree arg3 = CALL_EXPR_ARG (exp, 3);
25867 tree arg4 = CALL_EXPR_ARG (exp, 4);
25868 rtx scratch0, scratch1;
25869 rtx op0 = expand_normal (arg0);
25870 rtx op1 = expand_normal (arg1);
25871 rtx op2 = expand_normal (arg2);
25872 rtx op3 = expand_normal (arg3);
25873 rtx op4 = expand_normal (arg4);
25874 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
25876 tmode0 = insn_data[d->icode].operand[0].mode;
25877 tmode1 = insn_data[d->icode].operand[1].mode;
25878 modev2 = insn_data[d->icode].operand[2].mode;
25879 modei3 = insn_data[d->icode].operand[3].mode;
25880 modev4 = insn_data[d->icode].operand[4].mode;
25881 modei5 = insn_data[d->icode].operand[5].mode;
25882 modeimm = insn_data[d->icode].operand[6].mode;
25884 if (VECTOR_MODE_P (modev2))
25885 op0 = safe_vector_operand (op0, modev2);
25886 if (VECTOR_MODE_P (modev4))
25887 op2 = safe_vector_operand (op2, modev4);
25889 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25890 op0 = copy_to_mode_reg (modev2, op0);
25891 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
25892 op1 = copy_to_mode_reg (modei3, op1);
25893 if ((optimize && !register_operand (op2, modev4))
25894 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
25895 op2 = copy_to_mode_reg (modev4, op2);
25896 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
25897 op3 = copy_to_mode_reg (modei5, op3);
25899 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
25901	error ("the fifth argument must be an 8-bit immediate");
25905 if (d->code == IX86_BUILTIN_PCMPESTRI128)
25907 if (optimize || !target
25908 || GET_MODE (target) != tmode0
25909 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
25910 target = gen_reg_rtx (tmode0);
25912 scratch1 = gen_reg_rtx (tmode1);
25914 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
25916 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
25918 if (optimize || !target
25919 || GET_MODE (target) != tmode1
25920 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25921 target = gen_reg_rtx (tmode1);
25923 scratch0 = gen_reg_rtx (tmode0);
25925 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
25929 gcc_assert (d->flag);
25931 scratch0 = gen_reg_rtx (tmode0);
25932 scratch1 = gen_reg_rtx (tmode1);
25934 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
25944 target = gen_reg_rtx (SImode);
25945 emit_move_insn (target, const0_rtx);
25946 target = gen_rtx_SUBREG (QImode, target, 0);
25949 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25950 gen_rtx_fmt_ee (EQ, QImode,
25951 gen_rtx_REG ((enum machine_mode) d->flag,
25954 return SUBREG_REG (target);
25961 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
25964 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
25965 tree exp, rtx target)
25968 tree arg0 = CALL_EXPR_ARG (exp, 0);
25969 tree arg1 = CALL_EXPR_ARG (exp, 1);
25970 tree arg2 = CALL_EXPR_ARG (exp, 2);
25971 rtx scratch0, scratch1;
25972 rtx op0 = expand_normal (arg0);
25973 rtx op1 = expand_normal (arg1);
25974 rtx op2 = expand_normal (arg2);
25975 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
25977 tmode0 = insn_data[d->icode].operand[0].mode;
25978 tmode1 = insn_data[d->icode].operand[1].mode;
25979 modev2 = insn_data[d->icode].operand[2].mode;
25980 modev3 = insn_data[d->icode].operand[3].mode;
25981 modeimm = insn_data[d->icode].operand[4].mode;
25983 if (VECTOR_MODE_P (modev2))
25984 op0 = safe_vector_operand (op0, modev2);
25985 if (VECTOR_MODE_P (modev3))
25986 op1 = safe_vector_operand (op1, modev3);
25988 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25989 op0 = copy_to_mode_reg (modev2, op0);
25990 if ((optimize && !register_operand (op1, modev3))
25991 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
25992 op1 = copy_to_mode_reg (modev3, op1);
25994 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
25996	error ("the third argument must be an 8-bit immediate");
26000 if (d->code == IX86_BUILTIN_PCMPISTRI128)
26002 if (optimize || !target
26003 || GET_MODE (target) != tmode0
26004 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
26005 target = gen_reg_rtx (tmode0);
26007 scratch1 = gen_reg_rtx (tmode1);
26009 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
26011 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
26013 if (optimize || !target
26014 || GET_MODE (target) != tmode1
26015 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
26016 target = gen_reg_rtx (tmode1);
26018 scratch0 = gen_reg_rtx (tmode0);
26020 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
26024 gcc_assert (d->flag);
26026 scratch0 = gen_reg_rtx (tmode0);
26027 scratch1 = gen_reg_rtx (tmode1);
26029 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
26039 target = gen_reg_rtx (SImode);
26040 emit_move_insn (target, const0_rtx);
26041 target = gen_rtx_SUBREG (QImode, target, 0);
26044 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
26045 gen_rtx_fmt_ee (EQ, QImode,
26046 gen_rtx_REG ((enum machine_mode) d->flag,
26049 return SUBREG_REG (target);
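/* Illustrative sketch, not part of GCC: how the PCMPISTRI path above is
   reached from user code through the SSE4.2 wrapper; the last argument
   must be an 8-bit immediate, as enforced above:

	#include <nmmintrin.h>
	int find_any (__m128i needles, __m128i hay)
	{
	  return _mm_cmpistri (needles, hay,
			       _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
	}
*/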
26055 /* Subroutine of ix86_expand_builtin to take care of insns with
26056    a variable number of operands.  */
26059 ix86_expand_args_builtin (const struct builtin_description *d,
26060 tree exp, rtx target)
26062 rtx pat, real_target;
26063 unsigned int i, nargs;
26064 unsigned int nargs_constant = 0;
26065 int num_memory = 0;
26069 enum machine_mode mode;
26071 bool last_arg_count = false;
26072 enum insn_code icode = d->icode;
26073 const struct insn_data_d *insn_p = &insn_data[icode];
26074 enum machine_mode tmode = insn_p->operand[0].mode;
26075 enum machine_mode rmode = VOIDmode;
26077 enum rtx_code comparison = d->comparison;
26079 switch ((enum ix86_builtin_func_type) d->flag)
26081 case INT_FTYPE_V8SF_V8SF_PTEST:
26082 case INT_FTYPE_V4DI_V4DI_PTEST:
26083 case INT_FTYPE_V4DF_V4DF_PTEST:
26084 case INT_FTYPE_V4SF_V4SF_PTEST:
26085 case INT_FTYPE_V2DI_V2DI_PTEST:
26086 case INT_FTYPE_V2DF_V2DF_PTEST:
26087 return ix86_expand_sse_ptest (d, exp, target);
26088 case FLOAT128_FTYPE_FLOAT128:
26089 case FLOAT_FTYPE_FLOAT:
26090 case INT_FTYPE_INT:
26091 case UINT64_FTYPE_INT:
26092 case UINT16_FTYPE_UINT16:
26093 case INT64_FTYPE_INT64:
26094 case INT64_FTYPE_V4SF:
26095 case INT64_FTYPE_V2DF:
26096 case INT_FTYPE_V16QI:
26097 case INT_FTYPE_V8QI:
26098 case INT_FTYPE_V8SF:
26099 case INT_FTYPE_V4DF:
26100 case INT_FTYPE_V4SF:
26101 case INT_FTYPE_V2DF:
26102 case V16QI_FTYPE_V16QI:
26103 case V8SI_FTYPE_V8SF:
26104 case V8SI_FTYPE_V4SI:
26105 case V8HI_FTYPE_V8HI:
26106 case V8HI_FTYPE_V16QI:
26107 case V8QI_FTYPE_V8QI:
26108 case V8SF_FTYPE_V8SF:
26109 case V8SF_FTYPE_V8SI:
26110 case V8SF_FTYPE_V4SF:
26111 case V8SF_FTYPE_V8HI:
26112 case V4SI_FTYPE_V4SI:
26113 case V4SI_FTYPE_V16QI:
26114 case V4SI_FTYPE_V4SF:
26115 case V4SI_FTYPE_V8SI:
26116 case V4SI_FTYPE_V8HI:
26117 case V4SI_FTYPE_V4DF:
26118 case V4SI_FTYPE_V2DF:
26119 case V4HI_FTYPE_V4HI:
26120 case V4DF_FTYPE_V4DF:
26121 case V4DF_FTYPE_V4SI:
26122 case V4DF_FTYPE_V4SF:
26123 case V4DF_FTYPE_V2DF:
26124 case V4SF_FTYPE_V4SF:
26125 case V4SF_FTYPE_V4SI:
26126 case V4SF_FTYPE_V8SF:
26127 case V4SF_FTYPE_V4DF:
26128 case V4SF_FTYPE_V8HI:
26129 case V4SF_FTYPE_V2DF:
26130 case V2DI_FTYPE_V2DI:
26131 case V2DI_FTYPE_V16QI:
26132 case V2DI_FTYPE_V8HI:
26133 case V2DI_FTYPE_V4SI:
26134 case V2DF_FTYPE_V2DF:
26135 case V2DF_FTYPE_V4SI:
26136 case V2DF_FTYPE_V4DF:
26137 case V2DF_FTYPE_V4SF:
26138 case V2DF_FTYPE_V2SI:
26139 case V2SI_FTYPE_V2SI:
26140 case V2SI_FTYPE_V4SF:
26141 case V2SI_FTYPE_V2SF:
26142 case V2SI_FTYPE_V2DF:
26143 case V2SF_FTYPE_V2SF:
26144 case V2SF_FTYPE_V2SI:
26147 case V4SF_FTYPE_V4SF_VEC_MERGE:
26148 case V2DF_FTYPE_V2DF_VEC_MERGE:
26149 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
26150 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
26151 case V16QI_FTYPE_V16QI_V16QI:
26152 case V16QI_FTYPE_V8HI_V8HI:
26153 case V8QI_FTYPE_V8QI_V8QI:
26154 case V8QI_FTYPE_V4HI_V4HI:
26155 case V8HI_FTYPE_V8HI_V8HI:
26156 case V8HI_FTYPE_V16QI_V16QI:
26157 case V8HI_FTYPE_V4SI_V4SI:
26158 case V8SF_FTYPE_V8SF_V8SF:
26159 case V8SF_FTYPE_V8SF_V8SI:
26160 case V4SI_FTYPE_V4SI_V4SI:
26161 case V4SI_FTYPE_V8HI_V8HI:
26162 case V4SI_FTYPE_V4SF_V4SF:
26163 case V4SI_FTYPE_V2DF_V2DF:
26164 case V4HI_FTYPE_V4HI_V4HI:
26165 case V4HI_FTYPE_V8QI_V8QI:
26166 case V4HI_FTYPE_V2SI_V2SI:
26167 case V4DF_FTYPE_V4DF_V4DF:
26168 case V4DF_FTYPE_V4DF_V4DI:
26169 case V4SF_FTYPE_V4SF_V4SF:
26170 case V4SF_FTYPE_V4SF_V4SI:
26171 case V4SF_FTYPE_V4SF_V2SI:
26172 case V4SF_FTYPE_V4SF_V2DF:
26173 case V4SF_FTYPE_V4SF_DI:
26174 case V4SF_FTYPE_V4SF_SI:
26175 case V2DI_FTYPE_V2DI_V2DI:
26176 case V2DI_FTYPE_V16QI_V16QI:
26177 case V2DI_FTYPE_V4SI_V4SI:
26178 case V2DI_FTYPE_V2DI_V16QI:
26179 case V2DI_FTYPE_V2DF_V2DF:
26180 case V2SI_FTYPE_V2SI_V2SI:
26181 case V2SI_FTYPE_V4HI_V4HI:
26182 case V2SI_FTYPE_V2SF_V2SF:
26183 case V2DF_FTYPE_V2DF_V2DF:
26184 case V2DF_FTYPE_V2DF_V4SF:
26185 case V2DF_FTYPE_V2DF_V2DI:
26186 case V2DF_FTYPE_V2DF_DI:
26187 case V2DF_FTYPE_V2DF_SI:
26188 case V2SF_FTYPE_V2SF_V2SF:
26189 case V1DI_FTYPE_V1DI_V1DI:
26190 case V1DI_FTYPE_V8QI_V8QI:
26191 case V1DI_FTYPE_V2SI_V2SI:
26192 if (comparison == UNKNOWN)
26193 return ix86_expand_binop_builtin (icode, exp, target);
26196 case V4SF_FTYPE_V4SF_V4SF_SWAP:
26197 case V2DF_FTYPE_V2DF_V2DF_SWAP:
26198 gcc_assert (comparison != UNKNOWN);
26202 case V8HI_FTYPE_V8HI_V8HI_COUNT:
26203 case V8HI_FTYPE_V8HI_SI_COUNT:
26204 case V4SI_FTYPE_V4SI_V4SI_COUNT:
26205 case V4SI_FTYPE_V4SI_SI_COUNT:
26206 case V4HI_FTYPE_V4HI_V4HI_COUNT:
26207 case V4HI_FTYPE_V4HI_SI_COUNT:
26208 case V2DI_FTYPE_V2DI_V2DI_COUNT:
26209 case V2DI_FTYPE_V2DI_SI_COUNT:
26210 case V2SI_FTYPE_V2SI_V2SI_COUNT:
26211 case V2SI_FTYPE_V2SI_SI_COUNT:
26212 case V1DI_FTYPE_V1DI_V1DI_COUNT:
26213 case V1DI_FTYPE_V1DI_SI_COUNT:
26215 last_arg_count = true;
26217 case UINT64_FTYPE_UINT64_UINT64:
26218 case UINT_FTYPE_UINT_UINT:
26219 case UINT_FTYPE_UINT_USHORT:
26220 case UINT_FTYPE_UINT_UCHAR:
26221 case UINT16_FTYPE_UINT16_INT:
26222 case UINT8_FTYPE_UINT8_INT:
26225 case V2DI_FTYPE_V2DI_INT_CONVERT:
26228 nargs_constant = 1;
26230 case V8HI_FTYPE_V8HI_INT:
26231 case V8HI_FTYPE_V8SF_INT:
26232 case V8HI_FTYPE_V4SF_INT:
26233 case V8SF_FTYPE_V8SF_INT:
26234 case V4SI_FTYPE_V4SI_INT:
26235 case V4SI_FTYPE_V8SI_INT:
26236 case V4HI_FTYPE_V4HI_INT:
26237 case V4DF_FTYPE_V4DF_INT:
26238 case V4SF_FTYPE_V4SF_INT:
26239 case V4SF_FTYPE_V8SF_INT:
26240 case V2DI_FTYPE_V2DI_INT:
26241 case V2DF_FTYPE_V2DF_INT:
26242 case V2DF_FTYPE_V4DF_INT:
26244 nargs_constant = 1;
26246 case V16QI_FTYPE_V16QI_V16QI_V16QI:
26247 case V8SF_FTYPE_V8SF_V8SF_V8SF:
26248 case V4DF_FTYPE_V4DF_V4DF_V4DF:
26249 case V4SF_FTYPE_V4SF_V4SF_V4SF:
26250 case V2DF_FTYPE_V2DF_V2DF_V2DF:
26253 case V16QI_FTYPE_V16QI_V16QI_INT:
26254 case V8HI_FTYPE_V8HI_V8HI_INT:
26255 case V8SI_FTYPE_V8SI_V8SI_INT:
26256 case V8SI_FTYPE_V8SI_V4SI_INT:
26257 case V8SF_FTYPE_V8SF_V8SF_INT:
26258 case V8SF_FTYPE_V8SF_V4SF_INT:
26259 case V4SI_FTYPE_V4SI_V4SI_INT:
26260 case V4DF_FTYPE_V4DF_V4DF_INT:
26261 case V4DF_FTYPE_V4DF_V2DF_INT:
26262 case V4SF_FTYPE_V4SF_V4SF_INT:
26263 case V2DI_FTYPE_V2DI_V2DI_INT:
26264 case V2DF_FTYPE_V2DF_V2DF_INT:
26266 nargs_constant = 1;
26268 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
26271 nargs_constant = 1;
26273 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
26276 nargs_constant = 1;
26278 case V2DI_FTYPE_V2DI_UINT_UINT:
26280 nargs_constant = 2;
26282 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
26283 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
26284 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
26285 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
26287 nargs_constant = 1;
26289 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
26291 nargs_constant = 2;
26294 gcc_unreachable ();
26297 gcc_assert (nargs <= ARRAY_SIZE (args));
26299 if (comparison != UNKNOWN)
26301 gcc_assert (nargs == 2);
26302 return ix86_expand_sse_compare (d, exp, target, swap);
26305 if (rmode == VOIDmode || rmode == tmode)
26309 || GET_MODE (target) != tmode
26310 || !insn_p->operand[0].predicate (target, tmode))
26311 target = gen_reg_rtx (tmode);
26312 real_target = target;
26316 target = gen_reg_rtx (rmode);
26317 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
26320 for (i = 0; i < nargs; i++)
26322 tree arg = CALL_EXPR_ARG (exp, i);
26323 rtx op = expand_normal (arg);
26324 enum machine_mode mode = insn_p->operand[i + 1].mode;
26325 bool match = insn_p->operand[i + 1].predicate (op, mode);
26327 if (last_arg_count && (i + 1) == nargs)
26329	      /* SIMD shift insns take either an 8-bit immediate or a
26330		 register as the count.  But builtin functions take an int
26331		 as the count.  If the count doesn't match, we put it in a register.  */
26334 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
26335 if (!insn_p->operand[i + 1].predicate (op, mode))
26336 op = copy_to_reg (op);
26339 else if ((nargs - i) <= nargs_constant)
26344 case CODE_FOR_sse4_1_roundpd:
26345 case CODE_FOR_sse4_1_roundps:
26346 case CODE_FOR_sse4_1_roundsd:
26347 case CODE_FOR_sse4_1_roundss:
26348 case CODE_FOR_sse4_1_blendps:
26349 case CODE_FOR_avx_blendpd256:
26350 case CODE_FOR_avx_vpermilv4df:
26351 case CODE_FOR_avx_roundpd256:
26352 case CODE_FOR_avx_roundps256:
26353 error ("the last argument must be a 4-bit immediate");
26356 case CODE_FOR_sse4_1_blendpd:
26357 case CODE_FOR_avx_vpermilv2df:
26358 case CODE_FOR_xop_vpermil2v2df3:
26359 case CODE_FOR_xop_vpermil2v4sf3:
26360 case CODE_FOR_xop_vpermil2v4df3:
26361 case CODE_FOR_xop_vpermil2v8sf3:
26362 error ("the last argument must be a 2-bit immediate");
26365 case CODE_FOR_avx_vextractf128v4df:
26366 case CODE_FOR_avx_vextractf128v8sf:
26367 case CODE_FOR_avx_vextractf128v8si:
26368 case CODE_FOR_avx_vinsertf128v4df:
26369 case CODE_FOR_avx_vinsertf128v8sf:
26370 case CODE_FOR_avx_vinsertf128v8si:
26371 error ("the last argument must be a 1-bit immediate");
26374 case CODE_FOR_avx_cmpsdv2df3:
26375 case CODE_FOR_avx_cmpssv4sf3:
26376 case CODE_FOR_avx_cmppdv2df3:
26377 case CODE_FOR_avx_cmppsv4sf3:
26378 case CODE_FOR_avx_cmppdv4df3:
26379 case CODE_FOR_avx_cmppsv8sf3:
26380 error ("the last argument must be a 5-bit immediate");
26384 switch (nargs_constant)
26387 if ((nargs - i) == nargs_constant)
26389 error ("the next to last argument must be an 8-bit immediate");
26393 error ("the last argument must be an 8-bit immediate");
26396 gcc_unreachable ();
26403 if (VECTOR_MODE_P (mode))
26404 op = safe_vector_operand (op, mode);
26406	  /* If we aren't optimizing, only allow one memory operand to be generated.  */
26408 if (memory_operand (op, mode))
26411 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
26413 if (optimize || !match || num_memory > 1)
26414 op = copy_to_mode_reg (mode, op);
26418 op = copy_to_reg (op);
26419 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
26424 args[i].mode = mode;
26430 pat = GEN_FCN (icode) (real_target, args[0].op);
26433 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
26436 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26440 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
26441 args[2].op, args[3].op);
26444 gcc_unreachable ();
26454 /* Subroutine of ix86_expand_builtin to take care of special insns
26455    with a variable number of operands.  */
26458 ix86_expand_special_args_builtin (const struct builtin_description *d,
26459 tree exp, rtx target)
26463 unsigned int i, nargs, arg_adjust, memory;
26467 enum machine_mode mode;
26469 enum insn_code icode = d->icode;
26470 bool last_arg_constant = false;
26471 const struct insn_data_d *insn_p = &insn_data[icode];
26472 enum machine_mode tmode = insn_p->operand[0].mode;
26473 enum { load, store } klass;
26475 switch ((enum ix86_builtin_func_type) d->flag)
26477 case VOID_FTYPE_VOID:
26478 if (icode == CODE_FOR_avx_vzeroupper)
26479 target = GEN_INT (vzeroupper_intrinsic);
26480 emit_insn (GEN_FCN (icode) (target));
26482 case VOID_FTYPE_UINT64:
26483 case VOID_FTYPE_UNSIGNED:
26489 case UINT64_FTYPE_VOID:
26490 case UNSIGNED_FTYPE_VOID:
26491 case UINT16_FTYPE_VOID:
26496 case UINT64_FTYPE_PUNSIGNED:
26497 case V2DI_FTYPE_PV2DI:
26498 case V32QI_FTYPE_PCCHAR:
26499 case V16QI_FTYPE_PCCHAR:
26500 case V8SF_FTYPE_PCV4SF:
26501 case V8SF_FTYPE_PCFLOAT:
26502 case V4SF_FTYPE_PCFLOAT:
26503 case V4DF_FTYPE_PCV2DF:
26504 case V4DF_FTYPE_PCDOUBLE:
26505 case V2DF_FTYPE_PCDOUBLE:
26506 case VOID_FTYPE_PVOID:
26511 case VOID_FTYPE_PV2SF_V4SF:
26512 case VOID_FTYPE_PV4DI_V4DI:
26513 case VOID_FTYPE_PV2DI_V2DI:
26514 case VOID_FTYPE_PCHAR_V32QI:
26515 case VOID_FTYPE_PCHAR_V16QI:
26516 case VOID_FTYPE_PFLOAT_V8SF:
26517 case VOID_FTYPE_PFLOAT_V4SF:
26518 case VOID_FTYPE_PDOUBLE_V4DF:
26519 case VOID_FTYPE_PDOUBLE_V2DF:
26520 case VOID_FTYPE_PULONGLONG_ULONGLONG:
26521 case VOID_FTYPE_PINT_INT:
26524 /* Reserve memory operand for target. */
26525 memory = ARRAY_SIZE (args);
26527 case V4SF_FTYPE_V4SF_PCV2SF:
26528 case V2DF_FTYPE_V2DF_PCDOUBLE:
26533 case V8SF_FTYPE_PCV8SF_V8SF:
26534 case V4DF_FTYPE_PCV4DF_V4DF:
26535 case V4SF_FTYPE_PCV4SF_V4SF:
26536 case V2DF_FTYPE_PCV2DF_V2DF:
26541 case VOID_FTYPE_PV8SF_V8SF_V8SF:
26542 case VOID_FTYPE_PV4DF_V4DF_V4DF:
26543 case VOID_FTYPE_PV4SF_V4SF_V4SF:
26544 case VOID_FTYPE_PV2DF_V2DF_V2DF:
26547 /* Reserve memory operand for target. */
26548 memory = ARRAY_SIZE (args);
26550 case VOID_FTYPE_UINT_UINT_UINT:
26551 case VOID_FTYPE_UINT64_UINT_UINT:
26552 case UCHAR_FTYPE_UINT_UINT_UINT:
26553 case UCHAR_FTYPE_UINT64_UINT_UINT:
26556 memory = ARRAY_SIZE (args);
26557 last_arg_constant = true;
26560 gcc_unreachable ();
26563 gcc_assert (nargs <= ARRAY_SIZE (args));
26565 if (klass == store)
26567 arg = CALL_EXPR_ARG (exp, 0);
26568 op = expand_normal (arg);
26569 gcc_assert (target == 0);
26571 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
26573 target = force_reg (tmode, op);
26581 || GET_MODE (target) != tmode
26582 || !insn_p->operand[0].predicate (target, tmode))
26583 target = gen_reg_rtx (tmode);
26586 for (i = 0; i < nargs; i++)
26588 enum machine_mode mode = insn_p->operand[i + 1].mode;
26591 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
26592 op = expand_normal (arg);
26593 match = insn_p->operand[i + 1].predicate (op, mode);
26595 if (last_arg_constant && (i + 1) == nargs)
26599 if (icode == CODE_FOR_lwp_lwpvalsi3
26600 || icode == CODE_FOR_lwp_lwpinssi3
26601 || icode == CODE_FOR_lwp_lwpvaldi3
26602 || icode == CODE_FOR_lwp_lwpinsdi3)
26603 error ("the last argument must be a 32-bit immediate");
26605 error ("the last argument must be an 8-bit immediate");
26613 /* This must be the memory operand. */
26614 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
26615 gcc_assert (GET_MODE (op) == mode
26616 || GET_MODE (op) == VOIDmode);
26620	  /* This must be a register.  */
26621 if (VECTOR_MODE_P (mode))
26622 op = safe_vector_operand (op, mode);
26624 gcc_assert (GET_MODE (op) == mode
26625 || GET_MODE (op) == VOIDmode);
26626 op = copy_to_mode_reg (mode, op);
26631 args[i].mode = mode;
26637 pat = GEN_FCN (icode) (target);
26640 pat = GEN_FCN (icode) (target, args[0].op);
26643 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
26646 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
26649 gcc_unreachable ();
26655 return klass == store ? 0 : target;
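/* Illustrative sketch, not part of GCC: a "load"-class special builtin
   from the switch above, reached through the AVX wrapper:

	#include <immintrin.h>
	__m256 load8 (const float *p)
	{
	  return _mm256_loadu_ps (p);	// V8SF_FTYPE_PCFLOAT
	}

   Here KLASS is load, so the pointer argument becomes the memory operand
   and the result is returned in a register.  */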
26658 /* Return the integer constant in ARG. Constrain it to be in the range
26659 of the subparts of VEC_TYPE; issue an error if not. */
26662 get_element_number (tree vec_type, tree arg)
26664 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
26666 if (!host_integerp (arg, 1)
26667 || (elt = tree_low_cst (arg, 1), elt > max))
26669 error ("selector must be an integer constant in the range 0..%wi", max);
26676 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26677 ix86_expand_vector_init. We DO have language-level syntax for this, in
26678 the form of (type){ init-list }. Except that since we can't place emms
26679 instructions from inside the compiler, we can't allow the use of MMX
26680 registers unless the user explicitly asks for it. So we do *not* define
26681 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
26682    we have builtins invoked by mmintrin.h that give us license to emit
26683 these sorts of instructions. */
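/* For illustration, not GCC code: for non-MMX vectors the language-level
   form mentioned above works directly,

	typedef float v4sf __attribute__ ((vector_size (16)));
	v4sf make (float a, float b, float c, float d)
	{
	  return (v4sf) { a, b, c, d };
	}

   whereas the __m64 initializers in mmintrin.h go through the
   IX86_BUILTIN_VEC_INIT_* builtins handled below.  */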
26686 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
26688 enum machine_mode tmode = TYPE_MODE (type);
26689 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
26690 int i, n_elt = GET_MODE_NUNITS (tmode);
26691 rtvec v = rtvec_alloc (n_elt);
26693 gcc_assert (VECTOR_MODE_P (tmode));
26694 gcc_assert (call_expr_nargs (exp) == n_elt);
26696 for (i = 0; i < n_elt; ++i)
26698 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
26699 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
26702 if (!target || !register_operand (target, tmode))
26703 target = gen_reg_rtx (tmode);
26705 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
26709 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26710 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
26711 had a language-level syntax for referencing vector elements. */
26714 ix86_expand_vec_ext_builtin (tree exp, rtx target)
26716 enum machine_mode tmode, mode0;
26721 arg0 = CALL_EXPR_ARG (exp, 0);
26722 arg1 = CALL_EXPR_ARG (exp, 1);
26724 op0 = expand_normal (arg0);
26725 elt = get_element_number (TREE_TYPE (arg0), arg1);
26727 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26728 mode0 = TYPE_MODE (TREE_TYPE (arg0));
26729 gcc_assert (VECTOR_MODE_P (mode0));
26731 op0 = force_reg (mode0, op0);
26733 if (optimize || !target || !register_operand (target, tmode))
26734 target = gen_reg_rtx (tmode);
26736 ix86_expand_vector_extract (true, target, op0, elt);
26741 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
26742 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
26743 a language-level syntax for referencing vector elements. */
26746 ix86_expand_vec_set_builtin (tree exp)
26748 enum machine_mode tmode, mode1;
26749 tree arg0, arg1, arg2;
26751 rtx op0, op1, target;
26753 arg0 = CALL_EXPR_ARG (exp, 0);
26754 arg1 = CALL_EXPR_ARG (exp, 1);
26755 arg2 = CALL_EXPR_ARG (exp, 2);
26757 tmode = TYPE_MODE (TREE_TYPE (arg0));
26758 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
26759 gcc_assert (VECTOR_MODE_P (tmode));
26761 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
26762 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
26763 elt = get_element_number (TREE_TYPE (arg0), arg2);
26765 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
26766 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
26768 op0 = force_reg (tmode, op0);
26769 op1 = force_reg (mode1, op1);
26771 /* OP0 is the source of these builtin functions and shouldn't be
26772      modified.  Create a copy, use it, and return it as the target.  */
26773 target = gen_reg_rtx (tmode);
26774 emit_move_insn (target, op0);
26775 ix86_expand_vector_set (true, target, op1, elt);
26780 /* Expand an expression EXP that calls a built-in function,
26781 with result going to TARGET if that's convenient
26782 (and in mode MODE if that's convenient).
26783 SUBTARGET may be used as the target for computing one of EXP's operands.
26784 IGNORE is nonzero if the value is to be ignored. */
26787 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
26788 enum machine_mode mode ATTRIBUTE_UNUSED,
26789 int ignore ATTRIBUTE_UNUSED)
26791 const struct builtin_description *d;
26793 enum insn_code icode;
26794 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
26795 tree arg0, arg1, arg2;
26796 rtx op0, op1, op2, pat;
26797 enum machine_mode mode0, mode1, mode2;
26798 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
26800 /* Determine whether the builtin function is available under the current ISA.
26801 Originally the builtin was not created if it wasn't applicable to the
26802 current ISA based on the command line switches. With function specific
26803 options, we need to check in the context of the function making the call
26804 whether it is supported. */
26805 if (ix86_builtins_isa[fcode].isa
26806 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
26808 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
26809 NULL, NULL, false);
26812 error ("%qE needs unknown isa option", fndecl);
26815 gcc_assert (opts != NULL);
26816 error ("%qE needs isa option %s", fndecl, opts);
26824 case IX86_BUILTIN_MASKMOVQ:
26825 case IX86_BUILTIN_MASKMOVDQU:
26826 icode = (fcode == IX86_BUILTIN_MASKMOVQ
26827 ? CODE_FOR_mmx_maskmovq
26828 : CODE_FOR_sse2_maskmovdqu);
26829 /* Note the arg order is different from the operand order. */
26830 arg1 = CALL_EXPR_ARG (exp, 0);
26831 arg2 = CALL_EXPR_ARG (exp, 1);
26832 arg0 = CALL_EXPR_ARG (exp, 2);
26833 op0 = expand_normal (arg0);
26834 op1 = expand_normal (arg1);
26835 op2 = expand_normal (arg2);
26836 mode0 = insn_data[icode].operand[0].mode;
26837 mode1 = insn_data[icode].operand[1].mode;
26838 mode2 = insn_data[icode].operand[2].mode;
26840 op0 = force_reg (Pmode, op0);
26841 op0 = gen_rtx_MEM (mode1, op0);
26843 if (!insn_data[icode].operand[0].predicate (op0, mode0))
26844 op0 = copy_to_mode_reg (mode0, op0);
26845 if (!insn_data[icode].operand[1].predicate (op1, mode1))
26846 op1 = copy_to_mode_reg (mode1, op1);
26847 if (!insn_data[icode].operand[2].predicate (op2, mode2))
26848 op2 = copy_to_mode_reg (mode2, op2);
26849 pat = GEN_FCN (icode) (op0, op1, op2);
26855 case IX86_BUILTIN_LDMXCSR:
26856 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
26857 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26858 emit_move_insn (target, op0);
26859 emit_insn (gen_sse_ldmxcsr (target));
26862 case IX86_BUILTIN_STMXCSR:
26863 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
26864 emit_insn (gen_sse_stmxcsr (target));
26865 return copy_to_mode_reg (SImode, target);
26867 case IX86_BUILTIN_CLFLUSH:
26868 arg0 = CALL_EXPR_ARG (exp, 0);
26869 op0 = expand_normal (arg0);
26870 icode = CODE_FOR_sse2_clflush;
26871 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26872 op0 = copy_to_mode_reg (Pmode, op0);
26874 emit_insn (gen_sse2_clflush (op0));
26877 case IX86_BUILTIN_MONITOR:
26878 arg0 = CALL_EXPR_ARG (exp, 0);
26879 arg1 = CALL_EXPR_ARG (exp, 1);
26880 arg2 = CALL_EXPR_ARG (exp, 2);
26881 op0 = expand_normal (arg0);
26882 op1 = expand_normal (arg1);
26883 op2 = expand_normal (arg2);
26885 op0 = copy_to_mode_reg (Pmode, op0);
26887 op1 = copy_to_mode_reg (SImode, op1);
26889 op2 = copy_to_mode_reg (SImode, op2);
26890 emit_insn (ix86_gen_monitor (op0, op1, op2));
26893 case IX86_BUILTIN_MWAIT:
26894 arg0 = CALL_EXPR_ARG (exp, 0);
26895 arg1 = CALL_EXPR_ARG (exp, 1);
26896 op0 = expand_normal (arg0);
26897 op1 = expand_normal (arg1);
26899 op0 = copy_to_mode_reg (SImode, op0);
26901 op1 = copy_to_mode_reg (SImode, op1);
26902 emit_insn (gen_sse3_mwait (op0, op1));
26905 case IX86_BUILTIN_VEC_INIT_V2SI:
26906 case IX86_BUILTIN_VEC_INIT_V4HI:
26907 case IX86_BUILTIN_VEC_INIT_V8QI:
26908 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
26910 case IX86_BUILTIN_VEC_EXT_V2DF:
26911 case IX86_BUILTIN_VEC_EXT_V2DI:
26912 case IX86_BUILTIN_VEC_EXT_V4SF:
26913 case IX86_BUILTIN_VEC_EXT_V4SI:
26914 case IX86_BUILTIN_VEC_EXT_V8HI:
26915 case IX86_BUILTIN_VEC_EXT_V2SI:
26916 case IX86_BUILTIN_VEC_EXT_V4HI:
26917 case IX86_BUILTIN_VEC_EXT_V16QI:
26918 return ix86_expand_vec_ext_builtin (exp, target);
26920 case IX86_BUILTIN_VEC_SET_V2DI:
26921 case IX86_BUILTIN_VEC_SET_V4SF:
26922 case IX86_BUILTIN_VEC_SET_V4SI:
26923 case IX86_BUILTIN_VEC_SET_V8HI:
26924 case IX86_BUILTIN_VEC_SET_V4HI:
26925 case IX86_BUILTIN_VEC_SET_V16QI:
26926 return ix86_expand_vec_set_builtin (exp);
26928 case IX86_BUILTIN_VEC_PERM_V2DF:
26929 case IX86_BUILTIN_VEC_PERM_V4SF:
26930 case IX86_BUILTIN_VEC_PERM_V2DI:
26931 case IX86_BUILTIN_VEC_PERM_V4SI:
26932 case IX86_BUILTIN_VEC_PERM_V8HI:
26933 case IX86_BUILTIN_VEC_PERM_V16QI:
26934 case IX86_BUILTIN_VEC_PERM_V2DI_U:
26935 case IX86_BUILTIN_VEC_PERM_V4SI_U:
26936 case IX86_BUILTIN_VEC_PERM_V8HI_U:
26937 case IX86_BUILTIN_VEC_PERM_V16QI_U:
26938 case IX86_BUILTIN_VEC_PERM_V4DF:
26939 case IX86_BUILTIN_VEC_PERM_V8SF:
26940 return ix86_expand_vec_perm_builtin (exp);
26942 case IX86_BUILTIN_INFQ:
26943 case IX86_BUILTIN_HUGE_VALQ:
26945 REAL_VALUE_TYPE inf;
26949 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
26951 tmp = validize_mem (force_const_mem (mode, tmp));
26954 target = gen_reg_rtx (mode);
26956 emit_move_insn (target, tmp);
26960 case IX86_BUILTIN_LLWPCB:
26961 arg0 = CALL_EXPR_ARG (exp, 0);
26962 op0 = expand_normal (arg0);
26963 icode = CODE_FOR_lwp_llwpcb;
26964 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26965 op0 = copy_to_mode_reg (Pmode, op0);
26966 emit_insn (gen_lwp_llwpcb (op0));
26969 case IX86_BUILTIN_SLWPCB:
26970 icode = CODE_FOR_lwp_slwpcb;
26972 || !insn_data[icode].operand[0].predicate (target, Pmode))
26973 target = gen_reg_rtx (Pmode);
26974 emit_insn (gen_lwp_slwpcb (target));
26981 for (i = 0, d = bdesc_special_args;
26982 i < ARRAY_SIZE (bdesc_special_args);
26984 if (d->code == fcode)
26985 return ix86_expand_special_args_builtin (d, exp, target);
26987 for (i = 0, d = bdesc_args;
26988 i < ARRAY_SIZE (bdesc_args);
26990 if (d->code == fcode)
26993 case IX86_BUILTIN_FABSQ:
26994 case IX86_BUILTIN_COPYSIGNQ:
26996 /* Emit a normal call if SSE2 isn't available. */
26997 return expand_call (exp, target, ignore);
26999 return ix86_expand_args_builtin (d, exp, target);
27002 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
27003 if (d->code == fcode)
27004 return ix86_expand_sse_comi (d, exp, target);
27006 for (i = 0, d = bdesc_pcmpestr;
27007 i < ARRAY_SIZE (bdesc_pcmpestr);
27009 if (d->code == fcode)
27010 return ix86_expand_sse_pcmpestr (d, exp, target);
27012 for (i = 0, d = bdesc_pcmpistr;
27013 i < ARRAY_SIZE (bdesc_pcmpistr);
27015 if (d->code == fcode)
27016 return ix86_expand_sse_pcmpistr (d, exp, target);
27018 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
27019 if (d->code == fcode)
27020 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
27021 (enum ix86_builtin_func_type)
27022 d->flag, d->comparison);
27024 gcc_unreachable ();
27027 /* Returns a function decl for a vectorized version of the builtin function
27028 with builtin function code FN and the result vector type TYPE, or NULL_TREE
27029 if it is not available. */
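/* For illustration, not GCC code: with -O3 -ffast-math -mavx, a loop like

	void fsqrt (double *x, int n)
	{
	  int i;
	  for (i = 0; i < n; i++)
	    x[i] = __builtin_sqrt (x[i]);
	}

   queries this hook with FN == BUILT_IN_SQRT and V4DF types, and the
   IX86_BUILTIN_SQRTPD256 decl returned below lets the vectorizer emit
   vsqrtpd on the whole vector.  */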
27032 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
27035 enum machine_mode in_mode, out_mode;
27037 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27039 if (TREE_CODE (type_out) != VECTOR_TYPE
27040 || TREE_CODE (type_in) != VECTOR_TYPE
27041 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
27044 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27045 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27046 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27047 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27051 case BUILT_IN_SQRT:
27052 if (out_mode == DFmode && in_mode == DFmode)
27054 if (out_n == 2 && in_n == 2)
27055 return ix86_builtins[IX86_BUILTIN_SQRTPD];
27056 else if (out_n == 4 && in_n == 4)
27057 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
27061 case BUILT_IN_SQRTF:
27062 if (out_mode == SFmode && in_mode == SFmode)
27064 if (out_n == 4 && in_n == 4)
27065 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
27066 else if (out_n == 8 && in_n == 8)
27067 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
27071 case BUILT_IN_LRINT:
27072 if (out_mode == SImode && out_n == 4
27073 && in_mode == DFmode && in_n == 2)
27074 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
27077 case BUILT_IN_LRINTF:
27078 if (out_mode == SImode && in_mode == SFmode)
27080 if (out_n == 4 && in_n == 4)
27081 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
27082 else if (out_n == 8 && in_n == 8)
27083 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
27087 case BUILT_IN_COPYSIGN:
27088 if (out_mode == DFmode && in_mode == DFmode)
27090 if (out_n == 2 && in_n == 2)
27091 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
27092 else if (out_n == 4 && in_n == 4)
27093 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
27097 case BUILT_IN_COPYSIGNF:
27098 if (out_mode == SFmode && in_mode == SFmode)
27100 if (out_n == 4 && in_n == 4)
27101 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
27102 else if (out_n == 8 && in_n == 8)
27103 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
27108 if (out_mode == DFmode && in_mode == DFmode)
27110 if (out_n == 2 && in_n == 2)
27111 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
27112 if (out_n == 4 && in_n == 4)
27113 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
27117 case BUILT_IN_FMAF:
27118 if (out_mode == SFmode && in_mode == SFmode)
27120 if (out_n == 4 && in_n == 4)
27121 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
27122 if (out_n == 8 && in_n == 8)
27123 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
27131 /* Dispatch to a handler for a vectorization library. */
27132 if (ix86_veclib_handler)
27133 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
27139 /* Handler for an SVML-style interface to
27140 a library with vectorized intrinsics. */
27143 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
27146 tree fntype, new_fndecl, args;
27149 enum machine_mode el_mode, in_mode;
27152   /* The SVML library is suitable for unsafe math only.  */
27153 if (!flag_unsafe_math_optimizations)
27156 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27157 n = TYPE_VECTOR_SUBPARTS (type_out);
27158 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27159 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27160 if (el_mode != in_mode
27168 case BUILT_IN_LOG10:
27170 case BUILT_IN_TANH:
27172 case BUILT_IN_ATAN:
27173 case BUILT_IN_ATAN2:
27174 case BUILT_IN_ATANH:
27175 case BUILT_IN_CBRT:
27176 case BUILT_IN_SINH:
27178 case BUILT_IN_ASINH:
27179 case BUILT_IN_ASIN:
27180 case BUILT_IN_COSH:
27182 case BUILT_IN_ACOSH:
27183 case BUILT_IN_ACOS:
27184 if (el_mode != DFmode || n != 2)
27188 case BUILT_IN_EXPF:
27189 case BUILT_IN_LOGF:
27190 case BUILT_IN_LOG10F:
27191 case BUILT_IN_POWF:
27192 case BUILT_IN_TANHF:
27193 case BUILT_IN_TANF:
27194 case BUILT_IN_ATANF:
27195 case BUILT_IN_ATAN2F:
27196 case BUILT_IN_ATANHF:
27197 case BUILT_IN_CBRTF:
27198 case BUILT_IN_SINHF:
27199 case BUILT_IN_SINF:
27200 case BUILT_IN_ASINHF:
27201 case BUILT_IN_ASINF:
27202 case BUILT_IN_COSHF:
27203 case BUILT_IN_COSF:
27204 case BUILT_IN_ACOSHF:
27205 case BUILT_IN_ACOSF:
27206 if (el_mode != SFmode || n != 4)
27214 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27216 if (fn == BUILT_IN_LOGF)
27217 strcpy (name, "vmlsLn4");
27218 else if (fn == BUILT_IN_LOG)
27219 strcpy (name, "vmldLn2");
27222 sprintf (name, "vmls%s", bname+10);
27223 name[strlen (name)-1] = '4';
27226 sprintf (name, "vmld%s2", bname+10);
27228 /* Convert to uppercase. */
27232 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27233 args = TREE_CHAIN (args))
27237 fntype = build_function_type_list (type_out, type_in, NULL);
27239 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27241 /* Build a function declaration for the vectorized function. */
27242 new_fndecl = build_decl (BUILTINS_LOCATION,
27243 FUNCTION_DECL, get_identifier (name), fntype);
27244 TREE_PUBLIC (new_fndecl) = 1;
27245 DECL_EXTERNAL (new_fndecl) = 1;
27246 DECL_IS_NOVOPS (new_fndecl) = 1;
27247 TREE_READONLY (new_fndecl) = 1;
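/* For illustration, not GCC code: under -mveclibabi=svml -ffast-math,

	void vsin (float *x, int n)
	{
	  int i;
	  for (i = 0; i < n; i++)
	    x[i] = __builtin_sinf (x[i]);
	}

   can be vectorized into calls to a routine named along the lines of
   "vmlsSin4"; the uppercase conversion lives in lines not shown here, so
   that exact name is inferred (BUILT_IN_LOG maps explicitly to "vmldLn2"
   above).  */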
27252 /* Handler for an ACML-style interface to
27253 a library with vectorized intrinsics. */
27256 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
27258 char name[20] = "__vr.._";
27259 tree fntype, new_fndecl, args;
27262 enum machine_mode el_mode, in_mode;
27265   /* The ACML library is 64-bit only and suitable for unsafe math only,
27266      as it does not correctly support parts of IEEE arithmetic with the
27267      required precision, such as denormals.  */
27269 || !flag_unsafe_math_optimizations)
27272 el_mode = TYPE_MODE (TREE_TYPE (type_out));
27273 n = TYPE_VECTOR_SUBPARTS (type_out);
27274 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27275 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27276 if (el_mode != in_mode
27286 case BUILT_IN_LOG2:
27287 case BUILT_IN_LOG10:
27290 if (el_mode != DFmode
27295 case BUILT_IN_SINF:
27296 case BUILT_IN_COSF:
27297 case BUILT_IN_EXPF:
27298 case BUILT_IN_POWF:
27299 case BUILT_IN_LOGF:
27300 case BUILT_IN_LOG2F:
27301 case BUILT_IN_LOG10F:
27304 if (el_mode != SFmode
27313 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
27314 sprintf (name + 7, "%s", bname+10);
27317 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
27318 args = TREE_CHAIN (args))
27322 fntype = build_function_type_list (type_out, type_in, NULL);
27324 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
27326 /* Build a function declaration for the vectorized function. */
27327 new_fndecl = build_decl (BUILTINS_LOCATION,
27328 FUNCTION_DECL, get_identifier (name), fntype);
27329 TREE_PUBLIC (new_fndecl) = 1;
27330 DECL_EXTERNAL (new_fndecl) = 1;
27331 DECL_IS_NOVOPS (new_fndecl) = 1;
27332 TREE_READONLY (new_fndecl) = 1;
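/* Editor's worked example -- not in the original file: the template
   "__vr.._" has its two dots filled in by the (elided) mode checks,
   's'/'4' for SFmode and 'd'/'2' for DFmode, and bname+10 again skips
   "__builtin_", so BUILT_IN_SINF becomes "__vrs4_sinf" and
   BUILT_IN_LOG becomes "__vrd2_log".  */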
27338 /* Returns a decl of a function that implements conversion of an integer vector
27339 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
27340 are the types involved when converting according to CODE.
27341 Return NULL_TREE if it is not available. */
27344 ix86_vectorize_builtin_conversion (unsigned int code,
27345 tree dest_type, tree src_type)
27353 switch (TYPE_MODE (src_type))
27356 switch (TYPE_MODE (dest_type))
27359 return (TYPE_UNSIGNED (src_type)
27360 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
27361 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
27363 return (TYPE_UNSIGNED (src_type)
27365 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
27371 switch (TYPE_MODE (dest_type))
27374 return (TYPE_UNSIGNED (src_type)
27376 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
27385 case FIX_TRUNC_EXPR:
27386 switch (TYPE_MODE (dest_type))
27389 switch (TYPE_MODE (src_type))
27392 return (TYPE_UNSIGNED (dest_type)
27394 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
27396 return (TYPE_UNSIGNED (dest_type)
27398 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
27405 switch (TYPE_MODE (src_type))
27408 return (TYPE_UNSIGNED (dest_type)
27410 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
27427 /* Returns a decl of a target-specific builtin that implements
27428 the reciprocal of the function FN, or NULL_TREE if not available. */
27431 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
27432 bool sqrt ATTRIBUTE_UNUSED)
27434 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
27435 && flag_finite_math_only && !flag_trapping_math
27436 && flag_unsafe_math_optimizations))
27440 /* Machine dependent builtins. */
27443 /* Vectorized version of sqrt to rsqrt conversion. */
27444 case IX86_BUILTIN_SQRTPS_NR:
27445 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
27447 case IX86_BUILTIN_SQRTPS_NR256:
27448 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
27454 /* Normal builtins. */
27457 /* Sqrt to rsqrt conversion. */
27458 case BUILT_IN_SQRTF:
27459 return ix86_builtins[IX86_BUILTIN_RSQRTF];
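/* Editor's note -- illustrative, not in the original file: rsqrtss and
   rsqrtps only deliver roughly 12 bits of precision, which is why the
   conversion above is gated on flag_unsafe_math_optimizations and
   friends; the usual Newton-Raphson refinement (applied by the
   expanders elsewhere in this file) is
     x1 = x0 * (1.5 - 0.5 * a * x0 * x0)
   which roughly doubles the number of correct bits per step.  */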
27466 /* Helper for avx_vpermilps256_operand et al. This is also used by
27467 the expansion functions to turn the parallel back into a mask.
27468 The return value is 0 for no match and the imm8+1 for a match. */
27471 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
27473 unsigned i, nelt = GET_MODE_NUNITS (mode);
27475 unsigned char ipar[8];
27477 if (XVECLEN (par, 0) != (int) nelt)
27480 /* Validate that all of the elements are constants, and not totally
27481 out of range. Copy the data into an integral array to make the
27482 subsequent checks easier. */
27483 for (i = 0; i < nelt; ++i)
27485 rtx er = XVECEXP (par, 0, i);
27486 unsigned HOST_WIDE_INT ei;
27488 if (!CONST_INT_P (er))
27499 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
27501 for (i = 0; i < 2; ++i)
27505 mask |= ipar[i] << i;
27507 for (i = 2; i < 4; ++i)
27511 mask |= (ipar[i] - 2) << i;
27516 /* In the 256-bit SFmode case, we have full freedom of movement
27517 within the low 128-bit lane, but the high 128-bit lane must
27518 mirror the exact same pattern. */
27519 for (i = 0; i < 4; ++i)
27520 if (ipar[i] + 4 != ipar[i + 4])
27527 /* In the 128-bit case, we have full freedom in the placement of
27528 the elements from the source operand. */
27529 for (i = 0; i < nelt; ++i)
27530 mask |= ipar[i] << (i * (nelt / 2));
27534 gcc_unreachable ();
27537 /* Make sure success has a non-zero value by adding one. */
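/* Editor's worked example -- not in the original file: for a V4SFmode
   (parallel [1 0 3 2]) the 128-bit branch above computes
     mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1
   and the function returns 0xb2, i.e. the vpermilps imm8 plus one.  */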
27541 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
27542 the expansion functions to turn the parallel back into a mask.
27543 The return value is 0 for no match and the imm8+1 for a match. */
27546 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
27548 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
27550 unsigned char ipar[8];
27552 if (XVECLEN (par, 0) != (int) nelt)
27555 /* Validate that all of the elements are constants, and not totally
27556 out of range. Copy the data into an integral array to make the
27557 subsequent checks easier. */
27558 for (i = 0; i < nelt; ++i)
27560 rtx er = XVECEXP (par, 0, i);
27561 unsigned HOST_WIDE_INT ei;
27563 if (!CONST_INT_P (er))
27566 if (ei >= 2 * nelt)
27571 /* Validate that each half of the permute consists of consecutive elements. */
27572 for (i = 0; i < nelt2 - 1; ++i)
27573 if (ipar[i] + 1 != ipar[i + 1])
27575 for (i = nelt2; i < nelt - 1; ++i)
27576 if (ipar[i] + 1 != ipar[i + 1])
27579 /* Reconstruct the mask. */
27580 for (i = 0; i < 2; ++i)
27582 unsigned e = ipar[i * nelt2];
27586 mask |= e << (i * 4);
27589 /* Make sure success has a non-zero value by adding one. */
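/* Editor's worked example -- not in the original file: for a V8SFmode
   (parallel [4 5 6 7 12 13 14 15]) both halves are consecutive, and
   assuming the elided steps reduce each half's first index to a lane
   number (e / nelt2, with nelt2 == 4), the reconstruction gives
     e0 = 4/4 = 1,  e1 = 12/4 = 3,  mask = 1 | 3<<4 = 0x31
   so the function returns 0x32, the vperm2f128 imm8 plus one.  */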
27594 /* Store OPERAND to memory after reload has completed. This means
27595 that we can't easily use assign_stack_local. */
27597 ix86_force_to_memory (enum machine_mode mode, rtx operand)
27601 gcc_assert (reload_completed);
27602 if (ix86_using_red_zone ())
27604 result = gen_rtx_MEM (mode,
27605 gen_rtx_PLUS (Pmode,
27607 GEN_INT (-RED_ZONE_SIZE)));
27608 emit_move_insn (result, operand);
27610 else if (TARGET_64BIT)
27616 operand = gen_lowpart (DImode, operand);
27620 gen_rtx_SET (VOIDmode,
27621 gen_rtx_MEM (DImode,
27622 gen_rtx_PRE_DEC (DImode,
27623 stack_pointer_rtx)),
27627 gcc_unreachable ();
27629 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27638 split_double_mode (mode, &operand, 1, operands, operands + 1);
27640 gen_rtx_SET (VOIDmode,
27641 gen_rtx_MEM (SImode,
27642 gen_rtx_PRE_DEC (Pmode,
27643 stack_pointer_rtx)),
27646 gen_rtx_SET (VOIDmode,
27647 gen_rtx_MEM (SImode,
27648 gen_rtx_PRE_DEC (Pmode,
27649 stack_pointer_rtx)),
27654 /* Store HImodes as SImodes. */
27655 operand = gen_lowpart (SImode, operand);
27659 gen_rtx_SET (VOIDmode,
27660 gen_rtx_MEM (GET_MODE (operand),
27661 gen_rtx_PRE_DEC (SImode,
27662 stack_pointer_rtx)),
27666 gcc_unreachable ();
27668 result = gen_rtx_MEM (mode, stack_pointer_rtx);
27673 /* Free the operand from memory. */
27675 ix86_free_from_memory (enum machine_mode mode)
27677 if (!ix86_using_red_zone ())
27681 if (mode == DImode || TARGET_64BIT)
27685 /* Use LEA to deallocate stack space. In peephole2 it will be converted
27686 to pop or add instruction if registers are available. */
27687 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27688 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
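/* Editor's sketch -- hypothetical usage, not in the original file: the
   two helpers above are meant to be used as a bracket pair in
   post-reload splitters, roughly

     rtx mem = ix86_force_to_memory (XFmode, op1);
     ...emit insns that read MEM in place of OP1...
     ix86_free_from_memory (XFmode);

   ix86_force_to_memory either borrows the red zone or pushes onto the
   stack; ix86_free_from_memory releases the pushed slot with an LEA so
   that peephole2 can later turn it into a pop or add.  */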
27693 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
27694 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
27696 static const reg_class_t *
27697 i386_ira_cover_classes (void)
27699 static const reg_class_t sse_fpmath_classes[] = {
27700 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
27702 static const reg_class_t no_sse_fpmath_classes[] = {
27703 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
27706 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
27709 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
27711 Put float CONST_DOUBLE in the constant pool instead of fp regs.
27712 QImode must go into class Q_REGS.
27713 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
27714 movdf to do mem-to-mem moves through integer regs. */
27717 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
27719 enum machine_mode mode = GET_MODE (x);
27721 /* We're only allowed to return a subclass of CLASS. Many of the
27722 following checks fail for NO_REGS, so eliminate that early. */
27723 if (regclass == NO_REGS)
27726 /* All classes can load zeros. */
27727 if (x == CONST0_RTX (mode))
27730 /* Force constants into memory if we are loading a (nonzero) constant into
27731 an MMX or SSE register. This is because there are no MMX/SSE instructions
27732 to load from a constant. */
27734 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
27737 /* Prefer SSE regs only, if we can use them for math. */
27738 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
27739 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
27741 /* Floating-point constants need more complex checks. */
27742 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
27744 /* General regs can load everything. */
27745 if (reg_class_subset_p (regclass, GENERAL_REGS))
27748 /* Floats can load 0 and 1 plus some others. Note that we eliminated
27749 zero above. We only want to wind up preferring 80387 registers if
27750 we plan on doing computation with them. */
27752 && standard_80387_constant_p (x))
27754 /* Limit class to non-sse. */
27755 if (regclass == FLOAT_SSE_REGS)
27757 if (regclass == FP_TOP_SSE_REGS)
27759 if (regclass == FP_SECOND_SSE_REGS)
27760 return FP_SECOND_REG;
27761 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
27768 /* Generally when we see PLUS here, it's the function invariant
27769 (plus soft-fp const_int), which can only be computed into general regs. */
27771 if (GET_CODE (x) == PLUS)
27772 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
27774 /* QImode constants are easy to load, but non-constant QImode data
27775 must go into Q_REGS. */
27776 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
27778 if (reg_class_subset_p (regclass, Q_REGS))
27780 if (reg_class_subset_p (Q_REGS, regclass))
27788 /* Discourage putting floating-point values in SSE registers unless
27789 SSE math is being used, and likewise for the 387 registers. */
27791 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
27793 enum machine_mode mode = GET_MODE (x);
27795 /* Restrict the output reload class to the register bank that we are doing
27796 math on. If we would like not to return a subset of CLASS, reject this
27797 alternative: if reload cannot do this, it will still use its choice. */
27799 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
27800 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
27802 if (X87_FLOAT_MODE_P (mode))
27804 if (regclass == FP_TOP_SSE_REGS)
27806 else if (regclass == FP_SECOND_SSE_REGS)
27807 return FP_SECOND_REG;
27809 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
27816 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
27817 enum machine_mode mode,
27818 secondary_reload_info *sri ATTRIBUTE_UNUSED)
27820 /* QImode spills from non-QI registers require
27821 an intermediate register on 32-bit targets. */
27822 if (!in_p && mode == QImode && !TARGET_64BIT
27823 && (rclass == GENERAL_REGS
27824 || rclass == LEGACY_REGS
27825 || rclass == INDEX_REGS))
27834 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
27835 regno = true_regnum (x);
27837 /* Return Q_REGS if the operand is in memory. */
27845 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
27848 ix86_class_likely_spilled_p (reg_class_t rclass)
27859 case SSE_FIRST_REG:
27861 case FP_SECOND_REG:
27871 /* If we are copying between general and FP registers, we need a memory
27872 location. The same is true for SSE and MMX registers.
27874 To optimize register_move_cost performance, allow inline variant.
27876 The macro can't work reliably when one of the CLASSES is a class containing
27877 registers from multiple units (SSE, MMX, integer). We avoid this by never
27878 combining those units in a single alternative in the machine description.
27879 Ensure that this constraint holds to avoid unexpected surprises.
27881 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
27882 enforce these sanity checks. */
27885 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
27886 enum machine_mode mode, int strict)
27888 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
27889 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
27890 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
27891 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
27892 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
27893 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
27895 gcc_assert (!strict);
27899 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
27902 /* ??? This is a lie. We do have moves between mmx/general, and between
27903 mmx/sse2. But by saying we need secondary memory we discourage the
27904 register allocator from using the mmx registers unless needed. */
27905 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
27908 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27910 /* SSE1 doesn't have any direct moves from other classes. */
27914 /* If the target says that inter-unit moves are more expensive
27915 than moving through memory, then don't generate them. */
27916 if (!TARGET_INTER_UNIT_MOVES)
27919 /* Between SSE and general, we have moves no larger than word size. */
27920 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
27928 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
27929 enum machine_mode mode, int strict)
27931 return inline_secondary_memory_needed (class1, class2, mode, strict);
27934 /* Return true if the registers in CLASS cannot represent the change from
27935 modes FROM to TO. */
27938 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
27939 enum reg_class regclass)
27944 /* x87 registers can't do subreg at all, as all values are reformatted
27945 to extended precision. */
27946 if (MAYBE_FLOAT_CLASS_P (regclass))
27949 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
27951 /* Vector registers do not support QI or HImode loads. If we don't
27952 disallow a change to these modes, reload will assume it's ok to
27953 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
27954 the vec_dupv4hi pattern. */
27955 if (GET_MODE_SIZE (from) < 4)
27958 /* Vector registers do not support subreg with nonzero offsets, which
27959 are otherwise valid for integer registers. Since we can't see
27960 whether we have a nonzero offset from here, prohibit all
27961 nonparadoxical subregs changing size. */
27962 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
27969 /* Return the cost of moving data of mode M between a
27970 register and memory. A value of 2 is the default; this cost is
27971 relative to those in `REGISTER_MOVE_COST'.
27973 This function is used extensively by register_move_cost, which is used to
27974 build tables at startup, so make it inline in that case.
27975 When IN is 2, return the maximum of the in and out move costs.
27977 If moving between registers and memory is more expensive than
27978 between two registers, you should define this macro to express the relative cost.
27981 Model also the increased moving costs of QImode registers in non-Q_REGS classes. */
27985 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
27989 if (FLOAT_CLASS_P (regclass))
28007 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
28008 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
28010 if (SSE_CLASS_P (regclass))
28013 switch (GET_MODE_SIZE (mode))
28028 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
28029 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
28031 if (MMX_CLASS_P (regclass))
28034 switch (GET_MODE_SIZE (mode))
28046 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
28047 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
28049 switch (GET_MODE_SIZE (mode))
28052 if (Q_CLASS_P (regclass) || TARGET_64BIT)
28055 return ix86_cost->int_store[0];
28056 if (TARGET_PARTIAL_REG_DEPENDENCY
28057 && optimize_function_for_speed_p (cfun))
28058 cost = ix86_cost->movzbl_load;
28060 cost = ix86_cost->int_load[0];
28062 return MAX (cost, ix86_cost->int_store[0]);
28068 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
28070 return ix86_cost->movzbl_load;
28072 return ix86_cost->int_store[0] + 4;
28077 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
28078 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
28080 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
28081 if (mode == TFmode)
28084 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
28086 cost = ix86_cost->int_load[2];
28088 cost = ix86_cost->int_store[2];
28089 return (cost * (((int) GET_MODE_SIZE (mode)
28090 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
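/* Editor's worked example -- not in the original file: on a 32-bit
   target with UNITS_PER_WORD == 4, a TFmode value is costed as XFmode
   (12 bytes), so the fall-through case above charges
     cost * ((12 + 4 - 1) / 4) = cost * 3
   i.e. three 32-bit moves.  */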
28095 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
28098 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
28102 /* Return the cost of moving data from a register in class CLASS1 to
28103 one in class CLASS2.
28105 It is not required that the cost always equal 2 when FROM is the same as TO;
28106 on some machines it is expensive to move between registers if they are not
28107 general registers. */
28110 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
28111 reg_class_t class2_i)
28113 enum reg_class class1 = (enum reg_class) class1_i;
28114 enum reg_class class2 = (enum reg_class) class2_i;
28116 /* In case we require secondary memory, compute cost of the store followed
28117 by load. In order to avoid bad register allocation choices, we need
28118 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
28120 if (inline_secondary_memory_needed (class1, class2, mode, 0))
28124 cost += inline_memory_move_cost (mode, class1, 2);
28125 cost += inline_memory_move_cost (mode, class2, 2);
28127 /* In case of copying from a general purpose register we may emit multiple
28128 stores followed by a single load, causing a memory size mismatch stall.
28129 Count this as an arbitrarily high cost of 20. */
28130 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
28133 /* In the case of FP/MMX moves, the registers actually overlap, and we
28134 have to switch modes in order to treat them differently. */
28135 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
28136 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
28142 /* Moves between SSE/MMX and integer unit are expensive. */
28143 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
28144 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
28146 /* ??? By keeping the returned value relatively high, we limit the number
28147 of moves between integer and MMX/SSE registers for all targets.
28148 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
28149 where integer modes in MMX/SSE registers are not tieable
28150 because of missing QImode and HImode moves to, from or between
28151 MMX/SSE registers. */
28152 return MAX (8, ix86_cost->mmxsse_to_integer);
28154 if (MAYBE_FLOAT_CLASS_P (class1))
28155 return ix86_cost->fp_move;
28156 if (MAYBE_SSE_CLASS_P (class1))
28157 return ix86_cost->sse_move;
28158 if (MAYBE_MMX_CLASS_P (class1))
28159 return ix86_cost->mmx_move;
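/* Editor's worked example -- not in the original file: per the checks
   above, a DImode copy between SSE_REGS and GENERAL_REGS is priced at
   MAX (8, mmxsse_to_integer), while an SSE_REGS-to-SSE_REGS copy costs
   only sse_move; the gap is what steers the allocator away from
   needless cross-unit moves.  */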
28163 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
28166 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
28168 /* Flags, and only flags, can hold CCmode values. */
28169 if (CC_REGNO_P (regno))
28170 return GET_MODE_CLASS (mode) == MODE_CC;
28171 if (GET_MODE_CLASS (mode) == MODE_CC
28172 || GET_MODE_CLASS (mode) == MODE_RANDOM
28173 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
28175 if (FP_REGNO_P (regno))
28176 return VALID_FP_MODE_P (mode);
28177 if (SSE_REGNO_P (regno))
28179 /* We implement the move patterns for all vector modes into and
28180 out of SSE registers, even when no operation instructions
28181 are available. OImode move is available only when AVX is enabled. */
28183 return ((TARGET_AVX && mode == OImode)
28184 || VALID_AVX256_REG_MODE (mode)
28185 || VALID_SSE_REG_MODE (mode)
28186 || VALID_SSE2_REG_MODE (mode)
28187 || VALID_MMX_REG_MODE (mode)
28188 || VALID_MMX_REG_MODE_3DNOW (mode));
28190 if (MMX_REGNO_P (regno))
28192 /* We implement the move patterns for 3DNOW modes even in MMX mode,
28193 so if the register is available at all, then we can move data of
28194 the given mode into or out of it. */
28195 return (VALID_MMX_REG_MODE (mode)
28196 || VALID_MMX_REG_MODE_3DNOW (mode));
28199 if (mode == QImode)
28201 /* Take care with QImode values - they can be in non-QI regs,
28202 but then they do cause partial register stalls. */
28203 if (regno <= BX_REG || TARGET_64BIT)
28205 if (!TARGET_PARTIAL_REG_STALL)
28207 return reload_in_progress || reload_completed;
28209 /* We handle both integer and floats in the general purpose registers. */
28210 else if (VALID_INT_MODE_P (mode))
28212 else if (VALID_FP_MODE_P (mode))
28214 else if (VALID_DFP_MODE_P (mode))
28216 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
28217 on to use that value in smaller contexts, this can easily force a
28218 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
28219 supporting DImode, allow it. */
28220 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
28226 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
28227 tieable integer mode. */
28230 ix86_tieable_integer_mode_p (enum machine_mode mode)
28239 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
28242 return TARGET_64BIT;
28249 /* Return true if MODE1 is accessible in a register that can hold MODE2
28250 without copying. That is, all register classes that can hold MODE2
28251 can also hold MODE1. */
28254 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
28256 if (mode1 == mode2)
28259 if (ix86_tieable_integer_mode_p (mode1)
28260 && ix86_tieable_integer_mode_p (mode2))
28263 /* MODE2 being XFmode implies fp stack or general regs, which means we
28264 can tie any smaller floating point modes to it. Note that we do not
28265 tie this with TFmode. */
28266 if (mode2 == XFmode)
28267 return mode1 == SFmode || mode1 == DFmode;
28269 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
28270 that we can tie it with SFmode. */
28271 if (mode2 == DFmode)
28272 return mode1 == SFmode;
28274 /* If MODE2 is only appropriate for an SSE register, then tie with
28275 any other mode acceptable to SSE registers. */
28276 if (GET_MODE_SIZE (mode2) == 16
28277 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
28278 return (GET_MODE_SIZE (mode1) == 16
28279 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
28281 /* If MODE2 is appropriate for an MMX register, then tie
28282 with any other mode acceptable to MMX registers. */
28283 if (GET_MODE_SIZE (mode2) == 8
28284 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
28285 return (GET_MODE_SIZE (mode1) == 8
28286 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
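/* Editor's worked examples -- not in the original file: by the rules
   above, ix86_modes_tieable_p (SFmode, DFmode) and
   ix86_modes_tieable_p (DFmode, XFmode) are both true, while TFmode is
   deliberately not tied to XFmode, per the comment above.  */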
28291 /* Compute a (partial) cost for rtx X. Return true if the complete
28292 cost has been computed, and false if subexpressions should be
28293 scanned. In either case, *TOTAL contains the cost result. */
28296 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
28298 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
28299 enum machine_mode mode = GET_MODE (x);
28300 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
28308 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
28310 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
28312 else if (flag_pic && SYMBOLIC_CONST (x)
28314 || (GET_CODE (x) != LABEL_REF
28315 && (GET_CODE (x) != SYMBOL_REF
28316 || !SYMBOL_REF_LOCAL_P (x)))))
28323 if (mode == VOIDmode)
28326 switch (standard_80387_constant_p (x))
28331 default: /* Other constants */
28336 /* Start with (MEM (SYMBOL_REF)), since that's where
28337 it'll probably end up. Add a penalty for size. */
28338 *total = (COSTS_N_INSNS (1)
28339 + (flag_pic != 0 && !TARGET_64BIT)
28340 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
28346 /* Zero extension is often completely free on x86_64, so make
28347 it as cheap as possible. */
28348 if (TARGET_64BIT && mode == DImode
28349 && GET_MODE (XEXP (x, 0)) == SImode)
28351 else if (TARGET_ZERO_EXTEND_WITH_AND)
28352 *total = cost->add;
28354 *total = cost->movzx;
28358 *total = cost->movsx;
28362 if (CONST_INT_P (XEXP (x, 1))
28363 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
28365 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28368 *total = cost->add;
28371 if ((value == 2 || value == 3)
28372 && cost->lea <= cost->shift_const)
28374 *total = cost->lea;
28384 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
28386 if (CONST_INT_P (XEXP (x, 1)))
28388 if (INTVAL (XEXP (x, 1)) > 32)
28389 *total = cost->shift_const + COSTS_N_INSNS (2);
28391 *total = cost->shift_const * 2;
28395 if (GET_CODE (XEXP (x, 1)) == AND)
28396 *total = cost->shift_var * 2;
28398 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
28403 if (CONST_INT_P (XEXP (x, 1)))
28404 *total = cost->shift_const;
28406 *total = cost->shift_var;
28411 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28413 /* ??? SSE scalar cost should be used here. */
28414 *total = cost->fmul;
28417 else if (X87_FLOAT_MODE_P (mode))
28419 *total = cost->fmul;
28422 else if (FLOAT_MODE_P (mode))
28424 /* ??? SSE vector cost should be used here. */
28425 *total = cost->fmul;
28430 rtx op0 = XEXP (x, 0);
28431 rtx op1 = XEXP (x, 1);
28433 if (CONST_INT_P (XEXP (x, 1)))
28435 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
28436 for (nbits = 0; value != 0; value &= value - 1)
28440 /* This is arbitrary. */
28443 /* Compute costs correctly for widening multiplication. */
28444 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
28445 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
28446 == GET_MODE_SIZE (mode))
28448 int is_mulwiden = 0;
28449 enum machine_mode inner_mode = GET_MODE (op0);
28451 if (GET_CODE (op0) == GET_CODE (op1))
28452 is_mulwiden = 1, op1 = XEXP (op1, 0);
28453 else if (CONST_INT_P (op1))
28455 if (GET_CODE (op0) == SIGN_EXTEND)
28456 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
28459 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
28463 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
28466 *total = (cost->mult_init[MODE_INDEX (mode)]
28467 + nbits * cost->mult_bit
28468 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
28477 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28478 /* ??? SSE cost should be used here. */
28479 *total = cost->fdiv;
28480 else if (X87_FLOAT_MODE_P (mode))
28481 *total = cost->fdiv;
28482 else if (FLOAT_MODE_P (mode))
28483 /* ??? SSE vector cost should be used here. */
28484 *total = cost->fdiv;
28486 *total = cost->divide[MODE_INDEX (mode)];
28490 if (GET_MODE_CLASS (mode) == MODE_INT
28491 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
28493 if (GET_CODE (XEXP (x, 0)) == PLUS
28494 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
28495 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
28496 && CONSTANT_P (XEXP (x, 1)))
28498 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
28499 if (val == 2 || val == 4 || val == 8)
28501 *total = cost->lea;
28502 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28503 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
28504 outer_code, speed);
28505 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28509 else if (GET_CODE (XEXP (x, 0)) == MULT
28510 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
28512 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
28513 if (val == 2 || val == 4 || val == 8)
28515 *total = cost->lea;
28516 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28517 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28521 else if (GET_CODE (XEXP (x, 0)) == PLUS)
28523 *total = cost->lea;
28524 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
28525 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
28526 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
28533 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28535 /* ??? SSE cost should be used here. */
28536 *total = cost->fadd;
28539 else if (X87_FLOAT_MODE_P (mode))
28541 *total = cost->fadd;
28544 else if (FLOAT_MODE_P (mode))
28546 /* ??? SSE vector cost should be used here. */
28547 *total = cost->fadd;
28555 if (!TARGET_64BIT && mode == DImode)
28557 *total = (cost->add * 2
28558 + (rtx_cost (XEXP (x, 0), outer_code, speed)
28559 << (GET_MODE (XEXP (x, 0)) != DImode))
28560 + (rtx_cost (XEXP (x, 1), outer_code, speed)
28561 << (GET_MODE (XEXP (x, 1)) != DImode)));
28567 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28569 /* ??? SSE cost should be used here. */
28570 *total = cost->fchs;
28573 else if (X87_FLOAT_MODE_P (mode))
28575 *total = cost->fchs;
28578 else if (FLOAT_MODE_P (mode))
28580 /* ??? SSE vector cost should be used here. */
28581 *total = cost->fchs;
28587 if (!TARGET_64BIT && mode == DImode)
28588 *total = cost->add * 2;
28590 *total = cost->add;
28594 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
28595 && XEXP (XEXP (x, 0), 1) == const1_rtx
28596 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
28597 && XEXP (x, 1) == const0_rtx)
28599 /* This kind of construct is implemented using test[bwl].
28600 Treat it as if we had an AND. */
28601 *total = (cost->add
28602 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
28603 + rtx_cost (const1_rtx, outer_code, speed));
28609 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
28614 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28615 /* ??? SSE cost should be used here. */
28616 *total = cost->fabs;
28617 else if (X87_FLOAT_MODE_P (mode))
28618 *total = cost->fabs;
28619 else if (FLOAT_MODE_P (mode))
28620 /* ??? SSE vector cost should be used here. */
28621 *total = cost->fabs;
28625 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
28626 /* ??? SSE cost should be used here. */
28627 *total = cost->fsqrt;
28628 else if (X87_FLOAT_MODE_P (mode))
28629 *total = cost->fsqrt;
28630 else if (FLOAT_MODE_P (mode))
28631 /* ??? SSE vector cost should be used here. */
28632 *total = cost->fsqrt;
28636 if (XINT (x, 1) == UNSPEC_TP)
28643 case VEC_DUPLICATE:
28644 /* ??? Assume all of these vector manipulation patterns are
28645 recognizable, in which case they all pretty much have the same cost. */
28647 *total = COSTS_N_INSNS (1);
28657 static int current_machopic_label_num;
28659 /* Given a symbol name and its associated stub, write out the
28660 definition of the stub. */
28663 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28665 unsigned int length;
28666 char *binder_name, *symbol_name, lazy_ptr_name[32];
28667 int label = ++current_machopic_label_num;
28669 /* For 64-bit we shouldn't get here. */
28670 gcc_assert (!TARGET_64BIT);
28672 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28673 symb = targetm.strip_name_encoding (symb);
28675 length = strlen (stub);
28676 binder_name = XALLOCAVEC (char, length + 32);
28677 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
28679 length = strlen (symb);
28680 symbol_name = XALLOCAVEC (char, length + 32);
28681 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28683 sprintf (lazy_ptr_name, "L%d$lz", label);
28685 if (MACHOPIC_ATT_STUB)
28686 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
28687 else if (MACHOPIC_PURE)
28689 if (TARGET_DEEP_BRANCH_PREDICTION)
28690 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
28692 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
28695 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
28697 fprintf (file, "%s:\n", stub);
28698 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28700 if (MACHOPIC_ATT_STUB)
28702 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
28704 else if (MACHOPIC_PURE)
28707 if (TARGET_DEEP_BRANCH_PREDICTION)
28709 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28710 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
28711 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
28712 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
28716 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
28717 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
28718 fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
28720 fprintf (file, "\tjmp\t*%%ecx\n");
28723 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
28725 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
28726 it needs no stub-binding-helper. */
28727 if (MACHOPIC_ATT_STUB)
28730 fprintf (file, "%s:\n", binder_name);
28734 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
28735 fprintf (file, "\tpushl\t%%ecx\n");
28738 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
28740 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
28742 /* N.B. Keep the correspondence of these
28743 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
28744 old-pic/new-pic/non-pic stubs; altering this will break
28745 compatibility with existing dylibs. */
28749 if (TARGET_DEEP_BRANCH_PREDICTION)
28750 /* 25-byte PIC stub using "CALL get_pc_thunk". */
28751 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
28753 /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
28754 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
28757 /* 16-byte -mdynamic-no-pic stub. */
29758 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
28760 fprintf (file, "%s:\n", lazy_ptr_name);
28761 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28762 fprintf (file, ASM_LONG "%s\n", binder_name);
28764 #endif /* TARGET_MACHO */
28766 /* Order the registers for the register allocator. */
28769 x86_order_regs_for_local_alloc (void)
28774 /* First allocate the local general purpose registers. */
28775 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28776 if (GENERAL_REGNO_P (i) && call_used_regs[i])
28777 reg_alloc_order [pos++] = i;
28779 /* Global general purpose registers. */
28780 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
28781 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
28782 reg_alloc_order [pos++] = i;
28784 /* x87 registers come first in case we are doing FP math using them. */
28786 if (!TARGET_SSE_MATH)
28787 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
28788 reg_alloc_order [pos++] = i;
28790 /* SSE registers. */
28791 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
28792 reg_alloc_order [pos++] = i;
28793 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
28794 reg_alloc_order [pos++] = i;
28796 /* x87 registers. */
28797 if (TARGET_SSE_MATH)
28798 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
28799 reg_alloc_order [pos++] = i;
28801 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
28802 reg_alloc_order [pos++] = i;
28804 /* Initialize the rest of the array, as we do not allocate some registers at all. */
28806 while (pos < FIRST_PSEUDO_REGISTER)
28807 reg_alloc_order [pos++] = 0;
28810 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
28811 struct attribute_spec.handler. */
28813 ix86_handle_abi_attribute (tree *node, tree name,
28814 tree args ATTRIBUTE_UNUSED,
28815 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28817 if (TREE_CODE (*node) != FUNCTION_TYPE
28818 && TREE_CODE (*node) != METHOD_TYPE
28819 && TREE_CODE (*node) != FIELD_DECL
28820 && TREE_CODE (*node) != TYPE_DECL)
28822 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28824 *no_add_attrs = true;
28829 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
28831 *no_add_attrs = true;
28835 /* Can combine regparm with all attributes but fastcall. */
28836 if (is_attribute_p ("ms_abi", name))
28838 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
28840 error ("ms_abi and sysv_abi attributes are not compatible");
28845 else if (is_attribute_p ("sysv_abi", name))
28847 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
28849 error ("ms_abi and sysv_abi attributes are not compatible");
28858 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28859 struct attribute_spec.handler. */
28861 ix86_handle_struct_attribute (tree *node, tree name,
28862 tree args ATTRIBUTE_UNUSED,
28863 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28866 if (DECL_P (*node))
28868 if (TREE_CODE (*node) == TYPE_DECL)
28869 type = &TREE_TYPE (*node);
28874 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28875 || TREE_CODE (*type) == UNION_TYPE)))
28877 warning (OPT_Wattributes, "%qE attribute ignored",
28879 *no_add_attrs = true;
28882 else if ((is_attribute_p ("ms_struct", name)
28883 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28884 || ((is_attribute_p ("gcc_struct", name)
28885 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28887 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28889 *no_add_attrs = true;
28896 ix86_handle_fndecl_attribute (tree *node, tree name,
28897 tree args ATTRIBUTE_UNUSED,
28898 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28900 if (TREE_CODE (*node) != FUNCTION_DECL)
28902 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28904 *no_add_attrs = true;
28910 ix86_ms_bitfield_layout_p (const_tree record_type)
28912 return ((TARGET_MS_BITFIELD_LAYOUT
28913 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28914 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
28917 /* Returns an expression indicating where the this parameter is
28918 located on entry to the FUNCTION. */
28921 x86_this_parameter (tree function)
28923 tree type = TREE_TYPE (function);
28924 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
28929 const int *parm_regs;
28931 if (ix86_function_type_abi (type) == MS_ABI)
28932 parm_regs = x86_64_ms_abi_int_parameter_registers;
28934 parm_regs = x86_64_int_parameter_registers;
28935 return gen_rtx_REG (DImode, parm_regs[aggr]);
28938 nregs = ix86_function_regparm (type, function);
28940 if (nregs > 0 && !stdarg_p (type))
28944 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
28945 regno = aggr ? DX_REG : CX_REG;
28946 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
28950 return gen_rtx_MEM (SImode,
28951 plus_constant (stack_pointer_rtx, 4));
28960 return gen_rtx_MEM (SImode,
28961 plus_constant (stack_pointer_rtx, 4));
28964 return gen_rtx_REG (SImode, regno);
28967 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
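/* Editor's worked example -- not in the original file: per the logic
   above, a 32-bit fastcall method receives "this" in %ecx (%edx when a
   hidden aggregate-return pointer takes the first slot), thiscall uses
   %ecx, and the plain stack-args case finds "this" at 4(%esp), or
   8(%esp) past an aggregate-return pointer.  */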
28970 /* Determine whether x86_output_mi_thunk can succeed. */
28973 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
28974 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
28975 HOST_WIDE_INT vcall_offset, const_tree function)
28977 /* 64-bit can handle anything. */
28981 /* For 32-bit, everything's fine if we have one free register. */
28982 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
28985 /* Need a free register for vcall_offset. */
28989 /* Need a free register for GOT references. */
28990 if (flag_pic && !targetm.binds_local_p (function))
28993 /* Otherwise ok. */
28997 /* Output the assembler code for a thunk function. THUNK_DECL is the
28998 declaration for the thunk function itself, FUNCTION is the decl for
28999 the target function. DELTA is an immediate constant offset to be
29000 added to THIS. If VCALL_OFFSET is nonzero, the word at
29001 *(*this + vcall_offset) should be added to THIS. */
29004 x86_output_mi_thunk (FILE *file,
29005 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
29006 HOST_WIDE_INT vcall_offset, tree function)
29009 rtx this_param = x86_this_parameter (function);
29012 /* Make sure unwind info is emitted for the thunk if needed. */
29013 final_start_function (emit_barrier (), file, 1);
29015 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
29016 pull it in now and let DELTA benefit. */
29017 if (REG_P (this_param))
29018 this_reg = this_param;
29019 else if (vcall_offset)
29021 /* Put the this parameter into %eax. */
29022 xops[0] = this_param;
29023 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
29024 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29027 this_reg = NULL_RTX;
29029 /* Adjust the this parameter by a fixed constant. */
29032 xops[0] = GEN_INT (delta);
29033 xops[1] = this_reg ? this_reg : this_param;
29036 if (!x86_64_general_operand (xops[0], DImode))
29038 tmp = gen_rtx_REG (DImode, R10_REG);
29040 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
29042 xops[1] = this_param;
29044 if (x86_maybe_negate_const_int (&xops[0], DImode))
29045 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
29047 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
29049 else if (x86_maybe_negate_const_int (&xops[0], SImode))
29050 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
29052 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
29055 /* Adjust the this parameter by a value stored in the vtable. */
29059 tmp = gen_rtx_REG (DImode, R10_REG);
29062 int tmp_regno = CX_REG;
29063 if (lookup_attribute ("fastcall",
29064 TYPE_ATTRIBUTES (TREE_TYPE (function)))
29065 || lookup_attribute ("thiscall",
29066 TYPE_ATTRIBUTES (TREE_TYPE (function))))
29067 tmp_regno = AX_REG;
29068 tmp = gen_rtx_REG (SImode, tmp_regno);
29071 xops[0] = gen_rtx_MEM (Pmode, this_reg);
29073 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29075 /* Adjust the this parameter. */
29076 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
29077 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
29079 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
29080 xops[0] = GEN_INT (vcall_offset);
29082 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
29083 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
29085 xops[1] = this_reg;
29086 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
29089 /* If necessary, drop THIS back to its stack slot. */
29090 if (this_reg && this_reg != this_param)
29092 xops[0] = this_reg;
29093 xops[1] = this_param;
29094 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
29097 xops[0] = XEXP (DECL_RTL (function), 0);
29100 if (!flag_pic || targetm.binds_local_p (function))
29101 output_asm_insn ("jmp\t%P0", xops);
29102 /* All thunks should be in the same object as their target,
29103 and thus binds_local_p should be true. */
29104 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
29105 gcc_unreachable ();
29108 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
29109 tmp = gen_rtx_CONST (Pmode, tmp);
29110 tmp = gen_rtx_MEM (QImode, tmp);
29112 output_asm_insn ("jmp\t%A0", xops);
29117 if (!flag_pic || targetm.binds_local_p (function))
29118 output_asm_insn ("jmp\t%P0", xops);
29123 rtx sym_ref = XEXP (DECL_RTL (function), 0);
29124 if (TARGET_MACHO_BRANCH_ISLANDS)
29125 sym_ref = (gen_rtx_SYMBOL_REF
29127 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
29128 tmp = gen_rtx_MEM (QImode, sym_ref);
29130 output_asm_insn ("jmp\t%0", xops);
29133 #endif /* TARGET_MACHO */
29135 tmp = gen_rtx_REG (SImode, CX_REG);
29136 output_set_got (tmp, NULL_RTX);
29139 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
29140 output_asm_insn ("jmp\t{*}%1", xops);
29143 final_end_function ();
29147 x86_file_start (void)
29149 default_file_start ();
29151 darwin_file_start ();
29153 if (X86_FILE_START_VERSION_DIRECTIVE)
29154 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
29155 if (X86_FILE_START_FLTUSED)
29156 fputs ("\t.global\t__fltused\n", asm_out_file);
29157 if (ix86_asm_dialect == ASM_INTEL)
29158 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
29162 x86_field_alignment (tree field, int computed)
29164 enum machine_mode mode;
29165 tree type = TREE_TYPE (field);
29167 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
29169 mode = TYPE_MODE (strip_array_types (type));
29170 if (mode == DFmode || mode == DCmode
29171 || GET_MODE_CLASS (mode) == MODE_INT
29172 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
29173 return MIN (32, computed);
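/* Editor's worked example -- not in the original file: the test above
   means that on 32-bit x86 without -malign-double a "double" field is
   capped at 32-bit alignment (the traditional i386 struct layout),
   while on TARGET_64BIT the computed alignment is returned as-is.  */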
29177 /* Output assembler code to FILE to increment profiler label # LABELNO
29178 for profiling a function entry. */
29180 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
29182 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
29187 #ifndef NO_PROFILE_COUNTERS
29188 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
29191 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
29192 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
29194 fprintf (file, "\tcall\t%s\n", mcount_name);
29198 #ifndef NO_PROFILE_COUNTERS
29199 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
29202 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
29206 #ifndef NO_PROFILE_COUNTERS
29207 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
29210 fprintf (file, "\tcall\t%s\n", mcount_name);
29214 /* We don't have exact information about the insn sizes, but we may assume
29215 quite safely that we are informed about all 1 byte insns and memory
29216 address sizes. This is enough to eliminate unnecessary padding in most cases. */
29220 min_insn_size (rtx insn)
29224 if (!INSN_P (insn) || !active_insn_p (insn))
29227 /* Discard alignments we've emitted, and jump table data. */
29228 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
29229 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
29231 if (JUMP_TABLE_DATA_P (insn))
29234 /* Important case - calls are always 5 bytes.
29235 It is common to have many calls in the row. */
29237 && symbolic_reference_mentioned_p (PATTERN (insn))
29238 && !SIBLING_CALL_P (insn))
29240 len = get_attr_length (insn);
29244 /* For normal instructions we rely on get_attr_length being exact,
29245 with a few exceptions. */
29246 if (!JUMP_P (insn))
29248 enum attr_type type = get_attr_type (insn);
29253 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
29254 || asm_noperands (PATTERN (insn)) >= 0)
29261 /* Otherwise trust get_attr_length. */
29265 l = get_attr_length_address (insn);
29266 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
29275 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29277 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
29281 ix86_avoid_jump_mispredicts (void)
29283 rtx insn, start = get_insns ();
29284 int nbytes = 0, njumps = 0;
29287 /* Look for all minimal intervals of instructions containing 4 jumps.
29288 The intervals are bounded by START and INSN. NBYTES is the total
29289 size of instructions in the interval including INSN and not including
29290 START. When the NBYTES is smaller than 16 bytes, it is possible
29291 that the end of START and INSN ends up in the same 16byte page.
29293 The smallest offset in the page INSN can start is the case where START
29294 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
29295 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
29297 for (insn = start; insn; insn = NEXT_INSN (insn))
29301 if (LABEL_P (insn))
29303 int align = label_to_alignment (insn);
29304 int max_skip = label_to_max_skip (insn);
29308 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
29309 already in the current 16 byte page, because otherwise
29310 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
29311 bytes to reach 16 byte boundary. */
29313 || (align <= 3 && max_skip != (1 << align) - 1))
29316 fprintf (dump_file, "Label %i with max_skip %i\n",
29317 INSN_UID (insn), max_skip);
29320 while (nbytes + max_skip >= 16)
29322 start = NEXT_INSN (start);
29323 if ((JUMP_P (start)
29324 && GET_CODE (PATTERN (start)) != ADDR_VEC
29325 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29327 njumps--, isjump = 1;
29330 nbytes -= min_insn_size (start);
29336 min_size = min_insn_size (insn);
29337 nbytes += min_size;
29339 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
29340 INSN_UID (insn), min_size);
29342 && GET_CODE (PATTERN (insn)) != ADDR_VEC
29343 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
29351 start = NEXT_INSN (start);
29352 if ((JUMP_P (start)
29353 && GET_CODE (PATTERN (start)) != ADDR_VEC
29354 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
29356 njumps--, isjump = 1;
29359 nbytes -= min_insn_size (start);
29361 gcc_assert (njumps >= 0);
29363 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
29364 INSN_UID (start), INSN_UID (insn), nbytes);
29366 if (njumps == 3 && isjump && nbytes < 16)
29368 int padsize = 15 - nbytes + min_insn_size (insn);
29371 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
29372 INSN_UID (insn), padsize);
29373 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
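/* Editor's worked example -- not in the original file: if the interval
   ends with nbytes == 12 and the final jump's minimal size is 2 bytes,
   then padsize = 15 - 12 + 2 = 5, enough to guarantee the four jumps
   can no longer fall into one 16-byte window.  */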
29379 /* AMD Athlon works faster
29380 when RET is not the destination of a conditional jump or directly preceded
29381 by another jump instruction. We avoid the penalty by inserting a NOP just
29382 before the RET instruction in such cases. */
29384 ix86_pad_returns (void)
29389 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29391 basic_block bb = e->src;
29392 rtx ret = BB_END (bb);
29394 bool replace = false;
29396 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
29397 || optimize_bb_for_size_p (bb))
29399 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
29400 if (active_insn_p (prev) || LABEL_P (prev))
29402 if (prev && LABEL_P (prev))
29407 FOR_EACH_EDGE (e, ei, bb->preds)
29408 if (EDGE_FREQUENCY (e) && e->src->index >= 0
29409 && !(e->flags & EDGE_FALLTHRU))
29414 prev = prev_active_insn (ret);
29416 && ((JUMP_P (prev) && any_condjump_p (prev))
29419 /* Empty functions get a branch mispredict even when the jump destination
29420 is not visible to us. */
29421 if (!prev && !optimize_function_for_size_p (cfun))
29426 emit_jump_insn_before (gen_return_internal_long (), ret);
29432 /* Count the minimum number of instructions in BB. Return 4 if the
29433 number of instructions >= 4. */
29436 ix86_count_insn_bb (basic_block bb)
29439 int insn_count = 0;
29441 /* Count number of instructions in this block. Return 4 if the number
29442 of instructions >= 4. */
29443 FOR_BB_INSNS (bb, insn)
29445 /* This only happens in exit blocks. */
29447 && GET_CODE (PATTERN (insn)) == RETURN)
29450 if (NONDEBUG_INSN_P (insn)
29451 && GET_CODE (PATTERN (insn)) != USE
29452 && GET_CODE (PATTERN (insn)) != CLOBBER)
29455 if (insn_count >= 4)
29464 /* Count the minimum number of instructions in code path in BB.
29465 Return 4 if the number of instructions >= 4. */
29468 ix86_count_insn (basic_block bb)
29472 int min_prev_count;
29474 /* Only bother counting instructions along paths with no
29475 more than 2 basic blocks between entry and exit. Given
29476 that BB has an edge to exit, determine if a predecessor
29477 of BB has an edge from entry. If so, compute the number
29478 of instructions in the predecessor block. If there
29479 happen to be multiple such blocks, compute the minimum. */
29480 min_prev_count = 4;
29481 FOR_EACH_EDGE (e, ei, bb->preds)
29484 edge_iterator prev_ei;
29486 if (e->src == ENTRY_BLOCK_PTR)
29488 min_prev_count = 0;
29491 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
29493 if (prev_e->src == ENTRY_BLOCK_PTR)
29495 int count = ix86_count_insn_bb (e->src);
29496 if (count < min_prev_count)
29497 min_prev_count = count;
29503 if (min_prev_count < 4)
29504 min_prev_count += ix86_count_insn_bb (bb);
29506 return min_prev_count;
29509 /* Pad short function to 4 instructions. */
29512 ix86_pad_short_function (void)
29517 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
29519 rtx ret = BB_END (e->src);
29520 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
29522 int insn_count = ix86_count_insn (e->src);
29524 /* Pad short function. */
29525 if (insn_count < 4)
29529 /* Find epilogue. */
29532 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
29533 insn = PREV_INSN (insn);
29538 /* Two NOPs are counted as one instruction. */
29539 insn_count = 2 * (4 - insn_count);
29540 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
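/* Editor's worked example -- not in the original file: a function whose
   shortest path through the CFG contains one instruction receives
   2 * (4 - 1) = 6 NOPs before the epilogue; the factor of two matches
   the "two NOPs are counted as one instruction" rule above.  */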
29546 /* Implement machine specific optimizations. We implement padding of returns
29547 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
29551 if (optimize && optimize_function_for_speed_p (cfun))
29553 if (TARGET_PAD_SHORT_FUNCTION)
29554 ix86_pad_short_function ();
29555 else if (TARGET_PAD_RETURNS)
29556 ix86_pad_returns ();
29557 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
29558 if (TARGET_FOUR_JUMP_LIMIT)
29559 ix86_avoid_jump_mispredicts ();
29563 /* Run the vzeroupper optimization if needed. */
29564 if (cfun->machine->use_vzeroupper_p)
29565 move_or_delete_vzeroupper ();
29568 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
29571 x86_extended_QIreg_mentioned_p (rtx insn)
29574 extract_insn_cached (insn);
29575 for (i = 0; i < recog_data.n_operands; i++)
29576 if (REG_P (recog_data.operand[i])
29577 && REGNO (recog_data.operand[i]) > BX_REG)
29582 /* Return nonzero when P points to a register encoded via a REX prefix.
29583 Called via for_each_rtx. */
29585 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
29587 unsigned int regno;
29590 regno = REGNO (*p);
29591 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
29594 /* Return true when INSN mentions register that must be encoded using REX
29597 x86_extended_reg_mentioned_p (rtx insn)
29599 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
29600 extended_reg_mentioned_1, NULL);
29603 /* If profitable, negate (without causing overflow) integer constant
29604 of mode MODE at location LOC. Return true in this case. */
29606 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
29610 if (!CONST_INT_P (*loc))
29616 /* DImode x86_64 constants must fit in 32 bits. */
29617 gcc_assert (x86_64_immediate_operand (*loc, mode));
29628 gcc_unreachable ();
29631 /* Avoid overflows. */
29632 if (mode_signbit_p (mode, *loc))
29635 val = INTVAL (*loc);
29637 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
29638 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
29639 if ((val < 0 && val != -128)
29642 *loc = GEN_INT (-val);
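/* Editor's worked examples -- not in the original file: by the rule
   above, "addl $-4, %eax" is emitted as "subl $4, %eax", and
   "addl $128, %eax" as "subl $-128, %eax", since -128 fits a signed
   8-bit immediate while +128 does not; a constant of -128 itself is
   left untouched.  */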
29649 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
29650 optabs would emit if we didn't have TFmode patterns. */
29653 x86_emit_floatuns (rtx operands[2])
29655 rtx neglab, donelab, i0, i1, f0, in, out;
29656 enum machine_mode mode, inmode;
29658 inmode = GET_MODE (operands[1]);
29659 gcc_assert (inmode == SImode || inmode == DImode);
29662 in = force_reg (inmode, operands[1]);
29663 mode = GET_MODE (out);
29664 neglab = gen_label_rtx ();
29665 donelab = gen_label_rtx ();
29666 f0 = gen_reg_rtx (mode);
29668 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
29670 expand_float (out, in, 0);
29672 emit_jump_insn (gen_jump (donelab));
29675 emit_label (neglab);
29677 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
29679 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
29681 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
29683 expand_float (f0, i0, 0);
29685 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
29687 emit_label (donelab);
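/* Editor's sketch -- not part of GCC: the RTL emitted above corresponds
   to this C algorithm for the unsigned DImode case (u64_to_double is an
   invented name; assumes a 64-bit long long), disabled so it cannot
   affect the build:  */
#if 0
double
u64_to_double (unsigned long long x)
{
  if ((long long) x >= 0)
    return (double) (long long) x;	/* signed conversion is enough */

  /* Halve with the low bit folded back in, so the final rounding
     cannot lose it, convert as a signed value, then double.  */
  unsigned long long half = (x >> 1) | (x & 1);
  double f = (double) (long long) half;
  return f + f;
}
#endif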
29690 /* AVX does not support 32-byte integer vector operations,
29691 thus the longest vector we are faced with is V16QImode. */
29692 #define MAX_VECT_LEN 16
29694 struct expand_vec_perm_d
29696 rtx target, op0, op1;
29697 unsigned char perm[MAX_VECT_LEN];
29698 enum machine_mode vmode;
29699 unsigned char nelt;
29703 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
29704 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
29706 /* Get a vector mode of the same size as the original but with elements
29707 twice as wide. This is only guaranteed to apply to integral vectors. */
29709 static inline enum machine_mode
29710 get_mode_wider_vector (enum machine_mode o)
29712 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
29713 enum machine_mode n = GET_MODE_WIDER_MODE (o);
29714 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
29715 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
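/* Editor's worked example -- not in the original file: by the asserts
   above, get_mode_wider_vector (V16QImode) yields V8HImode: the same
   16 bytes, half as many elements, each twice as wide.  */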
29719 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29720 with all elements equal to VAR. Return true if successful. */
29723 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
29724 rtx target, rtx val)
29747 /* First attempt to recognize VAL as-is. */
29748 dup = gen_rtx_VEC_DUPLICATE (mode, val);
29749 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
29750 if (recog_memoized (insn) < 0)
29753 /* If that fails, force VAL into a register. */
29756 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
29757 seq = get_insns ();
29760 emit_insn_before (seq, insn);
29762 ok = recog_memoized (insn) >= 0;
29771 if (TARGET_SSE || TARGET_3DNOW_A)
29775 val = gen_lowpart (SImode, val);
29776 x = gen_rtx_TRUNCATE (HImode, val);
29777 x = gen_rtx_VEC_DUPLICATE (mode, x);
29778 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29791 struct expand_vec_perm_d dperm;
29795 memset (&dperm, 0, sizeof (dperm));
29796 dperm.target = target;
29797 dperm.vmode = mode;
29798 dperm.nelt = GET_MODE_NUNITS (mode);
29799 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
29801 /* Extend to SImode using a paradoxical SUBREG. */
29802 tmp1 = gen_reg_rtx (SImode);
29803 emit_move_insn (tmp1, gen_lowpart (SImode, val));
29805 /* Insert the SImode value as low element of a V4SImode vector. */
29806 tmp2 = gen_lowpart (V4SImode, dperm.op0);
29807 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
29809 ok = (expand_vec_perm_1 (&dperm)
29810 || expand_vec_perm_broadcast_1 (&dperm));
29822 /* Replicate the value once into the next wider mode and recurse. */
29824 enum machine_mode smode, wsmode, wvmode;
29827 smode = GET_MODE_INNER (mode);
29828 wvmode = get_mode_wider_vector (mode);
29829 wsmode = GET_MODE_INNER (wvmode);
29831 val = convert_modes (wsmode, smode, val, true);
29832 x = expand_simple_binop (wsmode, ASHIFT, val,
29833 GEN_INT (GET_MODE_BITSIZE (smode)),
29834 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29835 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
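/* VAL now holds two copies of the original element packed into one
   wider element, so broadcasting it in the wider vector mode produces
   the desired duplicate in the original mode.  */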
29837 x = gen_lowpart (wvmode, target);
29838 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
29846 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
29847 rtx x = gen_reg_rtx (hvmode);
29849 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
29852 x = gen_rtx_VEC_CONCAT (mode, x, x);
29853 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29862 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
29863 whose ONE_VAR element is VAR, and other elements are zero. Return true
29867 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
29868 rtx target, rtx var, int one_var)
29870 enum machine_mode vsimode;
29873 bool use_vector_set = false;
29878 /* For SSE4.1, we normally use vector set. But if the second
29879 element is zero and inter-unit moves are OK, we use movq
29881 use_vector_set = (TARGET_64BIT
29883 && !(TARGET_INTER_UNIT_MOVES
29889 use_vector_set = TARGET_SSE4_1;
29892 use_vector_set = TARGET_SSE2;
29895 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
29902 use_vector_set = TARGET_AVX;
29905 /* Use ix86_expand_vector_set in 64bit mode only. */
29906 use_vector_set = TARGET_AVX && TARGET_64BIT;
29912 if (use_vector_set)
29914 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
29915 var = force_reg (GET_MODE_INNER (mode), var);
29916 ix86_expand_vector_set (mmx_ok, target, var, one_var);
29932 var = force_reg (GET_MODE_INNER (mode), var);
29933 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
29934 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29939 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
29940 new_target = gen_reg_rtx (mode);
29942 new_target = target;
29943 var = force_reg (GET_MODE_INNER (mode), var);
29944 x = gen_rtx_VEC_DUPLICATE (mode, var);
29945 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
29946 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
29949 /* We need to shuffle the value to the correct position, so
29950 create a new pseudo to store the intermediate result. */
29952 /* With SSE2, we can use the integer shuffle insns. */
29953 if (mode != V4SFmode && TARGET_SSE2)
29955 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
29957 GEN_INT (one_var == 1 ? 0 : 1),
29958 GEN_INT (one_var == 2 ? 0 : 1),
29959 GEN_INT (one_var == 3 ? 0 : 1)));
29960 if (target != new_target)
29961 emit_move_insn (target, new_target);
29965 /* Otherwise convert the intermediate result to V4SFmode and
29966 use the SSE1 shuffle instructions. */
29967 if (mode != V4SFmode)
29969 tmp = gen_reg_rtx (V4SFmode);
29970 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
29975 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
29977 GEN_INT (one_var == 1 ? 0 : 1),
29978 GEN_INT (one_var == 2 ? 0+4 : 1+4),
29979 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
29981 if (mode != V4SFmode)
29982 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
29983 else if (tmp != target)
29984 emit_move_insn (target, tmp);
29986 else if (target != new_target)
29987 emit_move_insn (target, new_target);
29992 vsimode = V4SImode;
29998 vsimode = V2SImode;
30004 /* Zero extend the variable element to SImode and recurse. */
30005 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
30007 x = gen_reg_rtx (vsimode);
30008 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
30010 gcc_unreachable ();
30012 emit_move_insn (target, gen_lowpart (mode, x));
30020 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
30021 consisting of the values in VALS. It is known that all elements
30022 except ONE_VAR are constants. Return true if successful. */
30025 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
30026 rtx target, rtx vals, int one_var)
30028 rtx var = XVECEXP (vals, 0, one_var);
30029 enum machine_mode wmode;
30032 const_vec = copy_rtx (vals);
30033 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
30034 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
30042 /* For the two element vectors, it's just as easy to use
30043 the general case. */
30047 /* Use ix86_expand_vector_set in 64bit mode only. */
30070 /* There's no way to set one QImode entry easily. Combine
30071 the variable value with its adjacent constant value, and
30072 promote to an HImode set. */
30073 x = XVECEXP (vals, 0, one_var ^ 1);
30076 var = convert_modes (HImode, QImode, var, true);
30077 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
30078 NULL_RTX, 1, OPTAB_LIB_WIDEN);
30079 x = GEN_INT (INTVAL (x) & 0xff);
30083 var = convert_modes (HImode, QImode, var, true);
30084 x = gen_int_mode (INTVAL (x) << 8, HImode);
30086 if (x != const0_rtx)
30087 var = expand_simple_binop (HImode, IOR, var, x, var,
30088 1, OPTAB_LIB_WIDEN);
30090 x = gen_reg_rtx (wmode);
30091 emit_move_insn (x, gen_lowpart (wmode, const_vec));
30092 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
30094 emit_move_insn (target, gen_lowpart (mode, x));
30101 emit_move_insn (target, const_vec);
30102 ix86_expand_vector_set (mmx_ok, target, var, one_var);
30106 /* A subroutine of ix86_expand_vector_init_general. Use vector
30107 concatenate to handle the most general case: all values variable,
30108 and none identical. */
30111 ix86_expand_vector_init_concat (enum machine_mode mode,
30112 rtx target, rtx *ops, int n)
30114 enum machine_mode cmode, hmode = VOIDmode;
30115 rtx first[8], second[4];
30155 gcc_unreachable ();
30158 if (!register_operand (ops[1], cmode))
30159 ops[1] = force_reg (cmode, ops[1]);
30160 if (!register_operand (ops[0], cmode))
30161 ops[0] = force_reg (cmode, ops[0]);
30162 emit_insn (gen_rtx_SET (VOIDmode, target,
30163 gen_rtx_VEC_CONCAT (mode, ops[0],
30183 gcc_unreachable ();
30199 gcc_unreachable ();
30204 /* FIXME: We process inputs backward to help RA. PR 36222. */
30207 for (; i > 0; i -= 2, j--)
30209 first[j] = gen_reg_rtx (cmode);
30210 v = gen_rtvec (2, ops[i - 1], ops[i]);
30211 ix86_expand_vector_init (false, first[j],
30212 gen_rtx_PARALLEL (cmode, v));
30218 gcc_assert (hmode != VOIDmode);
30219 for (i = j = 0; i < n; i += 2, j++)
30221 second[j] = gen_reg_rtx (hmode);
30222 ix86_expand_vector_init_concat (hmode, second [j],
30226 ix86_expand_vector_init_concat (mode, target, second, n);
30229 ix86_expand_vector_init_concat (mode, target, first, n);
30233 gcc_unreachable ();
30237 /* A subroutine of ix86_expand_vector_init_general. Use vector
30238 interleave to handle the most general case: all values variable,
30239 and none identical. */
30242 ix86_expand_vector_init_interleave (enum machine_mode mode,
30243 rtx target, rtx *ops, int n)
30245 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
30248 rtx (*gen_load_even) (rtx, rtx, rtx);
30249 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
30250 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
30255 gen_load_even = gen_vec_setv8hi;
30256 gen_interleave_first_low = gen_vec_interleave_lowv4si;
30257 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30258 inner_mode = HImode;
30259 first_imode = V4SImode;
30260 second_imode = V2DImode;
30261 third_imode = VOIDmode;
30264 gen_load_even = gen_vec_setv16qi;
30265 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
30266 gen_interleave_second_low = gen_vec_interleave_lowv4si;
30267 inner_mode = QImode;
30268 first_imode = V8HImode;
30269 second_imode = V4SImode;
30270 third_imode = V2DImode;
30273 gcc_unreachable ();
30276 for (i = 0; i < n; i++)
30278 /* Extend the odd element to SImode using a paradoxical SUBREG. */
30279 op0 = gen_reg_rtx (SImode);
30280 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
30282 /* Insert the SImode value as low element of V4SImode vector. */
30283 op1 = gen_reg_rtx (V4SImode);
30284 op0 = gen_rtx_VEC_MERGE (V4SImode,
30285 gen_rtx_VEC_DUPLICATE (V4SImode,
30287 CONST0_RTX (V4SImode),
30289 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
30291 /* Cast the V4SImode vector back to a vector in the original mode. */
30292 op0 = gen_reg_rtx (mode);
30293 emit_move_insn (op0, gen_lowpart (mode, op1));
30295 /* Load even elements into the second position. */
30296 emit_insn (gen_load_even (op0,
30297 force_reg (inner_mode,
30301 /* Cast vector to FIRST_IMODE vector. */
30302 ops[i] = gen_reg_rtx (first_imode);
30303 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
30306 /* Interleave low FIRST_IMODE vectors. */
30307 for (i = j = 0; i < n; i += 2, j++)
30309 op0 = gen_reg_rtx (first_imode);
30310 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
30312 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
30313 ops[j] = gen_reg_rtx (second_imode);
30314 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
30317 /* Interleave low SECOND_IMODE vectors. */
30318 switch (second_imode)
30321 for (i = j = 0; i < n / 2; i += 2, j++)
30323 op0 = gen_reg_rtx (second_imode);
30324 emit_insn (gen_interleave_second_low (op0, ops[i],
30327 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
30329 ops[j] = gen_reg_rtx (third_imode);
30330 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
30332 second_imode = V2DImode;
30333 gen_interleave_second_low = gen_vec_interleave_lowv2di;
30337 op0 = gen_reg_rtx (second_imode);
30338 emit_insn (gen_interleave_second_low (op0, ops[0],
30341 /* Cast the SECOND_IMODE vector back to a vector on original
30343 emit_insn (gen_rtx_SET (VOIDmode, target,
30344 gen_lowpart (mode, op0)));
30348 gcc_unreachable ();
30352 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
30353 all values variable, and none identical. */
30356 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
30357 rtx target, rtx vals)
30359 rtx ops[32], op0, op1;
30360 enum machine_mode half_mode = VOIDmode;
30367 if (!mmx_ok && !TARGET_SSE)
30379 n = GET_MODE_NUNITS (mode);
30380 for (i = 0; i < n; i++)
30381 ops[i] = XVECEXP (vals, 0, i);
30382 ix86_expand_vector_init_concat (mode, target, ops, n);
30386 half_mode = V16QImode;
30390 half_mode = V8HImode;
30394 n = GET_MODE_NUNITS (mode);
30395 for (i = 0; i < n; i++)
30396 ops[i] = XVECEXP (vals, 0, i);
30397 op0 = gen_reg_rtx (half_mode);
30398 op1 = gen_reg_rtx (half_mode);
30399 ix86_expand_vector_init_interleave (half_mode, op0, ops,
30401 ix86_expand_vector_init_interleave (half_mode, op1,
30402 &ops [n >> 1], n >> 2);
30403 emit_insn (gen_rtx_SET (VOIDmode, target,
30404 gen_rtx_VEC_CONCAT (mode, op0, op1)));
30408 if (!TARGET_SSE4_1)
30416 /* Don't use ix86_expand_vector_init_interleave if we can't
30417 move from GPR to SSE register directly. */
30418 if (!TARGET_INTER_UNIT_MOVES)
30421 n = GET_MODE_NUNITS (mode);
30422 for (i = 0; i < n; i++)
30423 ops[i] = XVECEXP (vals, 0, i);
30424 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
30432 gcc_unreachable ();
30436 int i, j, n_elts, n_words, n_elt_per_word;
30437 enum machine_mode inner_mode;
30438 rtx words[4], shift;
30440 inner_mode = GET_MODE_INNER (mode);
30441 n_elts = GET_MODE_NUNITS (mode);
30442 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
30443 n_elt_per_word = n_elts / n_words;
30444 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
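/* Pack each word-sized group of elements into a scalar word.  The
   highest-numbered element of a group is loaded first and shifted up,
   leaving element 0 of the group in the low bits.  */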
30446 for (i = 0; i < n_words; ++i)
30448 rtx word = NULL_RTX;
30450 for (j = 0; j < n_elt_per_word; ++j)
30452 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
30453 elt = convert_modes (word_mode, inner_mode, elt, true);
30459 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
30460 word, 1, OPTAB_LIB_WIDEN);
30461 word = expand_simple_binop (word_mode, IOR, word, elt,
30462 word, 1, OPTAB_LIB_WIDEN);
30470 emit_move_insn (target, gen_lowpart (mode, words[0]));
30471 else if (n_words == 2)
30473 rtx tmp = gen_reg_rtx (mode);
30474 emit_clobber (tmp);
30475 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
30476 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
30477 emit_move_insn (target, tmp);
30479 else if (n_words == 4)
30481 rtx tmp = gen_reg_rtx (V4SImode);
30482 gcc_assert (word_mode == SImode);
30483 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
30484 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
30485 emit_move_insn (target, gen_lowpart (mode, tmp));
30488 gcc_unreachable ();
30492 /* Initialize vector TARGET via VALS. Suppress the use of MMX
30493 instructions unless MMX_OK is true. */
30496 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
30498 enum machine_mode mode = GET_MODE (target);
30499 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30500 int n_elts = GET_MODE_NUNITS (mode);
30501 int n_var = 0, one_var = -1;
30502 bool all_same = true, all_const_zero = true;
30506 for (i = 0; i < n_elts; ++i)
30508 x = XVECEXP (vals, 0, i);
30509 if (!(CONST_INT_P (x)
30510 || GET_CODE (x) == CONST_DOUBLE
30511 || GET_CODE (x) == CONST_FIXED))
30512 n_var++, one_var = i;
30513 else if (x != CONST0_RTX (inner_mode))
30514 all_const_zero = false;
30515 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
30519 /* Constants are best loaded from the constant pool. */
30522 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
30526 /* If all values are identical, broadcast the value. */
30528 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
30529 XVECEXP (vals, 0, 0)))
30532 /* Values where only one field is non-constant are best loaded from
30533 the pool and overwritten via move later. */
30537 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
30538 XVECEXP (vals, 0, one_var),
30542 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
30546 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
30550 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
30552 enum machine_mode mode = GET_MODE (target);
30553 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30554 enum machine_mode half_mode;
30555 bool use_vec_merge = false;
30557 static rtx (*gen_extract[6][2]) (rtx, rtx)
30559 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
30560 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
30561 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
30562 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
30563 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
30564 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
30566 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
30568 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
30569 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
30570 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
30571 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
30572 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
30573 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
30583 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
30584 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
30586 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
30588 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
30589 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30595 use_vec_merge = TARGET_SSE4_1;
30603 /* For the two element vectors, we implement a VEC_CONCAT with
30604 the extraction of the other element. */
30606 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
30607 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
30610 op0 = val, op1 = tmp;
30612 op0 = tmp, op1 = val;
30614 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
30615 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30620 use_vec_merge = TARGET_SSE4_1;
30627 use_vec_merge = true;
30631 /* tmp = target = A B C D */
30632 tmp = copy_to_reg (target);
30633 /* target = A A B B */
30634 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
30635 /* target = X A B B */
30636 ix86_expand_vector_set (false, target, val, 0);
30637 /* target = A X C D */
30638 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30639 const1_rtx, const0_rtx,
30640 GEN_INT (2+4), GEN_INT (3+4)));
30644 /* tmp = target = A B C D */
30645 tmp = copy_to_reg (target);
30646 /* tmp = X B C D */
30647 ix86_expand_vector_set (false, tmp, val, 0);
30648 /* target = A B X D */
30649 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30650 const0_rtx, const1_rtx,
30651 GEN_INT (0+4), GEN_INT (3+4)));
30655 /* tmp = target = A B C D */
30656 tmp = copy_to_reg (target);
30657 /* tmp = X B C D */
30658 ix86_expand_vector_set (false, tmp, val, 0);
30659 /* target = A B C X */
30660 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
30661 const0_rtx, const1_rtx,
30662 GEN_INT (2+4), GEN_INT (0+4)));
30666 gcc_unreachable ();
30671 use_vec_merge = TARGET_SSE4_1;
30675 /* Element 0 handled by vec_merge below. */
30678 use_vec_merge = true;
30684 /* With SSE2, use integer shuffles to swap element 0 and ELT,
30685 store into element 0, then shuffle them back. */
30689 order[0] = GEN_INT (elt);
30690 order[1] = const1_rtx;
30691 order[2] = const2_rtx;
30692 order[3] = GEN_INT (3);
30693 order[elt] = const0_rtx;
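/* E.g. for ELT == 2 the order is { 2, 1, 0, 3 }: the shuffle swaps
   elements 0 and ELT, and being its own inverse it swaps them back
   after the store into element 0.  */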
30695 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30696 order[1], order[2], order[3]));
30698 ix86_expand_vector_set (false, target, val, 0);
30700 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
30701 order[1], order[2], order[3]));
30705 /* For SSE1, we have to reuse the V4SF code. */
30706 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
30707 gen_lowpart (SFmode, val), elt);
30712 use_vec_merge = TARGET_SSE2;
30715 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
30719 use_vec_merge = TARGET_SSE4_1;
30726 half_mode = V16QImode;
30732 half_mode = V8HImode;
30738 half_mode = V4SImode;
30744 half_mode = V2DImode;
30750 half_mode = V4SFmode;
30756 half_mode = V2DFmode;
30762 /* Compute offset. */
30766 gcc_assert (i <= 1);
30768 /* Extract the half. */
30769 tmp = gen_reg_rtx (half_mode);
30770 emit_insn (gen_extract[j][i] (tmp, target));
30772 /* Put val in tmp at elt. */
30773 ix86_expand_vector_set (false, tmp, val, elt);
30776 emit_insn (gen_insert[j][i] (target, target, tmp));
30785 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
30786 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
30787 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30791 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
30793 emit_move_insn (mem, target);
30795 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
30796 emit_move_insn (tmp, val);
30798 emit_move_insn (target, mem);
30803 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
30805 enum machine_mode mode = GET_MODE (vec);
30806 enum machine_mode inner_mode = GET_MODE_INNER (mode);
30807 bool use_vec_extr = false;
30820 use_vec_extr = true;
30824 use_vec_extr = TARGET_SSE4_1;
30836 tmp = gen_reg_rtx (mode);
30837 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
30838 GEN_INT (elt), GEN_INT (elt),
30839 GEN_INT (elt+4), GEN_INT (elt+4)));
30843 tmp = gen_reg_rtx (mode);
30844 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
30848 gcc_unreachable ();
30851 use_vec_extr = true;
30856 use_vec_extr = TARGET_SSE4_1;
30870 tmp = gen_reg_rtx (mode);
30871 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
30872 GEN_INT (elt), GEN_INT (elt),
30873 GEN_INT (elt), GEN_INT (elt)));
30877 tmp = gen_reg_rtx (mode);
30878 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
30882 gcc_unreachable ();
30885 use_vec_extr = true;
30890 /* For SSE1, we have to reuse the V4SF code. */
30891 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
30892 gen_lowpart (V4SFmode, vec), elt);
30898 use_vec_extr = TARGET_SSE2;
30901 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
30905 use_vec_extr = TARGET_SSE4_1;
30909 /* ??? Could extract the appropriate HImode element and shift. */
30916 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
30917 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
30919 /* Let the rtl optimizers know about the zero extension performed. */
30920 if (inner_mode == QImode || inner_mode == HImode)
30922 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
30923 target = gen_lowpart (SImode, target);
30926 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
30930 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
30932 emit_move_insn (mem, vec);
30934 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
30935 emit_move_insn (target, tmp);
30939 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
30940 pattern to reduce; DEST is the destination; IN is the input vector. */
30943 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
30945 rtx tmp1, tmp2, tmp3;
30947 tmp1 = gen_reg_rtx (V4SFmode);
30948 tmp2 = gen_reg_rtx (V4SFmode);
30949 tmp3 = gen_reg_rtx (V4SFmode);
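/* Reduce across the four lanes in two steps: movhlps combines the high
   half with the low half, then a shufps replicating element 1 lets the
   two partial results in elements 0 and 1 be combined.  */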
30951 emit_insn (gen_sse_movhlps (tmp1, in, in));
30952 emit_insn (fn (tmp2, tmp1, in));
30954 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
30955 const1_rtx, const1_rtx,
30956 GEN_INT (1+4), GEN_INT (1+4)));
30957 emit_insn (fn (dest, tmp2, tmp3));
30960 /* Target hook for scalar_mode_supported_p. */
30962 ix86_scalar_mode_supported_p (enum machine_mode mode)
30964 if (DECIMAL_FLOAT_MODE_P (mode))
30965 return default_decimal_float_supported_p ();
30966 else if (mode == TFmode)
30969 return default_scalar_mode_supported_p (mode);
30972 /* Implements target hook vector_mode_supported_p. */
30974 ix86_vector_mode_supported_p (enum machine_mode mode)
30976 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
30978 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
30980 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
30982 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
30984 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
30989 /* Target hook for c_mode_for_suffix. */
30990 static enum machine_mode
30991 ix86_c_mode_for_suffix (char suffix)
31001 /* Worker function for TARGET_MD_ASM_CLOBBERS.
31003 We do this in the new i386 backend to maintain source compatibility
31004 with the old cc0-based compiler. */
31007 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
31008 tree inputs ATTRIBUTE_UNUSED,
31011 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
31013 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
31018 /* Implements target vector targetm.asm.encode_section_info. This
31019 is not used by NetWare. */
31021 static void ATTRIBUTE_UNUSED
31022 ix86_encode_section_info (tree decl, rtx rtl, int first)
31024 default_encode_section_info (decl, rtl, first);
31026 if (TREE_CODE (decl) == VAR_DECL
31027 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
31028 && ix86_in_large_data_p (decl))
31029 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
31032 /* Worker function for REVERSE_CONDITION. */
31035 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
31037 return (mode != CCFPmode && mode != CCFPUmode
31038 ? reverse_condition (code)
31039 : reverse_condition_maybe_unordered (code));
31042 /* Output code to perform an x87 FP register move, from OPERANDS[1]
31046 output_387_reg_move (rtx insn, rtx *operands)
31048 if (REG_P (operands[0]))
31050 if (REG_P (operands[1])
31051 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31053 if (REGNO (operands[0]) == FIRST_STACK_REG)
31054 return output_387_ffreep (operands, 0);
31055 return "fstp\t%y0";
31057 if (STACK_TOP_P (operands[0]))
31058 return "fld%Z1\t%y1";
31061 else if (MEM_P (operands[0]))
31063 gcc_assert (REG_P (operands[1]));
31064 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
31065 return "fstp%Z0\t%y0";
31068 /* There is no non-popping store to memory for XFmode.
31069 So if we need one, follow the store with a load. */
31070 if (GET_MODE (operands[0]) == XFmode)
31071 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
31073 return "fst%Z0\t%y0";
31080 /* Output code to perform a conditional jump to LABEL, if C2 flag in
31081 FP status register is set. */
31084 ix86_emit_fp_unordered_jump (rtx label)
31086 rtx reg = gen_reg_rtx (HImode);
31089 emit_insn (gen_x86_fnstsw_1 (reg));
31091 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
31093 emit_insn (gen_x86_sahf_1 (reg));
31095 temp = gen_rtx_REG (CCmode, FLAGS_REG);
31096 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
31100 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
31102 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
31103 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
31106 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
31107 gen_rtx_LABEL_REF (VOIDmode, label),
31109 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
31111 emit_jump_insn (temp);
31112 predict_jump (REG_BR_PROB_BASE * 10 / 100);
31115 /* Output code to perform a log1p XFmode calculation. */
31117 void ix86_emit_i387_log1p (rtx op0, rtx op1)
31119 rtx label1 = gen_label_rtx ();
31120 rtx label2 = gen_label_rtx ();
31122 rtx tmp = gen_reg_rtx (XFmode);
31123 rtx tmp2 = gen_reg_rtx (XFmode);
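/* fyl2xp1 is only valid for |op1| < 1 - sqrt(2)/2, about 0.2929; for
   larger magnitudes fall back to fyl2x applied to 1.0 + op1.  */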
31126 emit_insn (gen_absxf2 (tmp, op1));
31127 test = gen_rtx_GE (VOIDmode, tmp,
31128 CONST_DOUBLE_FROM_REAL_VALUE (
31129 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
31131 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
31133 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31134 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
31135 emit_jump (label2);
31137 emit_label (label1);
31138 emit_move_insn (tmp, CONST1_RTX (XFmode));
31139 emit_insn (gen_addxf3 (tmp, op1, tmp));
31140 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
31141 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
31143 emit_label (label2);
31146 /* Output code to perform a Newton-Raphson approximation of a single precision
31147 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
31149 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
31151 rtx x0, x1, e0, e1, two;
31153 x0 = gen_reg_rtx (mode);
31154 e0 = gen_reg_rtx (mode);
31155 e1 = gen_reg_rtx (mode);
31156 x1 = gen_reg_rtx (mode);
31158 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
31160 if (VECTOR_MODE_P (mode))
31161 two = ix86_build_const_vector (mode, true, two);
31163 two = force_reg (mode, two);
31165 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
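/* This is one Newton-Raphson step for the reciprocal: with x0 = rcp(b),
   x1 = x0 * (2.0 - b * x0) roughly doubles the number of correct bits;
   the multiplication by a is folded in as e0 = a * x0.  */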
31167 /* x0 = rcp(b) estimate */
31168 emit_insn (gen_rtx_SET (VOIDmode, x0,
31169 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
31172 emit_insn (gen_rtx_SET (VOIDmode, e0,
31173 gen_rtx_MULT (mode, x0, a)));
31175 emit_insn (gen_rtx_SET (VOIDmode, e1,
31176 gen_rtx_MULT (mode, x0, b)));
31178 emit_insn (gen_rtx_SET (VOIDmode, x1,
31179 gen_rtx_MINUS (mode, two, e1)));
31180 /* res = e0 * x1 */
31181 emit_insn (gen_rtx_SET (VOIDmode, res,
31182 gen_rtx_MULT (mode, e0, x1)));
31185 /* Output code to perform a Newton-Raphson approximation of a
31186 single precision floating point [reciprocal] square root. */
31188 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
31191 rtx x0, e0, e1, e2, e3, mthree, mhalf;
31194 x0 = gen_reg_rtx (mode);
31195 e0 = gen_reg_rtx (mode);
31196 e1 = gen_reg_rtx (mode);
31197 e2 = gen_reg_rtx (mode);
31198 e3 = gen_reg_rtx (mode);
31200 real_from_integer (&r, VOIDmode, -3, -1, 0);
31201 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31203 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
31204 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
31206 if (VECTOR_MODE_P (mode))
31208 mthree = ix86_build_const_vector (mode, true, mthree);
31209 mhalf = ix86_build_const_vector (mode, true, mhalf);
31212 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
31213 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
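/* Both forms are one Newton-Raphson step for 1/sqrt(a),
   x1 = x0 * (3.0 - a * x0 * x0) / 2.0, rewritten with the constants
   negated; for sqrt the extra factor of a is carried in e0 below.  */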
31215 /* x0 = rsqrt(a) estimate */
31216 emit_insn (gen_rtx_SET (VOIDmode, x0,
31217 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
31220 /* If a == 0.0, zero the rsqrt estimate (which is Inf) so the sqrt(0.0) expansion yields 0.0 instead of Inf * 0.0 = NaN. */
31225 zero = gen_reg_rtx (mode);
31226 mask = gen_reg_rtx (mode);
31228 zero = force_reg (mode, CONST0_RTX(mode));
31229 emit_insn (gen_rtx_SET (VOIDmode, mask,
31230 gen_rtx_NE (mode, zero, a)));
31232 emit_insn (gen_rtx_SET (VOIDmode, x0,
31233 gen_rtx_AND (mode, x0, mask)));
31237 emit_insn (gen_rtx_SET (VOIDmode, e0,
31238 gen_rtx_MULT (mode, x0, a)));
31240 emit_insn (gen_rtx_SET (VOIDmode, e1,
31241 gen_rtx_MULT (mode, e0, x0)));
31244 mthree = force_reg (mode, mthree);
31245 emit_insn (gen_rtx_SET (VOIDmode, e2,
31246 gen_rtx_PLUS (mode, e1, mthree)));
31248 mhalf = force_reg (mode, mhalf);
31250 /* e3 = -.5 * x0 */
31251 emit_insn (gen_rtx_SET (VOIDmode, e3,
31252 gen_rtx_MULT (mode, x0, mhalf)));
31254 /* e3 = -.5 * e0 */
31255 emit_insn (gen_rtx_SET (VOIDmode, e3,
31256 gen_rtx_MULT (mode, e0, mhalf)));
31257 /* ret = e2 * e3 */
31258 emit_insn (gen_rtx_SET (VOIDmode, res,
31259 gen_rtx_MULT (mode, e2, e3)));
31262 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
31264 static void ATTRIBUTE_UNUSED
31265 i386_solaris_elf_named_section (const char *name, unsigned int flags,
31268 /* With Binutils 2.15, the "@unwind" marker must be specified on
31269 every occurrence of the ".eh_frame" section, not just the first
31272 && strcmp (name, ".eh_frame") == 0)
31274 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
31275 flags & SECTION_WRITE ? "aw" : "a");
31278 default_elf_asm_named_section (name, flags, decl);
31281 /* Return the mangling of TYPE if it is an extended fundamental type. */
31283 static const char *
31284 ix86_mangle_type (const_tree type)
31286 type = TYPE_MAIN_VARIANT (type);
31288 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
31289 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
31292 switch (TYPE_MODE (type))
31295 /* __float128 is "g". */
31298 /* "long double" or __float80 is "e". */
31305 /* For 32-bit code we can save PIC register setup by using
31306 __stack_chk_fail_local hidden function instead of calling
31307 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
31308 register, so it is better to call __stack_chk_fail directly. */
31311 ix86_stack_protect_fail (void)
31313 return TARGET_64BIT
31314 ? default_external_stack_protect_fail ()
31315 : default_hidden_stack_protect_fail ();
31318 /* Select a format to encode pointers in exception handling data. CODE
31319 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
31320 true if the symbol may be affected by dynamic relocations.
31322 ??? All x86 object file formats are capable of representing this.
31323 After all, the relocation needed is the same as for the call insn.
31324 Whether or not a particular assembler allows us to enter such, I
31325 guess we'll have to see. */
31327 asm_preferred_eh_data_format (int code, int global)
31331 int type = DW_EH_PE_sdata8;
31333 || ix86_cmodel == CM_SMALL_PIC
31334 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
31335 type = DW_EH_PE_sdata4;
31336 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
31338 if (ix86_cmodel == CM_SMALL
31339 || (ix86_cmodel == CM_MEDIUM && code))
31340 return DW_EH_PE_udata4;
31341 return DW_EH_PE_absptr;
31344 /* Expand copysign from SIGN to the positive value ABS_VALUE
31345 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
31348 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
31350 enum machine_mode mode = GET_MODE (sign);
31351 rtx sgn = gen_reg_rtx (mode);
31352 if (mask == NULL_RTX)
31354 enum machine_mode vmode;
31356 if (mode == SFmode)
31358 else if (mode == DFmode)
31363 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
31364 if (!VECTOR_MODE_P (mode))
31366 /* We need to generate a scalar mode mask in this case. */
31367 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31368 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31369 mask = gen_reg_rtx (mode);
31370 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31374 mask = gen_rtx_NOT (mode, mask);
31375 emit_insn (gen_rtx_SET (VOIDmode, sgn,
31376 gen_rtx_AND (mode, mask, sign)));
31377 emit_insn (gen_rtx_SET (VOIDmode, result,
31378 gen_rtx_IOR (mode, abs_value, sgn)));
31381 /* Expand fabs (OP0) and return a new rtx that holds the result. The
31382 mask for masking out the sign-bit is stored in *SMASK, if that is
31385 ix86_expand_sse_fabs (rtx op0, rtx *smask)
31387 enum machine_mode vmode, mode = GET_MODE (op0);
31390 xa = gen_reg_rtx (mode);
31391 if (mode == SFmode)
31393 else if (mode == DFmode)
31397 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
31398 if (!VECTOR_MODE_P (mode))
31400 /* We need to generate a scalar mode mask in this case. */
31401 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
31402 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
31403 mask = gen_reg_rtx (mode);
31404 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
31406 emit_insn (gen_rtx_SET (VOIDmode, xa,
31407 gen_rtx_AND (mode, op0, mask)));
31415 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
31416 swapping the operands if SWAP_OPERANDS is true. The expanded
31417 code is a forward jump to a newly created label in case the
31418 comparison is true. The generated label rtx is returned. */
31420 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
31421 bool swap_operands)
31432 label = gen_label_rtx ();
31433 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
31434 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31435 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
31436 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
31437 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
31438 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
31439 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
31440 JUMP_LABEL (tmp) = label;
31445 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
31446 using comparison code CODE. Operands are swapped for the comparison if
31447 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
31449 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
31450 bool swap_operands)
31452 enum machine_mode mode = GET_MODE (op0);
31453 rtx mask = gen_reg_rtx (mode);
31462 if (mode == DFmode)
31463 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
31464 gen_rtx_fmt_ee (code, mode, op0, op1)));
31466 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
31467 gen_rtx_fmt_ee (code, mode, op0, op1)));
31472 /* Generate and return an rtx of mode MODE for 2**n where n is the number
31473 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
31475 ix86_gen_TWO52 (enum machine_mode mode)
31477 REAL_VALUE_TYPE TWO52r;
31480 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
31481 TWO52 = const_double_from_real_value (TWO52r, mode);
31482 TWO52 = force_reg (mode, TWO52);
31487 /* Expand SSE sequence for computing lround from OP1 storing
31490 ix86_expand_lround (rtx op0, rtx op1)
31492 /* C code for the stuff we're doing below:
31493 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
31496 enum machine_mode mode = GET_MODE (op1);
31497 const struct real_format *fmt;
31498 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31501 /* load nextafter (0.5, 0.0) */
31502 fmt = REAL_MODE_FORMAT (mode);
31503 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31504 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
31506 /* adj = copysign (0.5, op1) */
31507 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
31508 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
31510 /* adj = op1 + adj */
31511 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
31513 /* op0 = (imode)adj */
31514 expand_fix (op0, adj, 0);
31517 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
31520 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
31522 /* C code for the stuff we're doing below (for do_floor):
31524 xi -= (double)xi > op1 ? 1 : 0;
31527 enum machine_mode fmode = GET_MODE (op1);
31528 enum machine_mode imode = GET_MODE (op0);
31529 rtx ireg, freg, label, tmp;
31531 /* reg = (long)op1 */
31532 ireg = gen_reg_rtx (imode);
31533 expand_fix (ireg, op1, 0);
31535 /* freg = (double)reg */
31536 freg = gen_reg_rtx (fmode);
31537 expand_float (freg, ireg, 0);
31539 /* ireg = (freg > op1) ? ireg - 1 : ireg */
31540 label = ix86_expand_sse_compare_and_jump (UNLE,
31541 freg, op1, !do_floor);
31542 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
31543 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
31544 emit_move_insn (ireg, tmp);
31546 emit_label (label);
31547 LABEL_NUSES (label) = 1;
31549 emit_move_insn (op0, ireg);
31552 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
31553 result in OPERAND0. */
31555 ix86_expand_rint (rtx operand0, rtx operand1)
31557 /* C code for the stuff we're doing below:
31558 xa = fabs (operand1);
31559 if (!isless (xa, 2**52))
31561 xa = xa + 2**52 - 2**52;
31562 return copysign (xa, operand1);
31564 enum machine_mode mode = GET_MODE (operand0);
31565 rtx res, xa, label, TWO52, mask;
31567 res = gen_reg_rtx (mode);
31568 emit_move_insn (res, operand1);
31570 /* xa = abs (operand1) */
31571 xa = ix86_expand_sse_fabs (res, &mask);
31573 /* if (!isless (xa, TWO52)) goto label; */
31574 TWO52 = ix86_gen_TWO52 (mode);
31575 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
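/* Adding and then subtracting 2**52 (2**23 for SFmode) forces the
   fraction bits to be rounded off in the current rounding mode, leaving
   the rounded integral value; inputs >= 2**52 are already integral.  */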
31577 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31578 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31580 ix86_sse_copysign_to_positive (res, xa, res, mask);
31582 emit_label (label);
31583 LABEL_NUSES (label) = 1;
31585 emit_move_insn (operand0, res);
31588 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31591 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
31593 /* C code for the stuff we expand below.
31594 double xa = fabs (x), x2;
31595 if (!isless (xa, TWO52))
31597 xa = xa + TWO52 - TWO52;
31598 x2 = copysign (xa, x);
31607 enum machine_mode mode = GET_MODE (operand0);
31608 rtx xa, TWO52, tmp, label, one, res, mask;
31610 TWO52 = ix86_gen_TWO52 (mode);
31612 /* Temporary for holding the result, initialized to the input
31613 operand to ease control flow. */
31614 res = gen_reg_rtx (mode);
31615 emit_move_insn (res, operand1);
31617 /* xa = abs (operand1) */
31618 xa = ix86_expand_sse_fabs (res, &mask);
31620 /* if (!isless (xa, TWO52)) goto label; */
31621 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31623 /* xa = xa + TWO52 - TWO52; */
31624 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31625 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
31627 /* xa = copysign (xa, operand1) */
31628 ix86_sse_copysign_to_positive (xa, xa, res, mask);
31630 /* generate 1.0 or -1.0 */
31631 one = force_reg (mode,
31632 const_double_from_real_value (do_floor
31633 ? dconst1 : dconstm1, mode));
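/* For ceil ONE is -1.0, so the subtraction below adds 1 where needed.
   Subtracting in both cases preserves the sign of -0.0, since
   -0.0 - 0.0 is -0.0 while -0.0 + 0.0 would be +0.0.  */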
31635 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31636 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31637 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31638 gen_rtx_AND (mode, one, tmp)));
31639 /* We always need to subtract here to preserve signed zero. */
31640 tmp = expand_simple_binop (mode, MINUS,
31641 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31642 emit_move_insn (res, tmp);
31644 emit_label (label);
31645 LABEL_NUSES (label) = 1;
31647 emit_move_insn (operand0, res);
31650 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
31653 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
31655 /* C code for the stuff we expand below.
31656 double xa = fabs (x), x2;
31657 if (!isless (xa, TWO52))
31659 x2 = (double)(long)x;
31666 if (HONOR_SIGNED_ZEROS (mode))
31667 return copysign (x2, x);
31670 enum machine_mode mode = GET_MODE (operand0);
31671 rtx xa, xi, TWO52, tmp, label, one, res, mask;
31673 TWO52 = ix86_gen_TWO52 (mode);
31675 /* Temporary for holding the result, initialized to the input
31676 operand to ease control flow. */
31677 res = gen_reg_rtx (mode);
31678 emit_move_insn (res, operand1);
31680 /* xa = abs (operand1) */
31681 xa = ix86_expand_sse_fabs (res, &mask);
31683 /* if (!isless (xa, TWO52)) goto label; */
31684 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31686 /* xa = (double)(long)x */
31687 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31688 expand_fix (xi, res, 0);
31689 expand_float (xa, xi, 0);
31692 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
31694 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
31695 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
31696 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31697 gen_rtx_AND (mode, one, tmp)));
31698 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
31699 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31700 emit_move_insn (res, tmp);
31702 if (HONOR_SIGNED_ZEROS (mode))
31703 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
31705 emit_label (label);
31706 LABEL_NUSES (label) = 1;
31708 emit_move_insn (operand0, res);
31711 /* Expand SSE sequence for computing round from OPERAND1 storing
31712 into OPERAND0. Sequence that works without relying on DImode truncation
31713 via cvttsd2siq that is only available on 64bit targets. */
31715 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
31717 /* C code for the stuff we expand below.
31718 double xa = fabs (x), xa2, x2;
31719 if (!isless (xa, TWO52))
31721 Using the absolute value and copying back sign makes
31722 -0.0 -> -0.0 correct.
31723 xa2 = xa + TWO52 - TWO52;
31728 else if (dxa > 0.5)
31730 x2 = copysign (xa2, x);
31733 enum machine_mode mode = GET_MODE (operand0);
31734 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
31736 TWO52 = ix86_gen_TWO52 (mode);
31738 /* Temporary for holding the result, initialized to the input
31739 operand to ease control flow. */
31740 res = gen_reg_rtx (mode);
31741 emit_move_insn (res, operand1);
31743 /* xa = abs (operand1) */
31744 xa = ix86_expand_sse_fabs (res, &mask);
31746 /* if (!isless (xa, TWO52)) goto label; */
31747 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31749 /* xa2 = xa + TWO52 - TWO52; */
31750 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31751 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
31753 /* dxa = xa2 - xa; */
31754 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
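/* DXA is the signed error of the TWO52 rounding step.  Compensating
   when it exceeds 0.5 or reaches -0.5 yields round-half-away-from-zero
   on the absolute value, independent of the rounding mode in effect.  */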
31756 /* generate 0.5, 1.0 and -0.5 */
31757 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
31758 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
31759 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
31763 tmp = gen_reg_rtx (mode);
31764 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
31765 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
31766 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31767 gen_rtx_AND (mode, one, tmp)));
31768 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31769 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
31770 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
31771 emit_insn (gen_rtx_SET (VOIDmode, tmp,
31772 gen_rtx_AND (mode, one, tmp)));
31773 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
31775 /* res = copysign (xa2, operand1) */
31776 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
31778 emit_label (label);
31779 LABEL_NUSES (label) = 1;
31781 emit_move_insn (operand0, res);
31784 /* Expand SSE sequence for computing trunc from OPERAND1 storing
31787 ix86_expand_trunc (rtx operand0, rtx operand1)
31789 /* C code for SSE variant we expand below.
31790 double xa = fabs (x), x2;
31791 if (!isless (xa, TWO52))
31793 x2 = (double)(long)x;
31794 if (HONOR_SIGNED_ZEROS (mode))
31795 return copysign (x2, x);
31798 enum machine_mode mode = GET_MODE (operand0);
31799 rtx xa, xi, TWO52, label, res, mask;
31801 TWO52 = ix86_gen_TWO52 (mode);
31803 /* Temporary for holding the result, initialized to the input
31804 operand to ease control flow. */
31805 res = gen_reg_rtx (mode);
31806 emit_move_insn (res, operand1);
31808 /* xa = abs (operand1) */
31809 xa = ix86_expand_sse_fabs (res, &mask);
31811 /* if (!isless (xa, TWO52)) goto label; */
31812 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31814 /* x = (double)(long)x */
31815 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31816 expand_fix (xi, res, 0);
31817 expand_float (res, xi, 0);
31819 if (HONOR_SIGNED_ZEROS (mode))
31820 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
31822 emit_label (label);
31823 LABEL_NUSES (label) = 1;
31825 emit_move_insn (operand0, res);
31828 /* Expand SSE sequence for computing trunc from OPERAND1 storing
31831 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
31833 enum machine_mode mode = GET_MODE (operand0);
31834 rtx xa, mask, TWO52, label, one, res, smask, tmp;
31836 /* C code for SSE variant we expand below.
31837 double xa = fabs (x), x2;
31838 if (!isless (xa, TWO52))
31840 xa2 = xa + TWO52 - TWO52;
31844 x2 = copysign (xa2, x);
31848 TWO52 = ix86_gen_TWO52 (mode);
31850 /* Temporary for holding the result, initialized to the input
31851 operand to ease control flow. */
31852 res = gen_reg_rtx (mode);
31853 emit_move_insn (res, operand1);
31855 /* xa = abs (operand1) */
31856 xa = ix86_expand_sse_fabs (res, &smask);
31858 /* if (!isless (xa, TWO52)) goto label; */
31859 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31861 /* res = xa + TWO52 - TWO52; */
31862 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
31863 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
31864 emit_move_insn (res, tmp);
31867 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
31869 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
31870 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
31871 emit_insn (gen_rtx_SET (VOIDmode, mask,
31872 gen_rtx_AND (mode, mask, one)));
31873 tmp = expand_simple_binop (mode, MINUS,
31874 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
31875 emit_move_insn (res, tmp);
31877 /* res = copysign (res, operand1) */
31878 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
31880 emit_label (label);
31881 LABEL_NUSES (label) = 1;
31883 emit_move_insn (operand0, res);
31886 /* Expand SSE sequence for computing round from OPERAND1 storing
31889 ix86_expand_round (rtx operand0, rtx operand1)
31891 /* C code for the stuff we're doing below:
31892 double xa = fabs (x);
31893 if (!isless (xa, TWO52))
31895 xa = (double)(long)(xa + nextafter (0.5, 0.0));
31896 return copysign (xa, x);
31898 enum machine_mode mode = GET_MODE (operand0);
31899 rtx res, TWO52, xa, label, xi, half, mask;
31900 const struct real_format *fmt;
31901 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
31903 /* Temporary for holding the result, initialized to the input
31904 operand to ease control flow. */
31905 res = gen_reg_rtx (mode);
31906 emit_move_insn (res, operand1);
31908 TWO52 = ix86_gen_TWO52 (mode);
31909 xa = ix86_expand_sse_fabs (res, &mask);
31910 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
31912 /* load nextafter (0.5, 0.0) */
31913 fmt = REAL_MODE_FORMAT (mode);
31914 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
31915 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
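/* Using the largest representable value strictly below 0.5 rather than
   0.5 itself keeps inputs just under 0.5 from being rounded up across
   the integer boundary by this addition.  */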
31917 /* xa = xa + 0.5 */
31918 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
31919 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
31921 /* xa = (double)(int64_t)xa */
31922 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
31923 expand_fix (xi, xa, 0);
31924 expand_float (xa, xi, 0);
31926 /* res = copysign (xa, operand1) */
31927 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
31929 emit_label (label);
31930 LABEL_NUSES (label) = 1;
31932 emit_move_insn (operand0, res);
31936 /* Table of valid machine attributes. */
31937 static const struct attribute_spec ix86_attribute_table[] =
31939 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
31940 /* Stdcall attribute says callee is responsible for popping arguments
31941 if they are not variable. */
31942 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31943 /* Fastcall attribute says callee is responsible for popping arguments
31944 if they are not variable. */
31945 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31946 /* Thiscall attribute says callee is responsible for popping arguments
31947 if they are not variable. */
31948 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31949 /* Cdecl attribute says the callee is a normal C declaration */
31950 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31951 /* Regparm attribute specifies how many integer arguments are to be
31952 passed in registers. */
31953 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
31954 /* Sseregparm attribute says we are using x86_64 calling conventions
31955 for FP arguments. */
31956 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
31957 /* force_align_arg_pointer says this function realigns the stack at entry. */
31958 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
31959 false, true, true, ix86_handle_cconv_attribute },
31960 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31961 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
31962 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
31963 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
31965 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
31966 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
31967 #ifdef SUBTARGET_ATTRIBUTE_TABLE
31968 SUBTARGET_ATTRIBUTE_TABLE,
31970 /* ms_abi and sysv_abi calling convention function attributes. */
31971 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
31972 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
31973 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
31975 { NULL, 0, 0, false, false, false, NULL }
31978 /* Implement targetm.vectorize.builtin_vectorization_cost. */
31980 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
31981 tree vectype ATTRIBUTE_UNUSED,
31982 int misalign ATTRIBUTE_UNUSED)
31984 switch (type_of_cost)
31987 return ix86_cost->scalar_stmt_cost;
31990 return ix86_cost->scalar_load_cost;
31993 return ix86_cost->scalar_store_cost;
31996 return ix86_cost->vec_stmt_cost;
31999 return ix86_cost->vec_align_load_cost;
32002 return ix86_cost->vec_store_cost;
32004 case vec_to_scalar:
32005 return ix86_cost->vec_to_scalar_cost;
32007 case scalar_to_vec:
32008 return ix86_cost->scalar_to_vec_cost;
32010 case unaligned_load:
32011 case unaligned_store:
32012 return ix86_cost->vec_unalign_load_cost;
32014 case cond_branch_taken:
32015 return ix86_cost->cond_taken_branch_cost;
32017 case cond_branch_not_taken:
32018 return ix86_cost->cond_not_taken_branch_cost;
32024 gcc_unreachable ();
32029 /* Implement targetm.vectorize.builtin_vec_perm. */
32032 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
32034 tree itype = TREE_TYPE (vec_type);
32035 bool u = TYPE_UNSIGNED (itype);
32036 enum machine_mode vmode = TYPE_MODE (vec_type);
32037 enum ix86_builtins fcode;
32038 bool ok = TARGET_SSE2;
32044 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
32047 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
32049 itype = ix86_get_builtin_type (IX86_BT_DI);
32054 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
32058 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
32060 itype = ix86_get_builtin_type (IX86_BT_SI);
32064 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
32067 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
32070 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
32073 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
32083 *mask_type = itype;
32084 return ix86_builtins[(int) fcode];
32087 /* Return a vector mode with twice as many elements as VMODE. */
32088 /* ??? Consider moving this to a table generated by genmodes.c. */
32090 static enum machine_mode
32091 doublesize_vector_mode (enum machine_mode vmode)
32095 case V2SFmode: return V4SFmode;
32096 case V1DImode: return V2DImode;
32097 case V2SImode: return V4SImode;
32098 case V4HImode: return V8HImode;
32099 case V8QImode: return V16QImode;
32101 case V2DFmode: return V4DFmode;
32102 case V4SFmode: return V8SFmode;
32103 case V2DImode: return V4DImode;
32104 case V4SImode: return V8SImode;
32105 case V8HImode: return V16HImode;
32106 case V16QImode: return V32QImode;
32108 case V4DFmode: return V8DFmode;
32109 case V8SFmode: return V16SFmode;
32110 case V4DImode: return V8DImode;
32111 case V8SImode: return V16SImode;
32112 case V16HImode: return V32HImode;
32113 case V32QImode: return V64QImode;
32116 gcc_unreachable ();
32120 /* Construct (set target (vec_select op0 (parallel perm))) and
32121 return true if that's a valid instruction in the active ISA. */
32124 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
32126 rtx rperm[MAX_VECT_LEN], x;
32129 for (i = 0; i < nelt; ++i)
32130 rperm[i] = GEN_INT (perm[i]);
32132 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
32133 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
32134 x = gen_rtx_SET (VOIDmode, target, x);
32137 if (recog_memoized (x) < 0)
32145 /* Similar, but generate a vec_concat from op0 and op1 as well. */
32148 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
32149 const unsigned char *perm, unsigned nelt)
32151 enum machine_mode v2mode;
32154 v2mode = doublesize_vector_mode (GET_MODE (op0));
32155 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
32156 return expand_vselect (target, x, perm, nelt);
32159 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32160 in terms of blendp[sd] / pblendw / pblendvb. */
32163 expand_vec_perm_blend (struct expand_vec_perm_d *d)
32165 enum machine_mode vmode = d->vmode;
32166 unsigned i, mask, nelt = d->nelt;
32167 rtx target, op0, op1, x;
32169 if (!TARGET_SSE4_1 || d->op0 == d->op1)
32171 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
32174 /* This is a blend, not a permute. Elements must stay in their
32175 respective lanes. */
32176 for (i = 0; i < nelt; ++i)
32178 unsigned e = d->perm[i];
32179 if (!(e == i || e == i + nelt))
32186 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
32187 decision should be extracted elsewhere, so that we only try that
32188 sequence once all budget==3 options have been tried. */
32190 /* For bytes, see if bytes move in pairs so we can use pblendw with
32191 an immediate argument, rather than pblendvb with a vector argument. */
32192 if (vmode == V16QImode)
32194 bool pblendw_ok = true;
32195 for (i = 0; i < 16 && pblendw_ok; i += 2)
32196 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
32200 rtx rperm[16], vperm;
32202 for (i = 0; i < nelt; ++i)
32203 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
32205 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32206 vperm = force_reg (V16QImode, vperm);
32208 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
32213 target = d->target;
32225 for (i = 0; i < nelt; ++i)
32226 mask |= (d->perm[i] >= nelt) << i;
32230 for (i = 0; i < 2; ++i)
32231 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
32235 for (i = 0; i < 4; ++i)
32236 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
32240 for (i = 0; i < 8; ++i)
32241 mask |= (d->perm[i * 2] >= 16) << i;
32245 target = gen_lowpart (vmode, target);
32246 op0 = gen_lowpart (vmode, op0);
32247 op1 = gen_lowpart (vmode, op1);
32251 gcc_unreachable ();
32254 /* This matches five different patterns with the different modes. */
32255 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
32256 x = gen_rtx_SET (VOIDmode, target, x);
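/* A standalone sketch (hypothetical helper, not part of this file) of
   the per-element mask computation above: for a lane-preserving blend,
   bit i of the immediate selects op1 exactly when perm[i] >= nelt.
   The word/byte variants above merely replicate each decision over
   wider bit groups.  */

static unsigned
model_blend_mask (const unsigned char *perm, unsigned nelt)
{
  unsigned i, mask = 0;
  for (i = 0; i < nelt; ++i)
    mask |= (unsigned) (perm[i] >= nelt) << i;
  return mask;	/* immediate operand for blendps/blendpd/pblendw */
}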
32262 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32263 in terms of the variable form of vpermilps.
32265 Note that we will have already failed the immediate input vpermilps,
32266 which requires that the high and low part shuffle be identical; the
32267 variable form doesn't require that. */
32270 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
32272 rtx rperm[8], vperm;
32275 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
32278 /* We can only permute within the 128-bit lane. */
32279 for (i = 0; i < 8; ++i)
32281 unsigned e = d->perm[i];
32282 if (i < 4 ? e >= 4 : e < 4)
32289 for (i = 0; i < 8; ++i)
32291 unsigned e = d->perm[i];
32293 /* Within each 128-bit lane, the elements of op0 are numbered
32294 from 0 and the elements of op1 are numbered from 4. */
32300 rperm[i] = GEN_INT (e);
32303 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
32304 vperm = force_reg (V8SImode, vperm);
32305 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
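/* A standalone sketch (hypothetical helper, not part of this file) of
   the control vector the variable vpermilps consumes: each result
   element selects within its own 128-bit lane, so after the lane check
   above only the low two bits of each index are significant.  */

static void
model_vpermilps_control (int *ctrl, const unsigned char *perm)
{
  unsigned i;
  for (i = 0; i < 8; ++i)
    ctrl[i] = perm[i] & 3;	/* index within the element's own lane */
}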
32310 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32311 in terms of pshufb or vpperm. */
32314 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
32316 unsigned i, nelt, eltsz;
32317 rtx rperm[16], vperm, target, op0, op1;
32319 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
32321 if (GET_MODE_SIZE (d->vmode) != 16)
32328 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32330 for (i = 0; i < nelt; ++i)
32332 unsigned j, e = d->perm[i];
32333 for (j = 0; j < eltsz; ++j)
32334 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
32337 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
32338 vperm = force_reg (V16QImode, vperm);
32340 target = gen_lowpart (V16QImode, d->target);
32341 op0 = gen_lowpart (V16QImode, d->op0);
32342 if (d->op0 == d->op1)
32343 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
32346 op1 = gen_lowpart (V16QImode, d->op1);
32347 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
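/* A standalone sketch (hypothetical helper, not part of this file) of
   the control-vector expansion above: an element permutation over
   elements of ELTSZ bytes becomes a byte permutation for pshufb or
   vpperm, element e expanding to the byte run
   e*eltsz .. e*eltsz + eltsz - 1.  */

static void
model_pshufb_control (unsigned char *ctrl, const unsigned char *perm,
		      unsigned nelt, unsigned eltsz)
{
  unsigned i, j;
  for (i = 0; i < nelt; ++i)
    for (j = 0; j < eltsz; ++j)
      ctrl[i * eltsz + j] = perm[i] * eltsz + j;
}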
32353 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
32354 in a single instruction. */
32357 expand_vec_perm_1 (struct expand_vec_perm_d *d)
32359 unsigned i, nelt = d->nelt;
32360 unsigned char perm2[MAX_VECT_LEN];
32362 /* Check plain VEC_SELECT first, because AVX has instructions that could
32363 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
32364 input where SEL+CONCAT may not. */
32365 if (d->op0 == d->op1)
32367 int mask = nelt - 1;
32369 for (i = 0; i < nelt; i++)
32370 perm2[i] = d->perm[i] & mask;
32372 if (expand_vselect (d->target, d->op0, perm2, nelt))
32375 /* There are plenty of patterns in sse.md that are written for
32376 SEL+CONCAT and are not replicated for a single op. Perhaps
32377 that should be changed, to avoid the nastiness here. */
32379 /* Recognize interleave style patterns, which means incrementing
32380 every other permutation operand. */
32381 for (i = 0; i < nelt; i += 2)
32383 perm2[i] = d->perm[i] & mask;
32384 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
32386 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32389 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
32392 for (i = 0; i < nelt; i += 4)
32394 perm2[i + 0] = d->perm[i + 0] & mask;
32395 perm2[i + 1] = d->perm[i + 1] & mask;
32396 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
32397 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
32400 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
32405 /* Finally, try the fully general two operand permute. */
32406 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
32409 /* Recognize interleave style patterns with reversed operands. */
32410 if (d->op0 != d->op1)
32412 for (i = 0; i < nelt; ++i)
32414 unsigned e = d->perm[i];
32422 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
32426 /* Try the SSE4.1 blend variable merge instructions. */
32427 if (expand_vec_perm_blend (d))
32430 /* Try one of the AVX vpermil variable permutations. */
32431 if (expand_vec_perm_vpermil (d))
32434 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
32435 if (expand_vec_perm_pshufb (d))
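/* A standalone sketch (hypothetical helper, not part of this file) of
   the interleave-style rewrite tried above: with op0 == op1, bias
   every other index into the second copy of the operand so that the
   vec_select over the concat can match a punpck-style pattern.  */

static void
model_interleave_perm (unsigned char *perm2, const unsigned char *perm,
		       unsigned nelt)
{
  unsigned i, mask = nelt - 1;
  for (i = 0; i < nelt; i += 2)
    {
      perm2[i] = perm[i] & mask;		  /* first copy */
      perm2[i + 1] = (perm[i + 1] & mask) + nelt; /* second copy */
    }
}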
32441 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
32442 in terms of a pair of pshuflw + pshufhw instructions. */
32445 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
32447 unsigned char perm2[MAX_VECT_LEN];
32451 if (d->vmode != V8HImode || d->op0 != d->op1)
32454 /* The two permutations only operate in 64-bit lanes. */
32455 for (i = 0; i < 4; ++i)
32456 if (d->perm[i] >= 4)
32458 for (i = 4; i < 8; ++i)
32459 if (d->perm[i] < 4)
32465 /* Emit the pshuflw. */
32466 memcpy (perm2, d->perm, 4);
32467 for (i = 4; i < 8; ++i)
32469 ok = expand_vselect (d->target, d->op0, perm2, 8);
32472 /* Emit the pshufhw. */
32473 memcpy (perm2 + 4, d->perm + 4, 4);
32474 for (i = 0; i < 4; ++i)
32476 ok = expand_vselect (d->target, d->target, perm2, 8);
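/* A standalone sketch (hypothetical helper, not part of this file) of
   the splittability test above: a V8HI permutation decomposes into
   pshuflw + pshufhw exactly when the low four results use only
   elements 0..3 and the high four only elements 4..7.  */

static int
model_pshuflw_pshufhw_ok (const unsigned char *perm)
{
  unsigned i;
  for (i = 0; i < 4; ++i)
    if (perm[i] >= 4)
      return 0;
  for (i = 4; i < 8; ++i)
    if (perm[i] < 4)
      return 0;
  return 1;
}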
32482 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32483 the permutation using the SSSE3 palignr instruction. This succeeds
32484 when all of the elements in PERM fit within one vector and we merely
32485 need to shift them down so that a single vector permutation has a
32486 chance to succeed. */
32489 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
32491 unsigned i, nelt = d->nelt;
32496 /* Even with AVX, palignr only operates on 128-bit vectors. */
32497 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32500 min = nelt, max = 0;
32501 for (i = 0; i < nelt; ++i)
32503 unsigned e = d->perm[i];
32509 if (min == 0 || max - min >= nelt)
32512 /* Given that we have SSSE3, we know we'll be able to implement the
32513 single operand permutation after the palignr with pshufb. */
32517 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
32518 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
32519 gen_lowpart (TImode, d->op1),
32520 gen_lowpart (TImode, d->op0), shift));
32522 d->op0 = d->op1 = d->target;
32525 for (i = 0; i < nelt; ++i)
32527 unsigned e = d->perm[i] - min;
32533 /* Test for the degenerate case where the alignment by itself
32534 produces the desired permutation. */
32538 ok = expand_vec_perm_1 (d);
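/* A standalone sketch (hypothetical helper, not part of this file) of
   the feasibility test above: when every selected element lies in a
   window of fewer than NELT positions starting at MIN > 0, shifting
   by MIN elements with palignr reduces the problem to a one-operand
   permutation.  */

static int
model_palignr_shift (const unsigned char *perm, unsigned nelt)
{
  unsigned i, min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = perm[i];
      if (e < min) min = e;
      if (e > max) max = e;
    }
  if (min == 0 || max - min >= nelt)
    return -1;		/* palignr cannot help here */
  return (int) min;	/* element shift count for palignr */
}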
32544 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
32545 a two vector permutation into a single vector permutation by using
32546 an interleave operation to merge the vectors. */
32549 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
32551 struct expand_vec_perm_d dremap, dfinal;
32552 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
32553 unsigned contents, h1, h2, h3, h4;
32554 unsigned char remap[2 * MAX_VECT_LEN];
32558 if (d->op0 == d->op1)
32561 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
32562 lanes. We can use similar techniques with the vperm2f128 instruction,
32563 but it requires slightly different logic. */
32564 if (GET_MODE_SIZE (d->vmode) != 16)
32567 /* Examine from whence the elements come. */
32569 for (i = 0; i < nelt; ++i)
32570 contents |= 1u << d->perm[i];
32572 /* Split the two input vectors into 4 halves. */
32573 h1 = (1u << nelt2) - 1;
32578 memset (remap, 0xff, sizeof (remap));
32581 /* If all elements come from the low halves, use interleave low; similarly
32582 for interleave high. If the elements come from mis-matched halves, we
32583 can use shufps for V4SF/V4SI or do a DImode shuffle. */
32584 if ((contents & (h1 | h3)) == contents)
32586 for (i = 0; i < nelt2; ++i)
32589 remap[i + nelt] = i * 2 + 1;
32590 dremap.perm[i * 2] = i;
32591 dremap.perm[i * 2 + 1] = i + nelt;
32594 else if ((contents & (h2 | h4)) == contents)
32596 for (i = 0; i < nelt2; ++i)
32598 remap[i + nelt2] = i * 2;
32599 remap[i + nelt + nelt2] = i * 2 + 1;
32600 dremap.perm[i * 2] = i + nelt2;
32601 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
32604 else if ((contents & (h1 | h4)) == contents)
32606 for (i = 0; i < nelt2; ++i)
32609 remap[i + nelt + nelt2] = i + nelt2;
32610 dremap.perm[i] = i;
32611 dremap.perm[i + nelt2] = i + nelt + nelt2;
32615 dremap.vmode = V2DImode;
32617 dremap.perm[0] = 0;
32618 dremap.perm[1] = 3;
32621 else if ((contents & (h2 | h3)) == contents)
32623 for (i = 0; i < nelt2; ++i)
32625 remap[i + nelt2] = i;
32626 remap[i + nelt] = i + nelt2;
32627 dremap.perm[i] = i + nelt2;
32628 dremap.perm[i + nelt2] = i + nelt;
32632 dremap.vmode = V2DImode;
32634 dremap.perm[0] = 1;
32635 dremap.perm[1] = 2;
32641 /* Use the remapping array set up above to move the elements from their
32642 swizzled locations into their final destinations. */
32644 for (i = 0; i < nelt; ++i)
32646 unsigned e = remap[d->perm[i]];
32647 gcc_assert (e < nelt);
32648 dfinal.perm[i] = e;
32650 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
32651 dfinal.op1 = dfinal.op0;
32652 dremap.target = dfinal.op0;
32654 /* Test if the final remap can be done with a single insn. For V4SFmode or
32655 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
32657 ok = expand_vec_perm_1 (&dfinal);
32658 seq = get_insns ();
32664 if (dremap.vmode != dfinal.vmode)
32666 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
32667 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
32668 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
32671 ok = expand_vec_perm_1 (&dremap);
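/* A standalone sketch (hypothetical helper, not part of this file) of
   the classification driving the strategy above: CONTENTS records
   which of the 2*NELT input positions are referenced, and is compared
   against bitmasks for the four operand halves (h1|h3 -> interleave
   low, h2|h4 -> interleave high, and so on).  */

static unsigned
model_contents (const unsigned char *perm, unsigned nelt)
{
  unsigned i, contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << perm[i];
  return contents;
}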
32678 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
32679 permutation with two pshufb insns and an ior. We should have already
32680 failed all two instruction sequences. */
32683 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
32685 rtx rperm[2][16], vperm, l, h, op, m128;
32686 unsigned int i, nelt, eltsz;
32688 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
32690 gcc_assert (d->op0 != d->op1);
32693 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
32695 /* Generate two permutation masks. If the required element is within
32696 the given vector it is shuffled into the proper lane. If the required
32697 element is in the other vector, force a zero into the lane by setting
32698 bit 7 in the permutation mask. */
32699 m128 = GEN_INT (-128);
32700 for (i = 0; i < nelt; ++i)
32702 unsigned j, e = d->perm[i];
32703 unsigned which = (e >= nelt);
32707 for (j = 0; j < eltsz; ++j)
32709 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
32710 rperm[1-which][i*eltsz + j] = m128;
32714 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
32715 vperm = force_reg (V16QImode, vperm);
32717 l = gen_reg_rtx (V16QImode);
32718 op = gen_lowpart (V16QImode, d->op0);
32719 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
32721 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
32722 vperm = force_reg (V16QImode, vperm);
32724 h = gen_reg_rtx (V16QImode);
32725 op = gen_lowpart (V16QImode, d->op1);
32726 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
32728 op = gen_lowpart (V16QImode, d->target);
32729 emit_insn (gen_iorv16qi3 (op, l, h));
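/* A standalone scalar model (hypothetical helpers, not part of this
   file) of the two-mask scheme above: a pshufb control byte with bit 7
   set yields zero, so shuffling op0 and op1 with complementary masks
   and OR-ing the results merges the two vectors.  */

static unsigned char
model_pshufb_byte (const unsigned char *op, unsigned ctrl)
{
  return (ctrl & 0x80) ? 0 : op[ctrl & 0x0f];
}

static void
model_pshufb2 (unsigned char *out, const unsigned char *op0,
	       const unsigned char *op1, const unsigned char *perm)
{
  unsigned i;
  for (i = 0; i < 16; ++i)
    {
      unsigned e = perm[i];
      unsigned char l = model_pshufb_byte (op0, e < 16 ? e : 0x80u);
      unsigned char h = model_pshufb_byte (op1, e >= 16 ? e - 16 : 0x80u);
      out[i] = l | h;	/* the final ior */
    }
}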
32734 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
32735 and extract-odd permutations. */
32738 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
32745 t1 = gen_reg_rtx (V4DFmode);
32746 t2 = gen_reg_rtx (V4DFmode);
32748 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
32749 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
32750 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
32752 /* Now an unpck[lh]pd will produce the result required. */
32754 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
32756 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
32762 int mask = odd ? 0xdd : 0x88;
32764 t1 = gen_reg_rtx (V8SFmode);
32765 t2 = gen_reg_rtx (V8SFmode);
32766 t3 = gen_reg_rtx (V8SFmode);
32768 /* Shuffle within the 128-bit lanes to produce:
32769 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
32770 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
32773 /* Shuffle the lanes around to produce:
32774 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
32775 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
32778 /* Shuffle within the 128-bit lanes to produce:
32779 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
32780 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
32782 /* Shuffle within the 128-bit lanes to produce:
32783 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
32784 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
32786 /* Shuffle the lanes around to produce:
32787 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
32788 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
32797 /* These are always directly implementable by expand_vec_perm_1. */
32798 gcc_unreachable ();
32802 return expand_vec_perm_pshufb2 (d);
32805 /* We need 2*log2(N)-1 operations to achieve odd/even
32806 with interleave. */
32807 t1 = gen_reg_rtx (V8HImode);
32808 t2 = gen_reg_rtx (V8HImode);
32809 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
32810 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
32811 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
32812 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
32814 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
32816 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
32823 return expand_vec_perm_pshufb2 (d);
32826 t1 = gen_reg_rtx (V16QImode);
32827 t2 = gen_reg_rtx (V16QImode);
32828 t3 = gen_reg_rtx (V16QImode);
32829 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
32830 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
32831 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
32832 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
32833 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
32834 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
32836 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
32838 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
32844 gcc_unreachable ();
32850 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
32851 extract-even and extract-odd permutations. */
32854 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
32856 unsigned i, odd, nelt = d->nelt;
32859 if (odd != 0 && odd != 1)
32862 for (i = 1; i < nelt; ++i)
32863 if (d->perm[i] != 2 * i + odd)
32866 return expand_vec_perm_even_odd_1 (d, odd);
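/* A standalone sketch (hypothetical helper, not part of this file) of
   the pattern match above: an extract-even permutation has
   perm[i] == 2*i and extract-odd has perm[i] == 2*i + 1, with ODD
   read off perm[0].  */

static int
model_even_odd_p (const unsigned char *perm, unsigned nelt,
		  unsigned *odd)
{
  unsigned i;
  *odd = perm[0];
  if (*odd != 0 && *odd != 1)
    return 0;
  for (i = 1; i < nelt; ++i)
    if (perm[i] != 2 * i + *odd)
      return 0;
  return 1;
}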
32869 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
32870 permutations. We assume that expand_vec_perm_1 has already failed. */
32873 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
32875 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
32876 enum machine_mode vmode = d->vmode;
32877 unsigned char perm2[4];
32885 /* These are special-cased in sse.md so that we can optionally
32886 use the vbroadcast instruction. They expand to two insns
32887 if the input happens to be in a register. */
32888 gcc_unreachable ();
32894 /* These are always implementable using standard shuffle patterns. */
32895 gcc_unreachable ();
32899 /* These can be implemented via interleave. We save one insn by
32900 stopping once we have promoted to V4SImode and then use pshufd. */
32903 optab otab = vec_interleave_low_optab;
32907 otab = vec_interleave_high_optab;
32912 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
32913 vmode = get_mode_wider_vector (vmode);
32914 op0 = gen_lowpart (vmode, op0);
32916 while (vmode != V4SImode);
32918 memset (perm2, elt, 4);
32919 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
32924 gcc_unreachable ();
32928 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
32929 broadcast permutations. */
32932 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
32934 unsigned i, elt, nelt = d->nelt;
32936 if (d->op0 != d->op1)
32940 for (i = 1; i < nelt; ++i)
32941 if (d->perm[i] != elt)
32944 return expand_vec_perm_broadcast_1 (d);
32947 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
32948 With all of the interface bits taken care of, perform the expansion
32949 in D and return true on success. */
32952 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
32954 /* Try a single instruction expansion. */
32955 if (expand_vec_perm_1 (d))
32958 /* Try sequences of two instructions. */
32960 if (expand_vec_perm_pshuflw_pshufhw (d))
32963 if (expand_vec_perm_palignr (d))
32966 if (expand_vec_perm_interleave2 (d))
32969 if (expand_vec_perm_broadcast (d))
32972 /* Try sequences of three instructions. */
32974 if (expand_vec_perm_pshufb2 (d))
32977 /* ??? Look for narrow permutations whose element orderings would
32978 allow the promotion to a wider mode. */
32980 /* ??? Look for sequences of interleave or a wider permute that place
32981 the data into the correct lanes for a half-vector shuffle like
32982 pshuf[lh]w or vpermilps. */
32984 /* ??? Look for sequences of interleave that produce the desired results.
32985 The combinatorics of punpck[lh] get pretty ugly... */
32987 if (expand_vec_perm_even_odd (d))
32993 /* Extract the values from the vector CST into the permutation array in D.
32994 Return 0 on error, 1 if all values from the permutation come from the
32995 first vector, 2 if all values from the second vector, and 3 otherwise. */
32998 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
33000 tree list = TREE_VECTOR_CST_ELTS (cst);
33001 unsigned i, nelt = d->nelt;
33004 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
33006 unsigned HOST_WIDE_INT e;
33008 if (!host_integerp (TREE_VALUE (list), 1))
33010 e = tree_low_cst (TREE_VALUE (list), 1);
33014 ret |= (e < nelt ? 1 : 2);
33017 gcc_assert (list == NULL);
33019 /* For all elements from the second vector, fold the elements to the first. */
33021 for (i = 0; i < nelt; ++i)
33022 d->perm[i] -= nelt;
33028 ix86_expand_vec_perm_builtin (tree exp)
33030 struct expand_vec_perm_d d;
33031 tree arg0, arg1, arg2;
33033 arg0 = CALL_EXPR_ARG (exp, 0);
33034 arg1 = CALL_EXPR_ARG (exp, 1);
33035 arg2 = CALL_EXPR_ARG (exp, 2);
33037 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
33038 d.nelt = GET_MODE_NUNITS (d.vmode);
33039 d.testing_p = false;
33040 gcc_assert (VECTOR_MODE_P (d.vmode));
33042 if (TREE_CODE (arg2) != VECTOR_CST)
33044 error_at (EXPR_LOCATION (exp),
33045 "vector permutation requires vector constant");
33049 switch (extract_vec_perm_cst (&d, arg2))
33055 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
33059 if (!operand_equal_p (arg0, arg1, 0))
33061 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33062 d.op0 = force_reg (d.vmode, d.op0);
33063 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33064 d.op1 = force_reg (d.vmode, d.op1);
33068 /* The elements of PERM do not suggest that only the first operand
33069 is used, but both operands are identical. Allow easier matching
33070 of the permutation by folding the permutation into the single input vector. */
33073 unsigned i, nelt = d.nelt;
33074 for (i = 0; i < nelt; ++i)
33075 if (d.perm[i] >= nelt)
33081 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
33082 d.op0 = force_reg (d.vmode, d.op0);
33087 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
33088 d.op0 = force_reg (d.vmode, d.op0);
33093 d.target = gen_reg_rtx (d.vmode);
33094 if (ix86_expand_vec_perm_builtin_1 (&d))
33097 /* For compiler generated permutations, we should never get here, because
33098 the compiler should also be checking the ok hook. But since this is a
33099 builtin the user has access to, don't abort. */
33103 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
33106 sorry ("vector permutation (%d %d %d %d)",
33107 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
33110 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
33111 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33112 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
33115 sorry ("vector permutation "
33116 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
33117 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
33118 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
33119 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
33120 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
33123 gcc_unreachable ();
33126 return CONST0_RTX (d.vmode);
33129 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
33132 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
33134 struct expand_vec_perm_d d;
33138 d.vmode = TYPE_MODE (vec_type);
33139 d.nelt = GET_MODE_NUNITS (d.vmode);
33140 d.testing_p = true;
33142 /* Given sufficient ISA support we can just return true here
33143 for selected vector modes. */
33144 if (GET_MODE_SIZE (d.vmode) == 16)
33146 /* All implementable with a single vpperm insn. */
33149 /* All implementable with 2 pshufb + 1 ior. */
33152 /* All implementable with shufpd or unpck[lh]pd. */
33157 vec_mask = extract_vec_perm_cst (&d, mask);
33159 /* This hook cannot be called in response to something that the
33160 user does (unlike the builtin expander) so we shouldn't ever see
33161 an error generated from the extract. */
33162 gcc_assert (vec_mask > 0 && vec_mask <= 3);
33163 one_vec = (vec_mask != 3);
33165 /* Implementable with shufps or pshufd. */
33166 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
33169 /* Otherwise we have to go through the motions and see if we can
33170 figure out how to generate the requested permutation. */
33171 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
33172 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
33174 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
33177 ret = ix86_expand_vec_perm_builtin_1 (&d);
33184 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
33186 struct expand_vec_perm_d d;
33192 d.vmode = GET_MODE (targ);
33193 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
33194 d.testing_p = false;
33196 for (i = 0; i < nelt; ++i)
33197 d.perm[i] = i * 2 + odd;
33199 /* We'll either be able to implement the permutation directly... */
33200 if (expand_vec_perm_1 (&d))
33203 /* ... or we use the special-case patterns. */
33204 expand_vec_perm_even_odd_1 (&d, odd);
33207 /* This function returns the calling-ABI-specific va_list type node,
33208 i.e. the va_list type appropriate for FNDECL. */
33211 ix86_fn_abi_va_list (tree fndecl)
33214 return va_list_type_node;
33215 gcc_assert (fndecl != NULL_TREE);
33217 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
33218 return ms_va_list_type_node;
33220 return sysv_va_list_type_node;
33223 /* Returns the canonical va_list type specified by TYPE. If there
33224 is no valid TYPE provided, it returns NULL_TREE. */
33227 ix86_canonical_va_list_type (tree type)
33231 /* Resolve references and pointers to va_list type. */
33232 if (TREE_CODE (type) == MEM_REF)
33233 type = TREE_TYPE (type);
33234 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
33235 type = TREE_TYPE (type);
33236 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
33237 type = TREE_TYPE (type);
33241 wtype = va_list_type_node;
33242 gcc_assert (wtype != NULL_TREE);
33244 if (TREE_CODE (wtype) == ARRAY_TYPE)
33246 /* If va_list is an array type, the argument may have decayed
33247 to a pointer type, e.g. by being passed to another function.
33248 In that case, unwrap both types so that we can compare the
33249 underlying records. */
33250 if (TREE_CODE (htype) == ARRAY_TYPE
33251 || POINTER_TYPE_P (htype))
33253 wtype = TREE_TYPE (wtype);
33254 htype = TREE_TYPE (htype);
33257 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33258 return va_list_type_node;
33259 wtype = sysv_va_list_type_node;
33260 gcc_assert (wtype != NULL_TREE);
33262 if (TREE_CODE (wtype) == ARRAY_TYPE)
33264 /* If va_list is an array type, the argument may have decayed
33265 to a pointer type, e.g. by being passed to another function.
33266 In that case, unwrap both types so that we can compare the
33267 underlying records. */
33268 if (TREE_CODE (htype) == ARRAY_TYPE
33269 || POINTER_TYPE_P (htype))
33271 wtype = TREE_TYPE (wtype);
33272 htype = TREE_TYPE (htype);
33275 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33276 return sysv_va_list_type_node;
33277 wtype = ms_va_list_type_node;
33278 gcc_assert (wtype != NULL_TREE);
33280 if (TREE_CODE (wtype) == ARRAY_TYPE)
33282 /* If va_list is an array type, the argument may have decayed
33283 to a pointer type, e.g. by being passed to another function.
33284 In that case, unwrap both types so that we can compare the
33285 underlying records. */
33286 if (TREE_CODE (htype) == ARRAY_TYPE
33287 || POINTER_TYPE_P (htype))
33289 wtype = TREE_TYPE (wtype);
33290 htype = TREE_TYPE (htype);
33293 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
33294 return ms_va_list_type_node;
33297 return std_canonical_va_list_type (type);
33300 /* Iterate through the target-specific builtin types for va_list.
33301 IDX denotes the iterator, *PTREE is set to the result type of
33302 the va_list builtin, and *PNAME to its internal type.
33303 Returns zero if there is no element for this index, otherwise
33304 IDX should be increased upon the next call.
33305 Note, do not iterate a base builtin's name like __builtin_va_list.
33306 Used from c_common_nodes_and_builtins. */
33309 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
33319 *ptree = ms_va_list_type_node;
33320 *pname = "__builtin_ms_va_list";
33324 *ptree = sysv_va_list_type_node;
33325 *pname = "__builtin_sysv_va_list";
33333 #undef TARGET_SCHED_DISPATCH
33334 #define TARGET_SCHED_DISPATCH has_dispatch
33335 #undef TARGET_SCHED_DISPATCH_DO
33336 #define TARGET_SCHED_DISPATCH_DO do_dispatch
33338 /* The size of the dispatch window is the total number of bytes of
33339 object code allowed in a window. */
33340 #define DISPATCH_WINDOW_SIZE 16
33342 /* Number of dispatch windows considered for scheduling. */
33343 #define MAX_DISPATCH_WINDOWS 3
33345 /* Maximum number of instructions in a window. */
33348 /* Maximum number of immediate operands in a window. */
33351 /* Maximum number of immediate bits allowed in a window. */
33352 #define MAX_IMM_SIZE 128
33354 /* Maximum number of 32 bit immediates allowed in a window. */
33355 #define MAX_IMM_32 4
33357 /* Maximum number of 64 bit immediates allowed in a window. */
33358 #define MAX_IMM_64 2
33360 /* Maximum total of loads or prefetches allowed in a window. */
33363 /* Maximum total of stores allowed in a window. */
33364 #define MAX_STORE 1
33370 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
33371 enum dispatch_group {
33386 /* Number of allowable groups in a dispatch window. It is an array
33387 indexed by dispatch_group enum. 100 is used as a big number,
33388 because the number of these kinds of operations does not have any
33389 effect in a dispatch window, but we need them for other reasons in a window. */
33391 static unsigned int num_allowable_groups[disp_last] = {
33392 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
33395 char group_name[disp_last + 1][16] = {
33396 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
33397 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
33398 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
33401 /* Instruction path. */
33404 path_single, /* Single micro op. */
33405 path_double, /* Double micro op. */
33406 path_multi, /* Instructions with more than 2 micro-ops. */
33410 /* sched_insn_info defines a window to the instructions scheduled in
33411 the basic block. It contains a pointer to the insn_info table and
33412 the instruction scheduled.
33414 Windows are allocated for each basic block and are linked together. */
33416 typedef struct sched_insn_info_s {
33418 enum dispatch_group group;
33419 enum insn_path path;
33424 /* Linked list of dispatch windows. This is a two way list of
33425 dispatch windows of a basic block. It contains information about
33426 the number of uops in the window and the total number of
33427 instructions and of bytes in the object code for this dispatch window. */
33429 typedef struct dispatch_windows_s {
33430 int num_insn; /* Number of insn in the window. */
33431 int num_uops; /* Number of uops in the window. */
33432 int window_size; /* Number of bytes in the window. */
33433 int window_num; /* Window number, either 0 or 1. */
33434 int num_imm; /* Number of immediates in an insn. */
33435 int num_imm_32; /* Number of 32 bit immediates in an insn. */
33436 int num_imm_64; /* Number of 64 bit immediates in an insn. */
33437 int imm_size; /* Total immediates in the window. */
33438 int num_loads; /* Total memory loads in the window. */
33439 int num_stores; /* Total memory stores in the window. */
33440 int violation; /* Violation exists in window. */
33441 sched_insn_info *window; /* Pointer to the window. */
33442 struct dispatch_windows_s *next;
33443 struct dispatch_windows_s *prev;
33444 } dispatch_windows;
33446 /* Immediate values used in an insn. */
33447 typedef struct imm_info_s
33454 static dispatch_windows *dispatch_window_list;
33455 static dispatch_windows *dispatch_window_list1;
33457 /* Get dispatch group of insn. */
33459 static enum dispatch_group
33460 get_mem_group (rtx insn)
33462 enum attr_memory memory;
33464 if (INSN_CODE (insn) < 0)
33465 return disp_no_group;
33466 memory = get_attr_memory (insn);
33467 if (memory == MEMORY_STORE)
33470 if (memory == MEMORY_LOAD)
33473 if (memory == MEMORY_BOTH)
33474 return disp_load_store;
33476 return disp_no_group;
33479 /* Return true if insn is a compare instruction. */
33484 enum attr_type type;
33486 type = get_attr_type (insn);
33487 return (type == TYPE_TEST
33488 || type == TYPE_ICMP
33489 || type == TYPE_FCMP
33490 || GET_CODE (PATTERN (insn)) == COMPARE);
33493 /* Return true if a dispatch violation was encountered. */
33496 dispatch_violation (void)
33498 if (dispatch_window_list->next)
33499 return dispatch_window_list->next->violation;
33500 return dispatch_window_list->violation;
33503 /* Return true if insn is a branch instruction. */
33506 is_branch (rtx insn)
33508 return (CALL_P (insn) || JUMP_P (insn));
33511 /* Return true if insn is a prefetch instruction. */
33514 is_prefetch (rtx insn)
33516 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
33519 /* This function initializes a dispatch window and the list container holding a
33520 pointer to the window. */
33523 init_window (int window_num)
33526 dispatch_windows *new_list;
33528 if (window_num == 0)
33529 new_list = dispatch_window_list;
33531 new_list = dispatch_window_list1;
33533 new_list->num_insn = 0;
33534 new_list->num_uops = 0;
33535 new_list->window_size = 0;
33536 new_list->next = NULL;
33537 new_list->prev = NULL;
33538 new_list->window_num = window_num;
33539 new_list->num_imm = 0;
33540 new_list->num_imm_32 = 0;
33541 new_list->num_imm_64 = 0;
33542 new_list->imm_size = 0;
33543 new_list->num_loads = 0;
33544 new_list->num_stores = 0;
33545 new_list->violation = false;
33547 for (i = 0; i < MAX_INSN; i++)
33549 new_list->window[i].insn = NULL;
33550 new_list->window[i].group = disp_no_group;
33551 new_list->window[i].path = no_path;
33552 new_list->window[i].byte_len = 0;
33553 new_list->window[i].imm_bytes = 0;
33558 /* This function allocates and initializes a dispatch window and the
33559 list container holding a pointer to the window. */
33561 static dispatch_windows *
33562 allocate_window (void)
33564 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
33565 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
33570 /* This routine initializes the dispatch scheduling information. It
33571 initiates building dispatch scheduler tables and constructs the
33572 first dispatch window. */
33575 init_dispatch_sched (void)
33577 /* Allocate a dispatch list and a window. */
33578 dispatch_window_list = allocate_window ();
33579 dispatch_window_list1 = allocate_window ();
33584 /* This function returns true if a branch is detected. End of a basic block
33585 does not have to be a branch, but here we assume only branches end a basic block. */
33589 is_end_basic_block (enum dispatch_group group)
33591 return group == disp_branch;
33594 /* This function is called when the end of a window processing is reached. */
33597 process_end_window (void)
33599 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
33600 if (dispatch_window_list->next)
33602 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
33603 gcc_assert (dispatch_window_list->window_size
33604 + dispatch_window_list1->window_size <= 48);
33610 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
33611 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
33612 for 48 bytes of instructions. Note that these windows are not dispatch
33613 windows whose size is DISPATCH_WINDOW_SIZE. */
33615 static dispatch_windows *
33616 allocate_next_window (int window_num)
33618 if (window_num == 0)
33620 if (dispatch_window_list->next)
33623 return dispatch_window_list;
33626 dispatch_window_list->next = dispatch_window_list1;
33627 dispatch_window_list1->prev = dispatch_window_list;
33629 return dispatch_window_list1;
33632 /* Increment the number of immediate operands of an instruction. */
33635 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
33640 switch (GET_CODE (*in_rtx))
33645 (imm_values->imm)++;
33646 if (x86_64_immediate_operand (*in_rtx, SImode))
33647 (imm_values->imm32)++;
33649 (imm_values->imm64)++;
33653 (imm_values->imm)++;
33654 (imm_values->imm64)++;
33658 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
33660 (imm_values->imm)++;
33661 (imm_values->imm32)++;
33672 /* Compute number of immediate operands of an instruction. */
33675 find_constant (rtx in_rtx, imm_info *imm_values)
33677 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
33678 (rtx_function) find_constant_1, (void *) imm_values);
33681 /* Return total size of immediate operands of an instruction along with number
33682 of corresponding immediate-operands. It initializes its parameters to zero
33683 before calling FIND_CONSTANT.
33684 INSN is the input instruction. IMM is the total of immediates.
33685 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
33689 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
33691 imm_info imm_values = {0, 0, 0};
33693 find_constant (insn, &imm_values);
33694 *imm = imm_values.imm;
33695 *imm32 = imm_values.imm32;
33696 *imm64 = imm_values.imm64;
33697 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
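/* A standalone arithmetic check (hypothetical helper, not part of
   this file) of the size returned above: each 32-bit immediate costs
   4 bytes and each 64-bit immediate 8, so one of each gives
   4 + 8 = 12 bytes against the 16-byte MAX_IMM_SIZE budget.  */

static int
model_imm_bytes (int imm32, int imm64)
{
  return imm32 * 4 + imm64 * 8;
}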
33700 /* This function indicates if an operand of an instruction is an immediate. */
33704 has_immediate (rtx insn)
33706 int num_imm_operand;
33707 int num_imm32_operand;
33708 int num_imm64_operand;
33711 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33712 &num_imm64_operand);
33716 /* Return single or double path for instructions. */
33718 static enum insn_path
33719 get_insn_path (rtx insn)
33721 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
33723 if ((int)path == 0)
33724 return path_single;
33726 if ((int)path == 1)
33727 return path_double;
33732 /* Return insn dispatch group. */
33734 static enum dispatch_group
33735 get_insn_group (rtx insn)
33737 enum dispatch_group group = get_mem_group (insn);
33741 if (is_branch (insn))
33742 return disp_branch;
33747 if (has_immediate (insn))
33750 if (is_prefetch (insn))
33751 return disp_prefetch;
33753 return disp_no_group;
33756 /* Count number of GROUP restricted instructions in a dispatch
33757 window WINDOW_LIST. */
33760 count_num_restricted (rtx insn, dispatch_windows *window_list)
33762 enum dispatch_group group = get_insn_group (insn);
33764 int num_imm_operand;
33765 int num_imm32_operand;
33766 int num_imm64_operand;
33768 if (group == disp_no_group)
33771 if (group == disp_imm)
33773 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33774 &num_imm64_operand);
33775 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
33776 || num_imm_operand + window_list->num_imm > MAX_IMM
33777 || (num_imm32_operand > 0
33778 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
33779 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
33780 || (num_imm64_operand > 0
33781 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
33782 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
33783 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
33784 && num_imm64_operand > 0
33785 && ((window_list->num_imm_64 > 0
33786 && window_list->num_insn >= 2)
33787 || window_list->num_insn >= 3)))
33793 if ((group == disp_load_store
33794 && (window_list->num_loads >= MAX_LOAD
33795 || window_list->num_stores >= MAX_STORE))
33796 || ((group == disp_load
33797 || group == disp_prefetch)
33798 && window_list->num_loads >= MAX_LOAD)
33799 || (group == disp_store
33800 && window_list->num_stores >= MAX_STORE))
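/* A standalone sketch (hypothetical helper, not part of this file) of
   the memory-budget test above: an insn is rejected when it would push
   the window past its load or store caps (MAX_LOAD/MAX_STORE, passed
   in here as parameters).  */

static int
model_mem_budget_ok (int is_load, int is_store,
		     int num_loads, int num_stores,
		     int max_load, int max_store)
{
  if (is_load && num_loads >= max_load)
    return 0;
  if (is_store && num_stores >= max_store)
    return 0;
  return 1;
}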
33806 /* This function returns true if insn satisfies dispatch rules on the
33807 last window scheduled. */
33810 fits_dispatch_window (rtx insn)
33812 dispatch_windows *window_list = dispatch_window_list;
33813 dispatch_windows *window_list_next = dispatch_window_list->next;
33814 unsigned int num_restrict;
33815 enum dispatch_group group = get_insn_group (insn);
33816 enum insn_path path = get_insn_path (insn);
33819 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
33820 instructions should be given the lowest priority in the
33821 scheduling process in Haifa scheduler to make sure they will be
33822 scheduled in the same dispatch window as the reference to them. */
33823 if (group == disp_jcc || group == disp_cmp)
33826 /* Check nonrestricted. */
33827 if (group == disp_no_group || group == disp_branch)
33830 /* Get last dispatch window. */
33831 if (window_list_next)
33832 window_list = window_list_next;
33834 if (window_list->window_num == 1)
33836 sum = window_list->prev->window_size + window_list->window_size;
33839 || (min_insn_size (insn) + sum) >= 48)
33840 /* Window 1 is full. Go for next window. */
33844 num_restrict = count_num_restricted (insn, window_list);
33846 if (num_restrict > num_allowable_groups[group])
33849 /* See if it fits in the first window. */
33850 if (window_list->window_num == 0)
33852 /* The first window should have only single and double path uops. */
33854 if (path == path_double
33855 && (window_list->num_uops + 2) > MAX_INSN)
33857 else if (path != path_single)
33863 /* Add an instruction INSN with NUM_UOPS micro-operations to the
33864 dispatch window WINDOW_LIST. */
33867 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
33869 int byte_len = min_insn_size (insn);
33870 int num_insn = window_list->num_insn;
33872 sched_insn_info *window = window_list->window;
33873 enum dispatch_group group = get_insn_group (insn);
33874 enum insn_path path = get_insn_path (insn);
33875 int num_imm_operand;
33876 int num_imm32_operand;
33877 int num_imm64_operand;
33879 if (!window_list->violation && group != disp_cmp
33880 && !fits_dispatch_window (insn))
33881 window_list->violation = true;
33883 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33884 &num_imm64_operand);
33886 /* Initialize window with new instruction. */
33887 window[num_insn].insn = insn;
33888 window[num_insn].byte_len = byte_len;
33889 window[num_insn].group = group;
33890 window[num_insn].path = path;
33891 window[num_insn].imm_bytes = imm_size;
33893 window_list->window_size += byte_len;
33894 window_list->num_insn = num_insn + 1;
33895 window_list->num_uops = window_list->num_uops + num_uops;
33896 window_list->imm_size += imm_size;
33897 window_list->num_imm += num_imm_operand;
33898 window_list->num_imm_32 += num_imm32_operand;
33899 window_list->num_imm_64 += num_imm64_operand;
33901 if (group == disp_store)
33902 window_list->num_stores += 1;
33903 else if (group == disp_load
33904 || group == disp_prefetch)
33905 window_list->num_loads += 1;
33906 else if (group == disp_load_store)
33908 window_list->num_stores += 1;
33909 window_list->num_loads += 1;
33913 /* Adds a scheduled instruction, INSN, to the current dispatch window.
33914 If the total bytes of instructions or the number of instructions in
33915 the window exceeds the allowable limits, it allocates a new window. */
33918 add_to_dispatch_window (rtx insn)
33921 dispatch_windows *window_list;
33922 dispatch_windows *next_list;
33923 dispatch_windows *window0_list;
33924 enum insn_path path;
33925 enum dispatch_group insn_group;
33933 if (INSN_CODE (insn) < 0)
33936 byte_len = min_insn_size (insn);
33937 window_list = dispatch_window_list;
33938 next_list = window_list->next;
33939 path = get_insn_path (insn);
33940 insn_group = get_insn_group (insn);
33942 /* Get the last dispatch window. */
33944 window_list = dispatch_window_list->next;
33946 if (path == path_single)
33948 else if (path == path_double)
33951 insn_num_uops = (int) path;
33953 /* If the current window is full, get a new window.
33954 Window number zero is full if MAX_INSN uops are scheduled in it.
33955 Window number one is full if window zero's bytes plus window
33956 one's bytes reach 32, if adding the new instruction's bytes
33957 to the total makes it greater than 48, or if it already has MAX_INSN
33958 instructions in it. */
33959 num_insn = window_list->num_insn;
33960 num_uops = window_list->num_uops;
33961 window_num = window_list->window_num;
33962 insn_fits = fits_dispatch_window (insn);
33964 if (num_insn >= MAX_INSN
33965 || num_uops + insn_num_uops > MAX_INSN
33968 window_num = ~window_num & 1;
33969 window_list = allocate_next_window (window_num);
33972 if (window_num == 0)
33974 add_insn_window (insn, window_list, insn_num_uops);
33975 if (window_list->num_insn >= MAX_INSN
33976 && insn_group == disp_branch)
33978 process_end_window ();
33982 else if (window_num == 1)
33984 window0_list = window_list->prev;
33985 sum = window0_list->window_size + window_list->window_size;
33987 || (byte_len + sum) >= 48)
33989 process_end_window ();
33990 window_list = dispatch_window_list;
33993 add_insn_window (insn, window_list, insn_num_uops);
33996 gcc_unreachable ();
33998 if (is_end_basic_block (insn_group))
34000 /* End of basic block is reached; do the end-of-basic-block processing. */
34001 process_end_window ();
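/* A standalone sketch (hypothetical helper, not part of this file) of
   the window-full decision described above: window 0 fills on the uop
   count alone, while window 1 also fills once the two windows'
   combined bytes would reach the 48-byte budget.  */

static int
model_window_full (int num_insn, int num_uops, int insn_uops,
		   int window_num, int bytes0, int bytes1,
		   int insn_bytes, int max_insn)
{
  if (num_insn >= max_insn || num_uops + insn_uops > max_insn)
    return 1;
  if (window_num == 1 && bytes0 + bytes1 + insn_bytes >= 48)
    return 1;
  return 0;
}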
34006 /* Print the dispatch window, WINDOW_NUM, to FILE. */
34008 DEBUG_FUNCTION static void
34009 debug_dispatch_window_file (FILE *file, int window_num)
34011 dispatch_windows *list;
34014 if (window_num == 0)
34015 list = dispatch_window_list;
34017 list = dispatch_window_list1;
34019 fprintf (file, "Window #%d:\n", list->window_num);
34020 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
34021 list->num_insn, list->num_uops, list->window_size);
34022 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34023 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
34025 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
34027 fprintf (file, " insn info:\n");
34029 for (i = 0; i < MAX_INSN; i++)
34031 if (!list->window[i].insn)
34033 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
34034 i, group_name[list->window[i].group],
34035 i, (void *)list->window[i].insn,
34036 i, list->window[i].path,
34037 i, list->window[i].byte_len,
34038 i, list->window[i].imm_bytes);
34042 /* Print to stdout a dispatch window. */
34044 DEBUG_FUNCTION void
34045 debug_dispatch_window (int window_num)
34047 debug_dispatch_window_file (stdout, window_num);
34050 /* Print INSN dispatch information to FILE. */
34052 DEBUG_FUNCTION static void
34053 debug_insn_dispatch_info_file (FILE *file, rtx insn)
34056 enum insn_path path;
34057 enum dispatch_group group;
34059 int num_imm_operand;
34060 int num_imm32_operand;
34061 int num_imm64_operand;
34063 if (INSN_CODE (insn) < 0)
34066 byte_len = min_insn_size (insn);
34067 path = get_insn_path (insn);
34068 group = get_insn_group (insn);
34069 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
34070 &num_imm64_operand);
34072 fprintf (file, " insn info:\n");
34073 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
34074 group_name[group], path, byte_len);
34075 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
34076 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
34079 /* Print to STDOUT the status of the ready list with respect to
34080 dispatch windows. */
34082 DEBUG_FUNCTION void
34083 debug_ready_dispatch (void)
34086 int no_ready = number_in_ready ();
34088 fprintf (stdout, "Number of ready: %d\n", no_ready);
34090 for (i = 0; i < no_ready; i++)
34091 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
34094 /* This routine is the driver of the dispatch scheduler. */
34097 do_dispatch (rtx insn, int mode)
34099 if (mode == DISPATCH_INIT)
34100 init_dispatch_sched ();
34101 else if (mode == ADD_TO_DISPATCH_WINDOW)
34102 add_to_dispatch_window (insn);
34105 /* Return TRUE if Dispatch Scheduling is supported. */
34108 has_dispatch (rtx insn, int action)
34110 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
34116 case IS_DISPATCH_ON:
34121 return is_cmp (insn);
34123 case DISPATCH_VIOLATION:
34124 return dispatch_violation ();
34126 case FITS_DISPATCH_WINDOW:
34127 return fits_dispatch_window (insn);
34133 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
34134 place emms and femms instructions. */
34136 static enum machine_mode
34137 ix86_preferred_simd_mode (enum machine_mode mode)
34139 /* Disable double precision vectorizer if needed. */
34140 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
34143 if (!TARGET_AVX && !TARGET_SSE)
34149 return TARGET_AVX ? V8SFmode : V4SFmode;
34151 return TARGET_AVX ? V4DFmode : V2DFmode;
34167 /* If AVX is enabled then try vectorizing with both 256bit and 128bit vectors. */
34170 static unsigned int
34171 ix86_autovectorize_vector_sizes (void)
34173 return TARGET_AVX ? 32 | 16 : 0;
34176 /* Initialize the GCC target structure. */
34177 #undef TARGET_RETURN_IN_MEMORY
34178 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
34180 #undef TARGET_LEGITIMIZE_ADDRESS
34181 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
34183 #undef TARGET_ATTRIBUTE_TABLE
34184 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
34185 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34186 # undef TARGET_MERGE_DECL_ATTRIBUTES
34187 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
34190 #undef TARGET_COMP_TYPE_ATTRIBUTES
34191 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
34193 #undef TARGET_INIT_BUILTINS
34194 #define TARGET_INIT_BUILTINS ix86_init_builtins
34195 #undef TARGET_BUILTIN_DECL
34196 #define TARGET_BUILTIN_DECL ix86_builtin_decl
34197 #undef TARGET_EXPAND_BUILTIN
34198 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
34200 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
34201 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
34202 ix86_builtin_vectorized_function
34204 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
34205 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
34207 #undef TARGET_BUILTIN_RECIPROCAL
34208 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
34210 #undef TARGET_ASM_FUNCTION_EPILOGUE
34211 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
34213 #undef TARGET_ENCODE_SECTION_INFO
34214 #ifndef SUBTARGET_ENCODE_SECTION_INFO
34215 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
34217 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
34220 #undef TARGET_ASM_OPEN_PAREN
34221 #define TARGET_ASM_OPEN_PAREN ""
34222 #undef TARGET_ASM_CLOSE_PAREN
34223 #define TARGET_ASM_CLOSE_PAREN ""
34225 #undef TARGET_ASM_BYTE_OP
34226 #define TARGET_ASM_BYTE_OP ASM_BYTE
34228 #undef TARGET_ASM_ALIGNED_HI_OP
34229 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
34230 #undef TARGET_ASM_ALIGNED_SI_OP
34231 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
34233 #undef TARGET_ASM_ALIGNED_DI_OP
34234 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
34237 #undef TARGET_PROFILE_BEFORE_PROLOGUE
34238 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
34240 #undef TARGET_ASM_UNALIGNED_HI_OP
34241 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
34242 #undef TARGET_ASM_UNALIGNED_SI_OP
34243 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
34244 #undef TARGET_ASM_UNALIGNED_DI_OP
34245 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
34247 #undef TARGET_PRINT_OPERAND
34248 #define TARGET_PRINT_OPERAND ix86_print_operand
34249 #undef TARGET_PRINT_OPERAND_ADDRESS
34250 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
34251 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
34252 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
34253 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
34254 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
34256 #undef TARGET_SCHED_INIT_GLOBAL
34257 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
34258 #undef TARGET_SCHED_ADJUST_COST
34259 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
34260 #undef TARGET_SCHED_ISSUE_RATE
34261 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
34262 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
34263 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
34264 ia32_multipass_dfa_lookahead
34266 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
34267 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
34270 #undef TARGET_HAVE_TLS
34271 #define TARGET_HAVE_TLS true
34273 #undef TARGET_CANNOT_FORCE_CONST_MEM
34274 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
34275 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
34276 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
34278 #undef TARGET_DELEGITIMIZE_ADDRESS
34279 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
34281 #undef TARGET_MS_BITFIELD_LAYOUT_P
34282 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
34285 #undef TARGET_BINDS_LOCAL_P
34286 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
34288 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
34289 #undef TARGET_BINDS_LOCAL_P
34290 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
34293 #undef TARGET_ASM_OUTPUT_MI_THUNK
34294 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
34295 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
34296 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
34298 #undef TARGET_ASM_FILE_START
34299 #define TARGET_ASM_FILE_START x86_file_start
34301 #undef TARGET_DEFAULT_TARGET_FLAGS
34302 #define TARGET_DEFAULT_TARGET_FLAGS \
34304 | TARGET_SUBTARGET_DEFAULT \
34305 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
34308 #undef TARGET_HANDLE_OPTION
34309 #define TARGET_HANDLE_OPTION ix86_handle_option
34311 #undef TARGET_OPTION_OVERRIDE
34312 #define TARGET_OPTION_OVERRIDE ix86_option_override
34313 #undef TARGET_OPTION_OPTIMIZATION_TABLE
34314 #define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
34315 #undef TARGET_OPTION_INIT_STRUCT
34316 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
34318 #undef TARGET_REGISTER_MOVE_COST
34319 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
34320 #undef TARGET_MEMORY_MOVE_COST
34321 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
34322 #undef TARGET_RTX_COSTS
34323 #define TARGET_RTX_COSTS ix86_rtx_costs
34324 #undef TARGET_ADDRESS_COST
34325 #define TARGET_ADDRESS_COST ix86_address_cost
34327 #undef TARGET_FIXED_CONDITION_CODE_REGS
34328 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
34329 #undef TARGET_CC_MODES_COMPATIBLE
34330 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
34332 #undef TARGET_MACHINE_DEPENDENT_REORG
34333 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
34335 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
34336 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
34338 #undef TARGET_BUILD_BUILTIN_VA_LIST
34339 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
34341 #undef TARGET_ENUM_VA_LIST_P
34342 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
34344 #undef TARGET_FN_ABI_VA_LIST
34345 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
34347 #undef TARGET_CANONICAL_VA_LIST_TYPE
34348 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
34350 #undef TARGET_EXPAND_BUILTIN_VA_START
34351 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
34353 #undef TARGET_MD_ASM_CLOBBERS
34354 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
34356 #undef TARGET_PROMOTE_PROTOTYPES
34357 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
34358 #undef TARGET_STRUCT_VALUE_RTX
34359 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
34360 #undef TARGET_SETUP_INCOMING_VARARGS
34361 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
34362 #undef TARGET_MUST_PASS_IN_STACK
34363 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
34364 #undef TARGET_FUNCTION_ARG_ADVANCE
34365 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
34366 #undef TARGET_FUNCTION_ARG
34367 #define TARGET_FUNCTION_ARG ix86_function_arg
34368 #undef TARGET_PASS_BY_REFERENCE
34369 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
34370 #undef TARGET_INTERNAL_ARG_POINTER
34371 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
34372 #undef TARGET_UPDATE_STACK_BOUNDARY
34373 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
34374 #undef TARGET_GET_DRAP_RTX
34375 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
34376 #undef TARGET_STRICT_ARGUMENT_NAMING
34377 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
34378 #undef TARGET_STATIC_CHAIN
34379 #define TARGET_STATIC_CHAIN ix86_static_chain
34380 #undef TARGET_TRAMPOLINE_INIT
34381 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
34382 #undef TARGET_RETURN_POPS_ARGS
34383 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
34385 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
34386 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
34388 #undef TARGET_SCALAR_MODE_SUPPORTED_P
34389 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
34391 #undef TARGET_VECTOR_MODE_SUPPORTED_P
34392 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
34394 #undef TARGET_C_MODE_FOR_SUFFIX
34395 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
34398 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
34399 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
34402 #ifdef SUBTARGET_INSERT_ATTRIBUTES
34403 #undef TARGET_INSERT_ATTRIBUTES
34404 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
34407 #undef TARGET_MANGLE_TYPE
34408 #define TARGET_MANGLE_TYPE ix86_mangle_type
34410 #undef TARGET_STACK_PROTECT_FAIL
34411 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
34413 #undef TARGET_SUPPORTS_SPLIT_STACK
34414 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
34416 #undef TARGET_FUNCTION_VALUE
34417 #define TARGET_FUNCTION_VALUE ix86_function_value
34419 #undef TARGET_FUNCTION_VALUE_REGNO_P
34420 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
34422 #undef TARGET_SECONDARY_RELOAD
34423 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
34425 #undef TARGET_PREFERRED_RELOAD_CLASS
34426 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
34427 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
34428 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
34429 #undef TARGET_CLASS_LIKELY_SPILLED_P
34430 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
34432 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
34433 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
34434 ix86_builtin_vectorization_cost
34435 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
34436 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
34437 ix86_vectorize_builtin_vec_perm
34438 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
34439 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
34440 ix86_vectorize_builtin_vec_perm_ok
34441 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
34442 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
34443 ix86_preferred_simd_mode
34444 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
34445 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
34446 ix86_autovectorize_vector_sizes
34448 #undef TARGET_SET_CURRENT_FUNCTION
34449 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
34451 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
34452 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
34454 #undef TARGET_OPTION_SAVE
34455 #define TARGET_OPTION_SAVE ix86_function_specific_save
34457 #undef TARGET_OPTION_RESTORE
34458 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
34460 #undef TARGET_OPTION_PRINT
34461 #define TARGET_OPTION_PRINT ix86_function_specific_print
34463 #undef TARGET_CAN_INLINE_P
34464 #define TARGET_CAN_INLINE_P ix86_can_inline_p
34466 #undef TARGET_EXPAND_TO_RTL_HOOK
34467 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
34469 #undef TARGET_LEGITIMATE_ADDRESS_P
34470 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
34472 #undef TARGET_IRA_COVER_CLASSES
34473 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
34475 #undef TARGET_FRAME_POINTER_REQUIRED
34476 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
34478 #undef TARGET_CAN_ELIMINATE
34479 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
34481 #undef TARGET_EXTRA_LIVE_ON_ENTRY
34482 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
34484 #undef TARGET_ASM_CODE_END
34485 #define TARGET_ASM_CODE_END ix86_code_end
34487 struct gcc_target targetm = TARGET_INITIALIZER;
34489 #include "gt-i386.h"