1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
45 #ifndef CHECK_STACK_LIMIT
46 #define CHECK_STACK_LIMIT -1
49 /* Processor costs (relative to an add) */
50 struct processor_costs i386_cost = { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8} /* cost of loading integer registers */
71 struct processor_costs i486_cost = { /* 486 specific costs */
72 1, /* cost of an add instruction */
73 1, /* cost of a lea instruction */
74 3, /* variable shift costs */
75 2, /* constant shift costs */
76 12, /* cost of starting a multiply */
77 1, /* cost of multiply per each bit set */
78 40, /* cost of a divide/mod */
79 15, /* "large" insn */
81 4, /* cost for loading QImode using movzbl */
82 {2, 4, 2}, /* cost of loading integer registers
83 in QImode, HImode and SImode.
84 Relative to reg-reg move (2). */
85 {2, 4, 2}, /* cost of storing integer registers */
86 2, /* cost of reg,reg fld/fst */
87 {8, 8, 8}, /* cost of loading fp registers
88 in SFmode, DFmode and XFmode */
89 {8, 8, 8} /* cost of loading integer registers */
92 struct processor_costs pentium_cost = {
93 1, /* cost of an add instruction */
94 1, /* cost of a lea instruction */
95 4, /* variable shift costs */
96 1, /* constant shift costs */
97 11, /* cost of starting a multiply */
98 0, /* cost of multiply per each bit set */
99 25, /* cost of a divide/mod */
100 8, /* "large" insn */
102 6, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {2, 2, 6}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {4, 4, 6} /* cost of loading integer registers */
113 struct processor_costs pentiumpro_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 1, /* variable shift costs */
117 1, /* constant shift costs */
118 4, /* cost of starting a multiply */
119 0, /* cost of multiply per each bit set */
120 17, /* cost of a divide/mod */
121 8, /* "large" insn */
123 2, /* cost for loading QImode using movzbl */
124 {4, 4, 4}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 2, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6} /* cost of loading integer registers */
134 struct processor_costs k6_cost = {
135 1, /* cost of an add instruction */
136 2, /* cost of a lea instruction */
137 1, /* variable shift costs */
138 1, /* constant shift costs */
139 3, /* cost of starting a multiply */
140 0, /* cost of multiply per each bit set */
141 18, /* cost of a divide/mod */
142 8, /* "large" insn */
144 3, /* cost for loading QImode using movzbl */
145 {4, 5, 4}, /* cost of loading integer registers
146 in QImode, HImode and SImode.
147 Relative to reg-reg move (2). */
148 {2, 3, 2}, /* cost of storing integer registers */
149 4, /* cost of reg,reg fld/fst */
150 {6, 6, 6}, /* cost of loading fp registers
151 in SFmode, DFmode and XFmode */
152 {4, 4, 4} /* cost of loading integer registers */
155 struct processor_costs athlon_cost = {
156 1, /* cost of an add instruction */
157 2, /* cost of a lea instruction */
158 1, /* variable shift costs */
159 1, /* constant shift costs */
160 5, /* cost of starting a multiply */
161 0, /* cost of multiply per each bit set */
162 42, /* cost of a divide/mod */
163 8, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {4, 5, 4}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 3, 2}, /* cost of storing integer registers */
170 4, /* cost of reg,reg fld/fst */
171 {6, 6, 20}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {4, 4, 16} /* cost of loading integer registers */
176 struct processor_costs *ix86_cost = &pentium_cost;
178 /* Processor feature/optimization bitmasks. */
179 #define m_386 (1<<PROCESSOR_I386)
180 #define m_486 (1<<PROCESSOR_I486)
181 #define m_PENT (1<<PROCESSOR_PENTIUM)
182 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
183 #define m_K6 (1<<PROCESSOR_K6)
184 #define m_ATHLON (1<<PROCESSOR_ATHLON)
186 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
187 const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
188 const int x86_zero_extend_with_and = m_486 | m_PENT;
189 const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
190 const int x86_double_with_add = ~m_386;
191 const int x86_use_bit_test = m_386;
192 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
193 const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
194 const int x86_use_any_reg = m_486;
195 const int x86_cmove = m_PPRO | m_ATHLON;
196 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
197 const int x86_use_sahf = m_PPRO | m_K6;
198 const int x86_partial_reg_stall = m_PPRO;
199 const int x86_use_loop = m_K6;
200 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
201 const int x86_use_mov0 = m_K6;
202 const int x86_use_cltd = ~(m_PENT | m_K6);
203 const int x86_read_modify_write = ~m_PENT;
204 const int x86_read_modify = ~(m_PENT | m_PPRO);
205 const int x86_split_long_moves = m_PPRO;
206 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
207 const int x86_single_stringop = m_386;
208 const int x86_qimode_math = ~(0);
209 const int x86_promote_qi_regs = 0;
210 const int x86_himode_math = ~(m_PPRO);
211 const int x86_promote_hi_regs = m_PPRO;
212 const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
213 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
214 const int x86_add_esp_4 = m_ATHLON | m_K6;
215 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
216 const int x86_integer_DFmode_moves = ~m_ATHLON;
217 const int x86_partial_reg_dependency = m_ATHLON;
218 const int x86_memory_mismatch_stall = m_ATHLON;
220 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
222 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
223 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
224 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
226 /* Array of the smallest class containing reg number REGNO, indexed by
227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
232 AREG, DREG, CREG, BREG,
234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
240 /* flags, fpsr, dirflag, frame */
241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
242 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
244 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
248 /* The "default" register map. */
250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
254 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
259 /* Define the register numbers to be used in Dwarf debugging information.
260 The SVR4 reference port C compiler uses the following register numbers
261 in its Dwarf output code:
262 0 for %eax (gcc regno = 0)
263 1 for %ecx (gcc regno = 2)
264 2 for %edx (gcc regno = 1)
265 3 for %ebx (gcc regno = 3)
266 4 for %esp (gcc regno = 7)
267 5 for %ebp (gcc regno = 6)
268 6 for %esi (gcc regno = 4)
269 7 for %edi (gcc regno = 5)
270 The following three DWARF register numbers are never generated by
271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
272 believes these numbers have these meanings.
273 8 for %eip (no gcc equivalent)
274 9 for %eflags (gcc regno = 17)
275 10 for %trapno (no gcc equivalent)
276 It is not at all clear how we should number the FP stack registers
277 for the x86 architecture. If the version of SDB on x86/svr4 were
278 a bit less brain dead with respect to floating-point then we would
279 have a precedent to follow with respect to DWARF register numbers
280 for x86 FP registers, but the SDB on x86/svr4 is so completely
281 broken with respect to FP registers that it is hardly worth thinking
282 of it as something to strive for compatibility with.
283 The version of x86/svr4 SDB I have at the moment does (partially)
284 seem to believe that DWARF register number 11 is associated with
285 the x86 register %st(0), but that's about all. Higher DWARF
286 register numbers don't seem to be associated with anything in
287 particular, and even for DWARF regno 11, SDB only seems to under-
288 stand that it should say that a variable lives in %st(0) (when
289 asked via an `=' command) if we said it was in DWARF regno 11,
290 but SDB still prints garbage when asked for the value of the
291 variable in question (via a `/' command).
292 (Also note that the labels SDB prints for various FP stack regs
293 when doing an `x' command are all wrong.)
294 Note that these problems generally don't affect the native SVR4
295 C compiler because it doesn't allow the use of -O with -g and
296 because when it is *not* optimizing, it allocates a memory
297 location for each floating-point variable, and the memory
298 location is what gets described in the DWARF AT_location
299 attribute for the variable in question.
300 Regardless of the severe mental illness of the x86/svr4 SDB, we
301 do something sensible here and we use the following DWARF
302 register numbers. Note that these are all stack-top-relative
304 11 for %st(0) (gcc regno = 8)
305 12 for %st(1) (gcc regno = 9)
306 13 for %st(2) (gcc regno = 10)
307 14 for %st(3) (gcc regno = 11)
308 15 for %st(4) (gcc regno = 12)
309 16 for %st(5) (gcc regno = 13)
310 17 for %st(6) (gcc regno = 14)
311 18 for %st(7) (gcc regno = 15)
313 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
317 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
322 /* Test and compare insns in i386.md store the information needed to
323 generate branch and scc insns here. */
325 struct rtx_def *ix86_compare_op0 = NULL_RTX;
326 struct rtx_def *ix86_compare_op1 = NULL_RTX;
328 #define MAX_386_STACK_LOCALS 2
330 /* Define the structure for the machine field in struct function. */
331 struct machine_function
333 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
336 #define ix86_stack_locals (cfun->machine->stack_locals)
338 /* which cpu are we scheduling for */
339 enum processor_type ix86_cpu;
341 /* which instruction set architecture to use. */
344 /* Strings to hold which cpu and instruction set architecture to use. */
345 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
346 const char *ix86_arch_string; /* for -march=<xxx> */
348 /* Register allocation order */
349 const char *ix86_reg_alloc_order;
350 static char regs_allocated[FIRST_PSEUDO_REGISTER];
352 /* # of registers to use to pass arguments. */
353 const char *ix86_regparm_string;
355 /* ix86_regparm_string as a number */
358 /* Alignment to use for loops and jumps: */
360 /* Power of two alignment for loops. */
361 const char *ix86_align_loops_string;
363 /* Power of two alignment for non-loop jumps. */
364 const char *ix86_align_jumps_string;
366 /* Power of two alignment for stack boundary in bytes. */
367 const char *ix86_preferred_stack_boundary_string;
369 /* Preferred alignment for stack boundary in bits. */
370 int ix86_preferred_stack_boundary;
372 /* Values 1-5: see jump.c */
373 int ix86_branch_cost;
374 const char *ix86_branch_cost_string;
376 /* Power of two alignment for functions. */
377 int ix86_align_funcs;
378 const char *ix86_align_funcs_string;
380 /* Power of two alignment for loops. */
381 int ix86_align_loops;
383 /* Power of two alignment for non-loop jumps. */
384 int ix86_align_jumps;
386 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
387 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
389 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
390 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
392 static rtx gen_push PARAMS ((rtx));
393 static int memory_address_length PARAMS ((rtx addr));
394 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
395 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
396 static int ix86_safe_length PARAMS ((rtx));
397 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
398 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
399 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
400 static void ix86_dump_ppro_packet PARAMS ((FILE *));
401 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
402 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
404 static void ix86_init_machine_status PARAMS ((struct function *));
405 static void ix86_mark_machine_status PARAMS ((struct function *));
406 static void ix86_free_machine_status PARAMS ((struct function *));
407 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
408 static int ix86_safe_length_prefix PARAMS ((rtx));
409 static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
410 int *, int *, int *));
411 static int ix86_nsaved_regs PARAMS((void));
412 static void ix86_emit_save_regs PARAMS((void));
413 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
414 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
415 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
416 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
417 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
421 rtx base, index, disp;
425 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
427 struct builtin_description;
428 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
430 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
432 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
433 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
434 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
435 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
436 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
437 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
438 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
442 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
444 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
445 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
446 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
447 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
449 /* Sometimes certain combinations of command options do not make
450 sense on a particular target machine. You can define a macro
451 `OVERRIDE_OPTIONS' to take account of this. This macro, if
452 defined, is executed once just after all the command options have
455 Don't use this macro to turn on various extra optimizations for
456 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
461 /* Comes from final.c -- no real reason to change it. */
462 #define MAX_CODE_ALIGN 16
466 struct processor_costs *cost; /* Processor costs */
467 int target_enable; /* Target flags to enable. */
468 int target_disable; /* Target flags to disable. */
469 int align_loop; /* Default alignments. */
474 const processor_target_table[PROCESSOR_max] =
476 {&i386_cost, 0, 0, 2, 2, 2, 1},
477 {&i486_cost, 0, 0, 4, 4, 4, 1},
478 {&pentium_cost, 0, 0, -4, -4, -4, 1},
479 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
480 {&k6_cost, 0, 0, -5, -5, 4, 1},
481 {&athlon_cost, 0, 0, 4, -4, 4, 1}
486 const char *name; /* processor name or nickname. */
487 enum processor_type processor;
489 const processor_alias_table[] =
491 {"i386", PROCESSOR_I386},
492 {"i486", PROCESSOR_I486},
493 {"i586", PROCESSOR_PENTIUM},
494 {"pentium", PROCESSOR_PENTIUM},
495 {"i686", PROCESSOR_PENTIUMPRO},
496 {"pentiumpro", PROCESSOR_PENTIUMPRO},
497 {"k6", PROCESSOR_K6},
498 {"athlon", PROCESSOR_ATHLON},
501 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
503 #ifdef SUBTARGET_OVERRIDE_OPTIONS
504 SUBTARGET_OVERRIDE_OPTIONS;
507 ix86_arch = PROCESSOR_I386;
508 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
510 if (ix86_arch_string != 0)
513 for (i = 0; i < pta_size; i++)
514 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
516 ix86_arch = processor_alias_table[i].processor;
517 /* Default cpu tuning to the architecture. */
518 ix86_cpu = ix86_arch;
522 error ("bad value (%s) for -march= switch", ix86_arch_string);
525 if (ix86_cpu_string != 0)
528 for (i = 0; i < pta_size; i++)
529 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
531 ix86_cpu = processor_alias_table[i].processor;
535 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
538 ix86_cost = processor_target_table[ix86_cpu].cost;
539 target_flags |= processor_target_table[ix86_cpu].target_enable;
540 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
542 /* Arrange to set up i386_stack_locals for all functions. */
543 init_machine_status = ix86_init_machine_status;
544 mark_machine_status = ix86_mark_machine_status;
545 free_machine_status = ix86_free_machine_status;
547 /* Validate registers in register allocation order. */
548 if (ix86_reg_alloc_order)
551 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
557 case 'a': regno = 0; break;
558 case 'd': regno = 1; break;
559 case 'c': regno = 2; break;
560 case 'b': regno = 3; break;
561 case 'S': regno = 4; break;
562 case 'D': regno = 5; break;
563 case 'B': regno = 6; break;
565 default: fatal ("Register '%c' is unknown", ch);
568 if (regs_allocated[regno])
569 fatal ("Register '%c' already specified in allocation order", ch);
571 regs_allocated[regno] = 1;
575 /* Validate -mregparm= value. */
576 if (ix86_regparm_string)
578 ix86_regparm = atoi (ix86_regparm_string);
579 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
580 fatal ("-mregparm=%d is not between 0 and %d",
581 ix86_regparm, REGPARM_MAX);
584 /* Validate -malign-loops= value, or provide default. */
585 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
586 if (ix86_align_loops_string)
588 ix86_align_loops = atoi (ix86_align_loops_string);
589 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
590 fatal ("-malign-loops=%d is not between 0 and %d",
591 ix86_align_loops, MAX_CODE_ALIGN);
594 /* Validate -malign-jumps= value, or provide default. */
595 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
596 if (ix86_align_jumps_string)
598 ix86_align_jumps = atoi (ix86_align_jumps_string);
599 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
600 fatal ("-malign-jumps=%d is not between 0 and %d",
601 ix86_align_jumps, MAX_CODE_ALIGN);
604 /* Validate -malign-functions= value, or provide default. */
605 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
606 if (ix86_align_funcs_string)
608 ix86_align_funcs = atoi (ix86_align_funcs_string);
609 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
610 fatal ("-malign-functions=%d is not between 0 and %d",
611 ix86_align_funcs, MAX_CODE_ALIGN);
614 /* Validate -mpreferred-stack-boundary= value, or provide default.
615 The default of 128 bits is for Pentium III's SSE __m128. */
616 ix86_preferred_stack_boundary = 128;
617 if (ix86_preferred_stack_boundary_string)
619 int i = atoi (ix86_preferred_stack_boundary_string);
621 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
622 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
625 /* Validate -mbranch-cost= value, or provide default. */
626 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
627 if (ix86_branch_cost_string)
629 ix86_branch_cost = atoi (ix86_branch_cost_string);
630 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
631 fatal ("-mbranch-cost=%d is not between 0 and 5",
635 /* Keep nonleaf frame pointers. */
636 if (TARGET_OMIT_LEAF_FRAME_POINTER)
637 flag_omit_frame_pointer = 1;
639 /* If we're doing fast math, we don't care about comparison order
640 wrt NaNs. This lets us use a shorter comparison sequence. */
642 target_flags &= ~MASK_IEEE_FP;
644 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
647 target_flags |= MASK_MMX;
650 /* A C statement (sans semicolon) to choose the order in which to
651 allocate hard registers for pseudo-registers local to a basic
654 Store the desired register order in the array `reg_alloc_order'.
655 Element 0 should be the register to allocate first; element 1, the
656 next register; and so on.
658 The macro body should not assume anything about the contents of
659 `reg_alloc_order' before execution of the macro.
661 On most machines, it is not necessary to define this macro. */
664 order_regs_for_local_alloc ()
668 /* User specified the register allocation order. */
670 if (ix86_reg_alloc_order)
672 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
678 case 'a': regno = 0; break;
679 case 'd': regno = 1; break;
680 case 'c': regno = 2; break;
681 case 'b': regno = 3; break;
682 case 'S': regno = 4; break;
683 case 'D': regno = 5; break;
684 case 'B': regno = 6; break;
687 reg_alloc_order[order++] = regno;
690 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
692 if (! regs_allocated[i])
693 reg_alloc_order[order++] = i;
697 /* If user did not specify a register allocation order, use natural order. */
700 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
701 reg_alloc_order[i] = i;
706 optimization_options (level, size)
708 int size ATTRIBUTE_UNUSED;
710 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
711 make the problem with not enough registers even worse. */
712 #ifdef INSN_SCHEDULING
714 flag_schedule_insns = 0;
718 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
719 attribute for DECL. The attributes in ATTRIBUTES have previously been
723 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
724 tree decl ATTRIBUTE_UNUSED;
725 tree attributes ATTRIBUTE_UNUSED;
726 tree identifier ATTRIBUTE_UNUSED;
727 tree args ATTRIBUTE_UNUSED;
732 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
733 attribute for TYPE. The attributes in ATTRIBUTES have previously been
737 ix86_valid_type_attribute_p (type, attributes, identifier, args)
739 tree attributes ATTRIBUTE_UNUSED;
743 if (TREE_CODE (type) != FUNCTION_TYPE
744 && TREE_CODE (type) != METHOD_TYPE
745 && TREE_CODE (type) != FIELD_DECL
746 && TREE_CODE (type) != TYPE_DECL)
749 /* Stdcall attribute says callee is responsible for popping arguments
750 if they are not variable. */
751 if (is_attribute_p ("stdcall", identifier))
752 return (args == NULL_TREE);
754 /* Cdecl attribute says the callee is a normal C declaration. */
755 if (is_attribute_p ("cdecl", identifier))
756 return (args == NULL_TREE);
758 /* Regparm attribute specifies how many integer arguments are to be
759 passed in registers. */
760 if (is_attribute_p ("regparm", identifier))
764 if (! args || TREE_CODE (args) != TREE_LIST
765 || TREE_CHAIN (args) != NULL_TREE
766 || TREE_VALUE (args) == NULL_TREE)
769 cst = TREE_VALUE (args);
770 if (TREE_CODE (cst) != INTEGER_CST)
773 if (compare_tree_int (cst, REGPARM_MAX) > 0)
782 /* Return 0 if the attributes for two types are incompatible, 1 if they
783 are compatible, and 2 if they are nearly compatible (which causes a
784 warning to be generated). */
787 ix86_comp_type_attributes (type1, type2)
791 /* Check for mismatch of non-default calling convention. */
792 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
794 if (TREE_CODE (type1) != FUNCTION_TYPE)
797 /* Check for mismatched return types (cdecl vs stdcall). */
798 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
799 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
804 /* Value is the number of bytes of arguments automatically
805 popped when returning from a subroutine call.
806 FUNDECL is the declaration node of the function (as a tree),
807 FUNTYPE is the data type of the function (as a tree),
808 or for a library call it is an identifier node for the subroutine name.
809 SIZE is the number of bytes of arguments passed on the stack.
811 On the 80386, the RTD insn may be used to pop them if the number
812 of args is fixed, but if the number is variable then the caller
813 must pop them all. RTD can't be used for library calls now
814 because the library is compiled with the Unix compiler.
815 Use of RTD is a selectable option, since it is incompatible with
816 standard Unix calling sequences. If the option is not selected,
817 the caller must always pop the args.
819 The attribute stdcall is equivalent to RTD on a per module basis. */
822 ix86_return_pops_args (fundecl, funtype, size)
827 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
829 /* Cdecl functions override -mrtd, and never pop the stack. */
830 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
832 /* Stdcall functions will pop the stack if not variable args. */
833 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
837 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
838 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
843 /* Lose any fake structure return argument. */
844 if (aggregate_value_p (TREE_TYPE (funtype)))
845 return GET_MODE_SIZE (Pmode);
850 /* Argument support functions. */
852 /* Initialize a variable CUM of type CUMULATIVE_ARGS
853 for a call to a function whose data type is FNTYPE.
854 For a library call, FNTYPE is 0. */
857 init_cumulative_args (cum, fntype, libname)
858 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
859 tree fntype; /* tree ptr for function decl */
860 rtx libname; /* SYMBOL_REF of library name or 0 */
862 static CUMULATIVE_ARGS zero_cum;
863 tree param, next_param;
865 if (TARGET_DEBUG_ARG)
867 fprintf (stderr, "\ninit_cumulative_args (");
869 fprintf (stderr, "fntype code = %s, ret code = %s",
870 tree_code_name[(int) TREE_CODE (fntype)],
871 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
873 fprintf (stderr, "no fntype");
876 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
881 /* Set up the number of registers to use for passing arguments. */
882 cum->nregs = ix86_regparm;
885 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
888 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
891 /* Determine if this function has variable arguments. This is
892 indicated by the last argument being 'void_type_mode' if there
893 are no variable arguments. If there are variable arguments, then
894 we won't pass anything in registers */
898 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
899 param != 0; param = next_param)
901 next_param = TREE_CHAIN (param);
902 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
907 if (TARGET_DEBUG_ARG)
908 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
913 /* Update the data in CUM to advance over an argument
914 of mode MODE and data type TYPE.
915 (TYPE is null for libcalls where that information may not be available.) */
918 function_arg_advance (cum, mode, type, named)
919 CUMULATIVE_ARGS *cum; /* current arg information */
920 enum machine_mode mode; /* current arg mode */
921 tree type; /* type of the argument or 0 if lib support */
922 int named; /* whether or not the argument was named */
925 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
926 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
928 if (TARGET_DEBUG_ARG)
930 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
931 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
946 /* Define where to put the arguments to a function.
947 Value is zero to push the argument on the stack,
948 or a hard register in which to store the argument.
950 MODE is the argument's machine mode.
951 TYPE is the data type of the argument (as a tree).
952 This is null for libcalls where that information may
954 CUM is a variable of type CUMULATIVE_ARGS which gives info about
955 the preceding args and about the function being called.
956 NAMED is nonzero if this argument is a named parameter
957 (otherwise it is an extra parameter matching an ellipsis). */
960 function_arg (cum, mode, type, named)
961 CUMULATIVE_ARGS *cum; /* current arg information */
962 enum machine_mode mode; /* current arg mode */
963 tree type; /* type of the argument or 0 if lib support */
964 int named; /* != 0 for normal args, == 0 for ... args */
968 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
969 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
973 /* For now, pass fp/complex values on the stack. */
982 if (words <= cum->nregs)
983 ret = gen_rtx_REG (mode, cum->regno);
987 if (TARGET_DEBUG_ARG)
990 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
991 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
994 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
996 fprintf (stderr, ", stack");
998 fprintf (stderr, " )\n");
1005 /* Return nonzero if OP is (const_int 1), else return zero. */
1008 const_int_1_operand (op, mode)
1010 enum machine_mode mode ATTRIBUTE_UNUSED;
1012 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1015 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1016 reference and a constant. */
1019 symbolic_operand (op, mode)
1021 enum machine_mode mode ATTRIBUTE_UNUSED;
1023 switch (GET_CODE (op))
1031 if (GET_CODE (op) == SYMBOL_REF
1032 || GET_CODE (op) == LABEL_REF
1033 || (GET_CODE (op) == UNSPEC
1034 && XINT (op, 1) >= 6
1035 && XINT (op, 1) <= 7))
1037 if (GET_CODE (op) != PLUS
1038 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1042 if (GET_CODE (op) == SYMBOL_REF
1043 || GET_CODE (op) == LABEL_REF)
1045 /* Only @GOTOFF gets offsets. */
1046 if (GET_CODE (op) != UNSPEC
1047 || XINT (op, 1) != 7)
1050 op = XVECEXP (op, 0, 0);
1051 if (GET_CODE (op) == SYMBOL_REF
1052 || GET_CODE (op) == LABEL_REF)
1061 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1064 pic_symbolic_operand (op, mode)
1066 enum machine_mode mode ATTRIBUTE_UNUSED;
1068 if (GET_CODE (op) == CONST)
1071 if (GET_CODE (op) == UNSPEC)
1073 if (GET_CODE (op) != PLUS
1074 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1077 if (GET_CODE (op) == UNSPEC)
1083 /* Test for a valid operand for a call instruction. Don't allow the
1084 arg pointer register or virtual regs since they may decay into
1085 reg + const, which the patterns can't handle. */
1088 call_insn_operand (op, mode)
1090 enum machine_mode mode ATTRIBUTE_UNUSED;
1092 /* Disallow indirect through a virtual register. This leads to
1093 compiler aborts when trying to eliminate them. */
1094 if (GET_CODE (op) == REG
1095 && (op == arg_pointer_rtx
1096 || op == frame_pointer_rtx
1097 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1098 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1101 /* Disallow `call 1234'. Due to varying assembler lameness this
1102 gets either rejected or translated to `call .+1234'. */
1103 if (GET_CODE (op) == CONST_INT)
1106 /* Explicitly allow SYMBOL_REF even if pic. */
1107 if (GET_CODE (op) == SYMBOL_REF)
1110 /* Half-pic doesn't allow anything but registers and constants.
1111 We've just taken care of the later. */
1113 return register_operand (op, Pmode);
1115 /* Otherwise we can allow any general_operand in the address. */
1116 return general_operand (op, Pmode);
1120 constant_call_address_operand (op, mode)
1122 enum machine_mode mode ATTRIBUTE_UNUSED;
1124 if (GET_CODE (op) == CONST
1125 && GET_CODE (XEXP (op, 0)) == PLUS
1126 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1127 op = XEXP (XEXP (op, 0), 0);
1128 return GET_CODE (op) == SYMBOL_REF;
1131 /* Match exactly zero and one. */
1134 const0_operand (op, mode)
1136 enum machine_mode mode;
1138 return op == CONST0_RTX (mode);
1142 const1_operand (op, mode)
1144 enum machine_mode mode ATTRIBUTE_UNUSED;
1146 return op == const1_rtx;
1149 /* Match 2, 4, or 8. Used for leal multiplicands. */
1152 const248_operand (op, mode)
1154 enum machine_mode mode ATTRIBUTE_UNUSED;
1156 return (GET_CODE (op) == CONST_INT
1157 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1160 /* True if this is a constant appropriate for an increment or decremenmt. */
1163 incdec_operand (op, mode)
1165 enum machine_mode mode;
1167 if (op == const1_rtx || op == constm1_rtx)
1169 if (GET_CODE (op) != CONST_INT)
1171 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1173 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1175 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1180 /* Return false if this is the stack pointer, or any other fake
1181 register eliminable to the stack pointer. Otherwise, this is
1184 This is used to prevent esp from being used as an index reg.
1185 Which would only happen in pathological cases. */
1188 reg_no_sp_operand (op, mode)
1190 enum machine_mode mode;
1193 if (GET_CODE (t) == SUBREG)
1195 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1198 return register_operand (op, mode);
1202 mmx_reg_operand (op, mode)
1204 enum machine_mode mode ATTRIBUTE_UNUSED;
1206 return MMX_REG_P (op);
1209 /* Return false if this is any eliminable register. Otherwise
1213 general_no_elim_operand (op, mode)
1215 enum machine_mode mode;
1218 if (GET_CODE (t) == SUBREG)
1220 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1221 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1222 || t == virtual_stack_dynamic_rtx)
1225 return general_operand (op, mode);
1228 /* Return false if this is any eliminable register. Otherwise
1229 register_operand or const_int. */
1232 nonmemory_no_elim_operand (op, mode)
1234 enum machine_mode mode;
1237 if (GET_CODE (t) == SUBREG)
1239 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1240 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1241 || t == virtual_stack_dynamic_rtx)
1244 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1247 /* Return true if op is a Q_REGS class register. */
1250 q_regs_operand (op, mode)
1252 enum machine_mode mode;
1254 if (mode != VOIDmode && GET_MODE (op) != mode)
1256 if (GET_CODE (op) == SUBREG)
1257 op = SUBREG_REG (op);
1258 return QI_REG_P (op);
1261 /* Return true if op is a NON_Q_REGS class register. */
1264 non_q_regs_operand (op, mode)
1266 enum machine_mode mode;
1268 if (mode != VOIDmode && GET_MODE (op) != mode)
1270 if (GET_CODE (op) == SUBREG)
1271 op = SUBREG_REG (op);
1272 return NON_QI_REG_P (op);
1275 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1278 sse_comparison_operator (op, mode)
1280 enum machine_mode mode ATTRIBUTE_UNUSED;
1282 enum rtx_code code = GET_CODE (op);
1283 return code == EQ || code == LT || code == LE || code == UNORDERED;
1285 /* Return 1 if OP is a valid comparison operator in valid mode. */
1287 ix86_comparison_operator (op, mode)
1289 enum machine_mode mode;
1291 enum machine_mode inmode;
1292 enum rtx_code code = GET_CODE (op);
1293 if (mode != VOIDmode && GET_MODE (op) != mode)
1295 if (GET_RTX_CLASS (code) != '<')
1297 inmode = GET_MODE (XEXP (op, 0));
1299 if (inmode == CCFPmode || inmode == CCFPUmode)
1301 enum rtx_code second_code, bypass_code;
1302 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1303 return (bypass_code == NIL && second_code == NIL);
1310 if (inmode == CCmode || inmode == CCGCmode
1311 || inmode == CCGOCmode || inmode == CCNOmode)
1314 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1315 if (inmode == CCmode)
1319 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1327 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1330 fcmov_comparison_operator (op, mode)
1332 enum machine_mode mode;
1334 enum machine_mode inmode = GET_MODE (XEXP (op, 0));
1335 enum rtx_code code = GET_CODE (op);
1336 if (mode != VOIDmode && GET_MODE (op) != mode)
1338 if (GET_RTX_CLASS (code) != '<')
1340 inmode = GET_MODE (XEXP (op, 0));
1341 if (inmode == CCFPmode || inmode == CCFPUmode)
1343 enum rtx_code second_code, bypass_code;
1344 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
1345 if (bypass_code != NIL || second_code != NIL)
1347 code = ix86_fp_compare_code_to_integer (code);
1349 /* i387 supports just limited amount of conditional codes. */
1352 case LTU: case GTU: case LEU: case GEU:
1353 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
1356 case ORDERED: case UNORDERED:
1364 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1367 promotable_binary_operator (op, mode)
1369 enum machine_mode mode ATTRIBUTE_UNUSED;
1371 switch (GET_CODE (op))
1374 /* Modern CPUs have same latency for HImode and SImode multiply,
1375 but 386 and 486 do HImode multiply faster. */
1376 return ix86_cpu > PROCESSOR_I486;
1388 /* Nearly general operand, but accept any const_double, since we wish
1389 to be able to drop them into memory rather than have them get pulled
1393 cmp_fp_expander_operand (op, mode)
1395 enum machine_mode mode;
1397 if (mode != VOIDmode && mode != GET_MODE (op))
1399 if (GET_CODE (op) == CONST_DOUBLE)
1401 return general_operand (op, mode);
1404 /* Match an SI or HImode register for a zero_extract. */
1407 ext_register_operand (op, mode)
1409 enum machine_mode mode ATTRIBUTE_UNUSED;
1411 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1413 return register_operand (op, VOIDmode);
1416 /* Return 1 if this is a valid binary floating-point operation.
1417 OP is the expression matched, and MODE is its mode. */
1420 binary_fp_operator (op, mode)
1422 enum machine_mode mode;
1424 if (mode != VOIDmode && mode != GET_MODE (op))
1427 switch (GET_CODE (op))
1433 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
1441 mult_operator(op, mode)
1443 enum machine_mode mode ATTRIBUTE_UNUSED;
1445 return GET_CODE (op) == MULT;
1449 div_operator(op, mode)
1451 enum machine_mode mode ATTRIBUTE_UNUSED;
1453 return GET_CODE (op) == DIV;
1457 arith_or_logical_operator (op, mode)
1459 enum machine_mode mode;
1461 return ((mode == VOIDmode || GET_MODE (op) == mode)
1462 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1463 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1466 /* Returns 1 if OP is memory operand with a displacement. */
1469 memory_displacement_operand (op, mode)
1471 enum machine_mode mode;
1473 struct ix86_address parts;
1475 if (! memory_operand (op, mode))
1478 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1481 return parts.disp != NULL_RTX;
1484 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1485 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1487 ??? It seems likely that this will only work because cmpsi is an
1488 expander, and no actual insns use this. */
1491 cmpsi_operand (op, mode)
1493 enum machine_mode mode;
1495 if (general_operand (op, mode))
1498 if (GET_CODE (op) == AND
1499 && GET_MODE (op) == SImode
1500 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1501 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1502 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1503 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1504 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1505 && GET_CODE (XEXP (op, 1)) == CONST_INT)
1511 /* Returns 1 if OP is memory operand that can not be represented by the
1515 long_memory_operand (op, mode)
1517 enum machine_mode mode;
1519 if (! memory_operand (op, mode))
1522 return memory_address_length (op) != 0;
1525 /* Return nonzero if the rtx is known aligned. */
1528 aligned_operand (op, mode)
1530 enum machine_mode mode;
1532 struct ix86_address parts;
1534 if (!general_operand (op, mode))
1537 /* Registers and immediate operands are always "aligned". */
1538 if (GET_CODE (op) != MEM)
1541 /* Don't even try to do any aligned optimizations with volatiles. */
1542 if (MEM_VOLATILE_P (op))
1547 /* Pushes and pops are only valid on the stack pointer. */
1548 if (GET_CODE (op) == PRE_DEC
1549 || GET_CODE (op) == POST_INC)
1552 /* Decode the address. */
1553 if (! ix86_decompose_address (op, &parts))
1556 /* Look for some component that isn't known to be aligned. */
1560 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1565 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1570 if (GET_CODE (parts.disp) != CONST_INT
1571 || (INTVAL (parts.disp) & 3) != 0)
1575 /* Didn't find one -- this must be an aligned address. */
1579 /* Return true if the constant is something that can be loaded with
1580 a special instruction. Only handle 0.0 and 1.0; others are less
1584 standard_80387_constant_p (x)
1587 if (GET_CODE (x) != CONST_DOUBLE)
1590 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1596 if (setjmp (handler))
1599 set_float_handler (handler);
1600 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1601 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1602 is1 = REAL_VALUES_EQUAL (d, dconst1);
1603 set_float_handler (NULL_PTR);
1611 /* Note that on the 80387, other constants, such as pi,
1612 are much slower to load as standard constants
1613 than to load from doubles in memory! */
1614 /* ??? Not true on K6: all constants are equal cost. */
1621 /* Returns 1 if OP contains a symbol reference */
1624 symbolic_reference_mentioned_p (op)
1627 register const char *fmt;
1630 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1633 fmt = GET_RTX_FORMAT (GET_CODE (op));
1634 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1640 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1641 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1645 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1652 /* Return 1 if it is appropriate to emit `ret' instructions in the
1653 body of a function. Do this only if the epilogue is simple, needing a
1654 couple of insns. Prior to reloading, we can't tell how many registers
1655 must be saved, so return 0 then. Return 0 if there is no frame
1656 marker to de-allocate.
1658 If NON_SAVING_SETJMP is defined and true, then it is not possible
1659 for the epilogue to be simple, so return 0. This is a special case
1660 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1661 until final, but jump_optimize may need to know sooner if a
1665 ix86_can_use_return_insn_p ()
1667 HOST_WIDE_INT tsize;
1670 #ifdef NON_SAVING_SETJMP
1671 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1674 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1675 if (profile_block_flag == 2)
1679 if (! reload_completed || frame_pointer_needed)
1682 /* Don't allow more than 32 pop, since that's all we can do
1683 with one instruction. */
1684 if (current_function_pops_args
1685 && current_function_args_size >= 32768)
1688 tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
1689 return tsize == 0 && nregs == 0;
1692 static const char *pic_label_name;
1693 static int pic_label_output;
1695 /* This function generates code for -fpic that loads %ebx with
1696 the return address of the caller and then returns. */
1699 asm_output_function_prefix (file, name)
1701 const char *name ATTRIBUTE_UNUSED;
1704 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1705 || current_function_uses_const_pool);
1706 xops[0] = pic_offset_table_rtx;
1707 xops[1] = stack_pointer_rtx;
1709 /* Deep branch prediction favors having a return for every call. */
1710 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
1712 if (!pic_label_output)
1714 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1715 internal (non-global) label that's being emitted, it didn't make
1716 sense to have .type information for local labels. This caused
1717 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1718 me debug info for a label that you're declaring non-global?) this
1719 was changed to call ASM_OUTPUT_LABEL() instead. */
1721 ASM_OUTPUT_LABEL (file, pic_label_name);
1723 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1724 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1725 output_asm_insn ("ret", xops);
1727 pic_label_output = 1;
1733 load_pic_register ()
1737 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1739 if (TARGET_DEEP_BRANCH_PREDICTION)
1741 if (pic_label_name == NULL)
1744 ASM_GENERATE_INTERNAL_LABEL (buf, "LPR", 0);
1745 pic_label_name = ggc_strdup (buf);
1747 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
1751 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
1754 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
1756 if (! TARGET_DEEP_BRANCH_PREDICTION)
1757 emit_insn (gen_popsi1 (pic_offset_table_rtx));
1759 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
1762 /* Generate an SImode "push" pattern for input ARG. */
1768 return gen_rtx_SET (VOIDmode,
1769 gen_rtx_MEM (SImode,
1770 gen_rtx_PRE_DEC (SImode,
1771 stack_pointer_rtx)),
1775 /* Return number of registers to be saved on the stack. */
1781 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1782 || current_function_uses_const_pool);
1783 int limit = (frame_pointer_needed
1784 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1787 for (regno = limit - 1; regno >= 0; regno--)
1788 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1789 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1796 /* Return the offset between two registers, one to be eliminated, and the other
1797 its replacement, at the start of a routine. */
1800 ix86_initial_elimination_offset (from, to)
1807 /* Stack grows downward:
1813 saved frame pointer if frame_pointer_needed
1814 <- HARD_FRAME_POINTER
1824 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1825 /* Skip saved PC and previous frame pointer.
1826 Executed only when frame_pointer_needed. */
1828 else if (from == FRAME_POINTER_REGNUM
1829 && to == HARD_FRAME_POINTER_REGNUM)
1831 ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
1832 padding1 += nregs * UNITS_PER_WORD;
1837 /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination. */
1838 int frame_size = frame_pointer_needed ? 8 : 4;
1839 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
1840 &nregs, &padding1, (int *) 0);
1842 if (to != STACK_POINTER_REGNUM)
1844 else if (from == ARG_POINTER_REGNUM)
1845 return tsize + nregs * UNITS_PER_WORD + frame_size;
1846 else if (from != FRAME_POINTER_REGNUM)
1849 return tsize - padding1;
1853 /* Compute the size of local storage taking into consideration the
1854 desired stack alignment which is to be maintained. Also determine
1855 the number of registers saved below the local storage.
1857 PADDING1 returns padding before stack frame and PADDING2 returns
1858 padding after stack frame;
1861 static HOST_WIDE_INT
1862 ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
1864 int *nregs_on_stack;
1871 HOST_WIDE_INT total_size;
1872 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1874 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1876 nregs = ix86_nsaved_regs ();
1879 offset = frame_pointer_needed ? 8 : 4;
1881 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1882 since i386 port is the only using those features that may break easilly. */
1884 if (size && !stack_alignment_needed)
1886 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
1888 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1890 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1892 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1895 if (stack_alignment_needed < 4)
1896 stack_alignment_needed = 4;
1898 offset += nregs * UNITS_PER_WORD;
1900 if (ACCUMULATE_OUTGOING_ARGS)
1901 total_size += current_function_outgoing_args_size;
1903 total_size += offset;
1905 /* Align start of frame for local function. */
1906 padding1 = ((offset + stack_alignment_needed - 1)
1907 & -stack_alignment_needed) - offset;
1908 total_size += padding1;
1910 /* Align stack boundary. */
1911 padding2 = ((total_size + preferred_alignment - 1)
1912 & -preferred_alignment) - total_size;
1914 if (ACCUMULATE_OUTGOING_ARGS)
1915 padding2 += current_function_outgoing_args_size;
1918 *nregs_on_stack = nregs;
1920 *rpadding1 = padding1;
1922 *rpadding2 = padding2;
1924 return size + padding1 + padding2;
1927 /* Emit code to save registers in the prologue. */
1930 ix86_emit_save_regs ()
1935 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1936 || current_function_uses_const_pool);
1937 limit = (frame_pointer_needed
1938 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1940 for (regno = limit - 1; regno >= 0; regno--)
1941 if ((regs_ever_live[regno] && !call_used_regs[regno])
1942 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1944 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1945 RTX_FRAME_RELATED_P (insn) = 1;
1949 /* Expand the prologue into a bunch of separate insns. */
1952 ix86_expand_prologue ()
1954 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
1957 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1958 || current_function_uses_const_pool);
1960 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1961 slower on all targets. Also sdb doesn't like it. */
1963 if (frame_pointer_needed)
1965 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1966 RTX_FRAME_RELATED_P (insn) = 1;
1968 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1969 RTX_FRAME_RELATED_P (insn) = 1;
1972 ix86_emit_save_regs ();
1976 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1978 if (frame_pointer_needed)
1979 insn = emit_insn (gen_pro_epilogue_adjust_stack
1980 (stack_pointer_rtx, stack_pointer_rtx,
1981 GEN_INT (-tsize), hard_frame_pointer_rtx));
1983 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1985 RTX_FRAME_RELATED_P (insn) = 1;
1989 /* ??? Is this only valid for Win32? */
1993 arg0 = gen_rtx_REG (SImode, 0);
1994 emit_move_insn (arg0, GEN_INT (tsize));
1996 sym = gen_rtx_MEM (FUNCTION_MODE,
1997 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1998 insn = emit_call_insn (gen_call (sym, const0_rtx));
2000 CALL_INSN_FUNCTION_USAGE (insn)
2001 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
2002 CALL_INSN_FUNCTION_USAGE (insn));
2005 #ifdef SUBTARGET_PROLOGUE
2010 load_pic_register ();
2012 /* If we are profiling, make sure no instructions are scheduled before
2013 the call to mcount. However, if -fpic, the above call will have
2015 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2016 emit_insn (gen_blockage ());
2019 /* Emit code to add TSIZE to esp value. Use POP instruction when
2023 ix86_emit_epilogue_esp_adjustment (tsize)
2026 /* If a frame pointer is present, we must be sure to tie the sp
2027 to the fp so that we don't mis-schedule. */
2028 if (frame_pointer_needed)
2029 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2032 hard_frame_pointer_rtx));
2034 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2038 /* Emit code to restore saved registers using MOV insns. First register
2039 is restored from POINTER + OFFSET. */
2041 ix86_emit_restore_regs_using_mov (pointer, offset)
2046 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2047 || current_function_uses_const_pool);
2048 int limit = (frame_pointer_needed
2049 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
2051 for (regno = 0; regno < limit; regno++)
2052 if ((regs_ever_live[regno] && !call_used_regs[regno])
2053 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2055 emit_move_insn (gen_rtx_REG (SImode, regno),
2056 adj_offsettable_operand (gen_rtx_MEM (SImode,
2063 /* Restore function stack, frame, and registers. */
2066 ix86_expand_epilogue (emit_return)
2072 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2073 || current_function_uses_const_pool);
2074 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2075 HOST_WIDE_INT offset;
2076 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2077 (int *) 0, (int *) 0);
2079 /* Calculate start of saved registers relative to ebp. */
2080 offset = -nregs * UNITS_PER_WORD;
2082 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2083 if (profile_block_flag == 2)
2085 FUNCTION_BLOCK_PROFILER_EXIT;
2089 /* If we're only restoring one register and sp is not valid then
2090 using a move instruction to restore the register since it's
2091 less work than reloading sp and popping the register.
2093 The default code result in stack adjustment using add/lea instruction,
2094 while this code results in LEAVE instruction (or discrete equivalent),
2095 so it is profitable in some other cases as well. Especially when there
2096 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2097 and there is exactly one register to pop. This heruistic may need some
2098 tuning in future. */
2099 if ((!sp_valid && nregs <= 1)
2100 || (frame_pointer_needed && !nregs && tsize)
2101 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2104 /* Restore registers. We can use ebp or esp to address the memory
2105 locations. If both are available, default to ebp, since offsets
2106 are known to be small. Only exception is esp pointing directly to the
2107 end of block of saved registers, where we may simplify addressing
2110 if (!frame_pointer_needed || (sp_valid && !tsize))
2111 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2113 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2115 if (!frame_pointer_needed)
2116 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2117 /* If not an i386, mov & pop is faster than "leave". */
2118 else if (TARGET_USE_LEAVE || optimize_size)
2119 emit_insn (gen_leave ());
2122 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2123 hard_frame_pointer_rtx,
2125 hard_frame_pointer_rtx));
2126 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2131 /* First step is to deallocate the stack frame so that we can
2132 pop the registers. */
2135 if (!frame_pointer_needed)
2137 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2138 hard_frame_pointer_rtx,
2140 hard_frame_pointer_rtx));
2143 ix86_emit_epilogue_esp_adjustment (tsize);
2145 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2146 if ((regs_ever_live[regno] && !call_used_regs[regno])
2147 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2148 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2151 /* Sibcall epilogues don't want a return instruction. */
2155 if (current_function_pops_args && current_function_args_size)
2157 rtx popc = GEN_INT (current_function_pops_args);
2159 /* i386 can only pop 64K bytes. If asked to pop more, pop
2160 return address, do explicit add, and jump indirectly to the
2163 if (current_function_pops_args >= 65536)
2165 rtx ecx = gen_rtx_REG (SImode, 2);
2167 emit_insn (gen_popsi1 (ecx));
2168 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2169 emit_jump_insn (gen_return_indirect_internal (ecx));
2172 emit_jump_insn (gen_return_pop_internal (popc));
2175 emit_jump_insn (gen_return_internal ());
2178 /* Extract the parts of an RTL expression that is a valid memory address
2179 for an instruction. Return false if the structure of the address is
2183 ix86_decompose_address (addr, out)
2185 struct ix86_address *out;
2187 rtx base = NULL_RTX;
2188 rtx index = NULL_RTX;
2189 rtx disp = NULL_RTX;
2190 HOST_WIDE_INT scale = 1;
2191 rtx scale_rtx = NULL_RTX;
2193 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2195 else if (GET_CODE (addr) == PLUS)
2197 rtx op0 = XEXP (addr, 0);
2198 rtx op1 = XEXP (addr, 1);
2199 enum rtx_code code0 = GET_CODE (op0);
2200 enum rtx_code code1 = GET_CODE (op1);
2202 if (code0 == REG || code0 == SUBREG)
2204 if (code1 == REG || code1 == SUBREG)
2205 index = op0, base = op1; /* index + base */
2207 base = op0, disp = op1; /* base + displacement */
2209 else if (code0 == MULT)
2211 index = XEXP (op0, 0);
2212 scale_rtx = XEXP (op0, 1);
2213 if (code1 == REG || code1 == SUBREG)
2214 base = op1; /* index*scale + base */
2216 disp = op1; /* index*scale + disp */
2218 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2220 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2221 scale_rtx = XEXP (XEXP (op0, 0), 1);
2222 base = XEXP (op0, 1);
2225 else if (code0 == PLUS)
2227 index = XEXP (op0, 0); /* index + base + disp */
2228 base = XEXP (op0, 1);
2234 else if (GET_CODE (addr) == MULT)
2236 index = XEXP (addr, 0); /* index*scale */
2237 scale_rtx = XEXP (addr, 1);
2239 else if (GET_CODE (addr) == ASHIFT)
2243 /* We're called for lea too, which implements ashift on occasion. */
2244 index = XEXP (addr, 0);
2245 tmp = XEXP (addr, 1);
2246 if (GET_CODE (tmp) != CONST_INT)
2248 scale = INTVAL (tmp);
2249 if ((unsigned HOST_WIDE_INT) scale > 3)
2254 disp = addr; /* displacement */
2256 /* Extract the integral value of scale. */
2259 if (GET_CODE (scale_rtx) != CONST_INT)
2261 scale = INTVAL (scale_rtx);
2264 /* Allow arg pointer and stack pointer as index if there is not scaling */
2265 if (base && index && scale == 1
2266 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2267 || index == stack_pointer_rtx))
2274 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2275 if ((base == hard_frame_pointer_rtx
2276 || base == frame_pointer_rtx
2277 || base == arg_pointer_rtx) && !disp)
2280 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2281 Avoid this by transforming to [%esi+0]. */
2282 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2283 && base && !index && !disp
2285 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2288 /* Special case: encode reg+reg instead of reg*2. */
2289 if (!base && index && scale && scale == 2)
2290 base = index, scale = 1;
2292 /* Special case: scaling cannot be encoded without base or displacement. */
2293 if (!base && !disp && index && scale != 1)
2304 /* Return cost of the memory address x.
2305 For i386, it is better to use a complex address than let gcc copy
2306 the address into a reg and make a new pseudo. But not if the address
2307 requires to two regs - that would mean more pseudos with longer
2310 ix86_address_cost (x)
2313 struct ix86_address parts;
2316 if (!ix86_decompose_address (x, &parts))
2319 /* More complex memory references are better. */
2320 if (parts.disp && parts.disp != const0_rtx)
2323 /* Attempt to minimize number of registers in the address. */
2325 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2327 && (!REG_P (parts.index)
2328 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2332 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2334 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2335 && parts.base != parts.index)
2338 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2339 since it's predecode logic can't detect the length of instructions
2340 and it degenerates to vector decoded. Increase cost of such
2341 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2342 to split such addresses or even refuse such addresses at all.
2344 Following addressing modes are affected:
2349 The first and last case may be avoidable by explicitly coding the zero in
2350 memory address, but I don't have AMD-K6 machine handy to check this
2354 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2355 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2356 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2362 /* If X is a machine specific address (i.e. a symbol or label being
2363 referenced as a displacement from the GOT implemented using an
2364 UNSPEC), then return the base term. Otherwise return X. */
2367 ix86_find_base_term (x)
2372 if (GET_CODE (x) != PLUS
2373 || XEXP (x, 0) != pic_offset_table_rtx
2374 || GET_CODE (XEXP (x, 1)) != CONST)
2377 term = XEXP (XEXP (x, 1), 0);
2379 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2380 term = XEXP (term, 0);
2382 if (GET_CODE (term) != UNSPEC
2383 || XVECLEN (term, 0) != 1
2384 || XINT (term, 1) != 7)
2387 term = XVECEXP (term, 0, 0);
2389 if (GET_CODE (term) != SYMBOL_REF
2390 && GET_CODE (term) != LABEL_REF)
2396 /* Determine if a given CONST RTX is a valid memory displacement
2400 legitimate_pic_address_disp_p (disp)
2403 if (GET_CODE (disp) != CONST)
2405 disp = XEXP (disp, 0);
2407 if (GET_CODE (disp) == PLUS)
2409 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2411 disp = XEXP (disp, 0);
2414 if (GET_CODE (disp) != UNSPEC
2415 || XVECLEN (disp, 0) != 1)
2418 /* Must be @GOT or @GOTOFF. */
2419 if (XINT (disp, 1) != 6
2420 && XINT (disp, 1) != 7)
2423 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2424 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2430 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2431 memory address for an instruction. The MODE argument is the machine mode
2432 for the MEM expression that wants to use this address.
2434 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2435 convert common non-canonical forms to canonical form so that they will
2439 legitimate_address_p (mode, addr, strict)
2440 enum machine_mode mode;
2444 struct ix86_address parts;
2445 rtx base, index, disp;
2446 HOST_WIDE_INT scale;
2447 const char *reason = NULL;
2448 rtx reason_rtx = NULL_RTX;
2450 if (TARGET_DEBUG_ADDR)
2453 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2454 GET_MODE_NAME (mode), strict);
2458 if (! ix86_decompose_address (addr, &parts))
2460 reason = "decomposition failed";
2465 index = parts.index;
2467 scale = parts.scale;
2469 /* Validate base register.
2471 Don't allow SUBREG's here, it can lead to spill failures when the base
2472 is one word out of a two word structure, which is represented internally
2479 if (GET_CODE (base) != REG)
2481 reason = "base is not a register";
2485 if (GET_MODE (base) != Pmode)
2487 reason = "base is not in Pmode";
2491 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2492 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2494 reason = "base is not valid";
2499 /* Validate index register.
2501 Don't allow SUBREG's here, it can lead to spill failures when the index
2502 is one word out of a two word structure, which is represented internally
2509 if (GET_CODE (index) != REG)
2511 reason = "index is not a register";
2515 if (GET_MODE (index) != Pmode)
2517 reason = "index is not in Pmode";
2521 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2522 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2524 reason = "index is not valid";
2529 /* Validate scale factor. */
2532 reason_rtx = GEN_INT (scale);
2535 reason = "scale without index";
2539 if (scale != 2 && scale != 4 && scale != 8)
2541 reason = "scale is not a valid multiplier";
2546 /* Validate displacement. */
2551 if (!CONSTANT_ADDRESS_P (disp))
2553 reason = "displacement is not constant";
2557 if (GET_CODE (disp) == CONST_DOUBLE)
2559 reason = "displacement is a const_double";
2563 if (flag_pic && SYMBOLIC_CONST (disp))
2565 if (! legitimate_pic_address_disp_p (disp))
2567 reason = "displacement is an invalid pic construct";
2571 /* This code used to verify that a symbolic pic displacement
2572 includes the pic_offset_table_rtx register.
2574 While this is good idea, unfortunately these constructs may
2575 be created by "adds using lea" optimization for incorrect
2584 This code is nonsensical, but results in addressing
2585 GOT table with pic_offset_table_rtx base. We can't
2586 just refuse it easilly, since it gets matched by
2587 "addsi3" pattern, that later gets split to lea in the
2588 case output register differs from input. While this
2589 can be handled by separate addsi pattern for this case
2590 that never results in lea, this seems to be easier and
2591 correct fix for crash to disable this test. */
2593 else if (HALF_PIC_P ())
2595 if (! HALF_PIC_ADDRESS_P (disp)
2596 || (base != NULL_RTX || index != NULL_RTX))
2598 reason = "displacement is an invalid half-pic reference";
2604 /* Everything looks valid. */
2605 if (TARGET_DEBUG_ADDR)
2606 fprintf (stderr, "Success.\n");
2610 if (TARGET_DEBUG_ADDR)
2612 fprintf (stderr, "Error: %s\n", reason);
2613 debug_rtx (reason_rtx);
2618 /* Return an unique alias set for the GOT. */
2620 static HOST_WIDE_INT
2621 ix86_GOT_alias_set ()
2623 static HOST_WIDE_INT set = -1;
2625 set = new_alias_set ();
2629 /* Return a legitimate reference for ORIG (an address) using the
2630 register REG. If REG is 0, a new pseudo is generated.
2632 There are two types of references that must be handled:
2634 1. Global data references must load the address from the GOT, via
2635 the PIC reg. An insn is emitted to do this load, and the reg is
2638 2. Static data references, constant pool addresses, and code labels
2639 compute the address as an offset from the GOT, whose base is in
2640 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2641 differentiate them from global data objects. The returned
2642 address is the PIC reg + an unspec constant.
2644 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2645 reg also appears in the address. */
2648 legitimize_pic_address (orig, reg)
2656 if (GET_CODE (addr) == LABEL_REF
2657 || (GET_CODE (addr) == SYMBOL_REF
2658 && (CONSTANT_POOL_ADDRESS_P (addr)
2659 || SYMBOL_REF_FLAG (addr))))
2661 /* This symbol may be referenced via a displacement from the PIC
2662 base address (@GOTOFF). */
2664 current_function_uses_pic_offset_table = 1;
2665 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2666 new = gen_rtx_CONST (Pmode, new);
2667 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2671 emit_move_insn (reg, new);
2675 else if (GET_CODE (addr) == SYMBOL_REF)
2677 /* This symbol must be referenced via a load from the
2678 Global Offset Table (@GOT). */
2680 current_function_uses_pic_offset_table = 1;
2681 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2682 new = gen_rtx_CONST (Pmode, new);
2683 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2684 new = gen_rtx_MEM (Pmode, new);
2685 RTX_UNCHANGING_P (new) = 1;
2686 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2689 reg = gen_reg_rtx (Pmode);
2690 emit_move_insn (reg, new);
2695 if (GET_CODE (addr) == CONST)
2697 addr = XEXP (addr, 0);
2698 if (GET_CODE (addr) == UNSPEC)
2700 /* Check that the unspec is one of the ones we generate? */
2702 else if (GET_CODE (addr) != PLUS)
2705 if (GET_CODE (addr) == PLUS)
2707 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2709 /* Check first to see if this is a constant offset from a @GOTOFF
2710 symbol reference. */
2711 if ((GET_CODE (op0) == LABEL_REF
2712 || (GET_CODE (op0) == SYMBOL_REF
2713 && (CONSTANT_POOL_ADDRESS_P (op0)
2714 || SYMBOL_REF_FLAG (op0))))
2715 && GET_CODE (op1) == CONST_INT)
2717 current_function_uses_pic_offset_table = 1;
2718 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2719 new = gen_rtx_PLUS (Pmode, new, op1);
2720 new = gen_rtx_CONST (Pmode, new);
2721 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2725 emit_move_insn (reg, new);
2731 base = legitimize_pic_address (XEXP (addr, 0), reg);
2732 new = legitimize_pic_address (XEXP (addr, 1),
2733 base == reg ? NULL_RTX : reg);
2735 if (GET_CODE (new) == CONST_INT)
2736 new = plus_constant (base, INTVAL (new));
2739 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2741 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2742 new = XEXP (new, 1);
2744 new = gen_rtx_PLUS (Pmode, base, new);
2752 /* Try machine-dependent ways of modifying an illegitimate address
2753 to be legitimate. If we find one, return the new, valid address.
2754 This macro is used in only one place: `memory_address' in explow.c.
2756 OLDX is the address as it was before break_out_memory_refs was called.
2757 In some cases it is useful to look at this to decide what needs to be done.
2759 MODE and WIN are passed so that this macro can use
2760 GO_IF_LEGITIMATE_ADDRESS.
2762 It is always safe for this macro to do nothing. It exists to recognize
2763 opportunities to optimize the output.
2765 For the 80386, we handle X+REG by loading X into a register R and
2766 using R+REG. R will go in a general reg and indexing will be used.
2767 However, if REG is a broken-out memory address or multiplication,
2768 nothing needs to be done because REG can certainly go in a general reg.
2770 When -fpic is used, special handling is needed for symbolic references.
2771 See comments by legitimize_pic_address in i386.c for details. */
2774 legitimize_address (x, oldx, mode)
2776 register rtx oldx ATTRIBUTE_UNUSED;
2777 enum machine_mode mode;
2782 if (TARGET_DEBUG_ADDR)
2784 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2785 GET_MODE_NAME (mode));
2789 if (flag_pic && SYMBOLIC_CONST (x))
2790 return legitimize_pic_address (x, 0);
2792 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2793 if (GET_CODE (x) == ASHIFT
2794 && GET_CODE (XEXP (x, 1)) == CONST_INT
2795 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2798 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2799 GEN_INT (1 << log));
2802 if (GET_CODE (x) == PLUS)
2804 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2806 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2807 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2808 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2811 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2812 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2813 GEN_INT (1 << log));
2816 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2817 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2818 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2821 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2822 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2823 GEN_INT (1 << log));
2826 /* Put multiply first if it isn't already. */
2827 if (GET_CODE (XEXP (x, 1)) == MULT)
2829 rtx tmp = XEXP (x, 0);
2830 XEXP (x, 0) = XEXP (x, 1);
2835 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2836 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2837 created by virtual register instantiation, register elimination, and
2838 similar optimizations. */
2839 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2842 x = gen_rtx_PLUS (Pmode,
2843 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2844 XEXP (XEXP (x, 1), 0)),
2845 XEXP (XEXP (x, 1), 1));
2849 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2850 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2851 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2852 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2853 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2854 && CONSTANT_P (XEXP (x, 1)))
2857 rtx other = NULL_RTX;
2859 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2861 constant = XEXP (x, 1);
2862 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2864 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2866 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2867 other = XEXP (x, 1);
2875 x = gen_rtx_PLUS (Pmode,
2876 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2877 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2878 plus_constant (other, INTVAL (constant)));
2882 if (changed && legitimate_address_p (mode, x, FALSE))
2885 if (GET_CODE (XEXP (x, 0)) == MULT)
2888 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2891 if (GET_CODE (XEXP (x, 1)) == MULT)
2894 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2898 && GET_CODE (XEXP (x, 1)) == REG
2899 && GET_CODE (XEXP (x, 0)) == REG)
2902 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2905 x = legitimize_pic_address (x, 0);
2908 if (changed && legitimate_address_p (mode, x, FALSE))
2911 if (GET_CODE (XEXP (x, 0)) == REG)
2913 register rtx temp = gen_reg_rtx (Pmode);
2914 register rtx val = force_operand (XEXP (x, 1), temp);
2916 emit_move_insn (temp, val);
2922 else if (GET_CODE (XEXP (x, 1)) == REG)
2924 register rtx temp = gen_reg_rtx (Pmode);
2925 register rtx val = force_operand (XEXP (x, 0), temp);
2927 emit_move_insn (temp, val);
2937 /* Print an integer constant expression in assembler syntax. Addition
2938 and subtraction are the only arithmetic that may appear in these
2939 expressions. FILE is the stdio stream to write to, X is the rtx, and
2940 CODE is the operand print code from the output string. */
2943 output_pic_addr_const (file, x, code)
2950 switch (GET_CODE (x))
2960 assemble_name (file, XSTR (x, 0));
2961 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2962 fputs ("@PLT", file);
2969 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2970 assemble_name (asm_out_file, buf);
2974 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2978 /* This used to output parentheses around the expression,
2979 but that does not work on the 386 (either ATT or BSD assembler). */
2980 output_pic_addr_const (file, XEXP (x, 0), code);
2984 if (GET_MODE (x) == VOIDmode)
2986 /* We can use %d if the number is <32 bits and positive. */
2987 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2988 fprintf (file, "0x%lx%08lx",
2989 (unsigned long) CONST_DOUBLE_HIGH (x),
2990 (unsigned long) CONST_DOUBLE_LOW (x));
2992 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2995 /* We can't handle floating point constants;
2996 PRINT_OPERAND must handle them. */
2997 output_operand_lossage ("floating constant misused");
3001 /* Some assemblers need integer constants to appear first. */
3002 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
3004 output_pic_addr_const (file, XEXP (x, 0), code);
3006 output_pic_addr_const (file, XEXP (x, 1), code);
3008 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3010 output_pic_addr_const (file, XEXP (x, 1), code);
3012 output_pic_addr_const (file, XEXP (x, 0), code);
3019 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3020 output_pic_addr_const (file, XEXP (x, 0), code);
3022 output_pic_addr_const (file, XEXP (x, 1), code);
3023 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3027 if (XVECLEN (x, 0) != 1)
3029 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3030 switch (XINT (x, 1))
3033 fputs ("@GOT", file);
3036 fputs ("@GOTOFF", file);
3039 fputs ("@PLT", file);
3042 output_operand_lossage ("invalid UNSPEC as operand");
3048 output_operand_lossage ("invalid expression as operand");
3052 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3053 We need to handle our special PIC relocations. */
3056 i386_dwarf_output_addr_const (file, x)
3060 fprintf (file, "%s", INT_ASM_OP);
3062 output_pic_addr_const (file, x, '\0');
3064 output_addr_const (file, x);
3068 /* In the name of slightly smaller debug output, and to cater to
3069 general assembler losage, recognize PIC+GOTOFF and turn it back
3070 into a direct symbol reference. */
3073 i386_simplify_dwarf_addr (orig_x)
3078 if (GET_CODE (x) != PLUS
3079 || GET_CODE (XEXP (x, 0)) != REG
3080 || GET_CODE (XEXP (x, 1)) != CONST)
3083 x = XEXP (XEXP (x, 1), 0);
3084 if (GET_CODE (x) == UNSPEC
3085 && (XINT (x, 1) == 6
3086 || XINT (x, 1) == 7))
3087 return XVECEXP (x, 0, 0);
3089 if (GET_CODE (x) == PLUS
3090 && GET_CODE (XEXP (x, 0)) == UNSPEC
3091 && GET_CODE (XEXP (x, 1)) == CONST_INT
3092 && (XINT (XEXP (x, 0), 1) == 6
3093 || XINT (XEXP (x, 0), 1) == 7))
3094 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3100 put_condition_code (code, mode, reverse, fp, file)
3102 enum machine_mode mode;
3108 if (mode == CCFPmode || mode == CCFPUmode)
3110 enum rtx_code second_code, bypass_code;
3111 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3112 if (bypass_code != NIL || second_code != NIL)
3114 code = ix86_fp_compare_code_to_integer (code);
3118 code = reverse_condition (code);
3129 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3134 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3135 Those same assemblers have the same but opposite losage on cmov. */
3138 suffix = fp ? "nbe" : "a";
3141 if (mode == CCNOmode || mode == CCGOCmode)
3143 else if (mode == CCmode || mode == CCGCmode)
3154 if (mode == CCNOmode || mode == CCGOCmode)
3156 else if (mode == CCmode || mode == CCGCmode)
3165 suffix = fp ? "nb" : "ae";
3168 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3178 suffix = fp ? "u" : "p";
3181 suffix = fp ? "nu" : "np";
3186 fputs (suffix, file);
3190 print_reg (x, code, file)
3195 if (REGNO (x) == ARG_POINTER_REGNUM
3196 || REGNO (x) == FRAME_POINTER_REGNUM
3197 || REGNO (x) == FLAGS_REG
3198 || REGNO (x) == FPSR_REG)
3201 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3206 else if (code == 'b')
3208 else if (code == 'k')
3210 else if (code == 'y')
3212 else if (code == 'h')
3214 else if (code == 'm' || MMX_REG_P (x))
3217 code = GET_MODE_SIZE (GET_MODE (x));
3222 fputs (hi_reg_name[REGNO (x)], file);
3225 if (STACK_TOP_P (x))
3227 fputs ("st(0)", file);
3239 fputs (hi_reg_name[REGNO (x)], file);
3242 fputs (qi_reg_name[REGNO (x)], file);
3245 fputs (qi_high_reg_name[REGNO (x)], file);
3253 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3254 C -- print opcode suffix for set/cmov insn.
3255 c -- like C, but print reversed condition
3256 R -- print the prefix for register names.
3257 z -- print the opcode suffix for the size of the current operand.
3258 * -- print a star (in certain assembler syntax)
3259 A -- print an absolute memory reference.
3260 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3261 s -- print a shift double count, followed by the assemblers argument
3263 b -- print the QImode name of the register for the indicated operand.
3264 %b0 would print %al if operands[0] is reg 0.
3265 w -- likewise, print the HImode name of the register.
3266 k -- likewise, print the SImode name of the register.
3267 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3268 y -- print "st(0)" instead of "st" as a register.
3269 m -- print "st(n)" as an mmx register. */
3272 print_operand (file, x, code)
3282 if (ASSEMBLER_DIALECT == 0)
3287 if (ASSEMBLER_DIALECT == 0)
3289 else if (ASSEMBLER_DIALECT == 1)
3291 /* Intel syntax. For absolute addresses, registers should not
3292 be surrounded by braces. */
3293 if (GET_CODE (x) != REG)
3296 PRINT_OPERAND (file, x, 0);
3302 PRINT_OPERAND (file, x, 0);
3307 if (ASSEMBLER_DIALECT == 0)
3312 if (ASSEMBLER_DIALECT == 0)
3317 if (ASSEMBLER_DIALECT == 0)
3322 if (ASSEMBLER_DIALECT == 0)
3327 if (ASSEMBLER_DIALECT == 0)
3332 if (ASSEMBLER_DIALECT == 0)
3337 /* 387 opcodes don't get size suffixes if the operands are
3340 if (STACK_REG_P (x))
3343 /* this is the size of op from size of operand */
3344 switch (GET_MODE_SIZE (GET_MODE (x)))
3347 #ifdef HAVE_GAS_FILDS_FISTS
3353 if (GET_MODE (x) == SFmode)
3368 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3370 #ifdef GAS_MNEMONICS
3396 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3398 PRINT_OPERAND (file, x, 0);
3404 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3407 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3410 /* Like above, but reverse condition */
3412 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3421 sprintf (str, "invalid operand code `%c'", code);
3422 output_operand_lossage (str);
3427 if (GET_CODE (x) == REG)
3429 PRINT_REG (x, code, file);
3432 else if (GET_CODE (x) == MEM)
3434 /* No `byte ptr' prefix for call instructions. */
3435 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3438 switch (GET_MODE_SIZE (GET_MODE (x)))
3440 case 1: size = "BYTE"; break;
3441 case 2: size = "WORD"; break;
3442 case 4: size = "DWORD"; break;
3443 case 8: size = "QWORD"; break;
3444 case 12: size = "XWORD"; break;
3445 case 16: size = "XMMWORD"; break;
3450 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3453 else if (code == 'w')
3455 else if (code == 'k')
3459 fputs (" PTR ", file);
3463 if (flag_pic && CONSTANT_ADDRESS_P (x))
3464 output_pic_addr_const (file, x, code);
3469 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3474 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3475 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3477 if (ASSEMBLER_DIALECT == 0)
3479 fprintf (file, "0x%lx", l);
3482 /* These float cases don't actually occur as immediate operands. */
3483 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3488 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3489 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3490 fprintf (file, "%s", dstr);
3493 else if (GET_CODE (x) == CONST_DOUBLE
3494 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3499 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3500 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3501 fprintf (file, "%s", dstr);
3507 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3509 if (ASSEMBLER_DIALECT == 0)
3512 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3513 || GET_CODE (x) == LABEL_REF)
3515 if (ASSEMBLER_DIALECT == 0)
3518 fputs ("OFFSET FLAT:", file);
3521 if (GET_CODE (x) == CONST_INT)
3522 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3524 output_pic_addr_const (file, x, code);
3526 output_addr_const (file, x);
3530 /* Print a memory operand whose address is ADDR. */
3533 print_operand_address (file, addr)
3537 struct ix86_address parts;
3538 rtx base, index, disp;
3541 if (! ix86_decompose_address (addr, &parts))
3545 index = parts.index;
3547 scale = parts.scale;
3549 if (!base && !index)
3551 /* Displacement only requires special attention. */
3553 if (GET_CODE (disp) == CONST_INT)
3555 if (ASSEMBLER_DIALECT != 0)
3557 if (USER_LABEL_PREFIX[0] == 0)
3559 fputs ("ds:", file);
3561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3564 output_pic_addr_const (file, addr, 0);
3566 output_addr_const (file, addr);
3570 if (ASSEMBLER_DIALECT == 0)
3575 output_pic_addr_const (file, disp, 0);
3576 else if (GET_CODE (disp) == LABEL_REF)
3577 output_asm_label (disp);
3579 output_addr_const (file, disp);
3584 PRINT_REG (base, 0, file);
3588 PRINT_REG (index, 0, file);
3590 fprintf (file, ",%d", scale);
3596 rtx offset = NULL_RTX;
3600 /* Pull out the offset of a symbol; print any symbol itself. */
3601 if (GET_CODE (disp) == CONST
3602 && GET_CODE (XEXP (disp, 0)) == PLUS
3603 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3605 offset = XEXP (XEXP (disp, 0), 1);
3606 disp = gen_rtx_CONST (VOIDmode,
3607 XEXP (XEXP (disp, 0), 0));
3611 output_pic_addr_const (file, disp, 0);
3612 else if (GET_CODE (disp) == LABEL_REF)
3613 output_asm_label (disp);
3614 else if (GET_CODE (disp) == CONST_INT)
3617 output_addr_const (file, disp);
3623 PRINT_REG (base, 0, file);
3626 if (INTVAL (offset) >= 0)
3628 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3632 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3639 PRINT_REG (index, 0, file);
3641 fprintf (file, "*%d", scale);
3648 /* Split one or more DImode RTL references into pairs of SImode
3649 references. The RTL can be REG, offsettable MEM, integer constant, or
3650 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3651 split and "num" is its length. lo_half and hi_half are output arrays
3652 that parallel "operands". */
3655 split_di (operands, num, lo_half, hi_half)
3658 rtx lo_half[], hi_half[];
3662 rtx op = operands[num];
3663 if (CONSTANT_P (op))
3664 split_double (op, &lo_half[num], &hi_half[num]);
3665 else if (! reload_completed)
3667 lo_half[num] = gen_lowpart (SImode, op);
3668 hi_half[num] = gen_highpart (SImode, op);
3670 else if (GET_CODE (op) == REG)
3672 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3673 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3675 else if (offsettable_memref_p (op))
3677 rtx lo_addr = XEXP (op, 0);
3678 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3679 lo_half[num] = change_address (op, SImode, lo_addr);
3680 hi_half[num] = change_address (op, SImode, hi_addr);
3687 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3688 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3689 is the expression of the binary operation. The output may either be
3690 emitted here, or returned to the caller, like all output_* functions.
3692 There is no guarantee that the operands are the same mode, as they
3693 might be within FLOAT or FLOAT_EXTEND expressions. */
3695 #ifndef SYSV386_COMPAT
3696 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3697 wants to fix the assemblers because that causes incompatibility
3698 with gcc. No-one wants to fix gcc because that causes
3699 incompatibility with assemblers... You can use the option of
3700 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3701 #define SYSV386_COMPAT 1
3705 output_387_binary_op (insn, operands)
3709 static char buf[30];
3712 #ifdef ENABLE_CHECKING
3713 /* Even if we do not want to check the inputs, this documents input
3714 constraints. Which helps in understanding the following code. */
3715 if (STACK_REG_P (operands[0])
3716 && ((REG_P (operands[1])
3717 && REGNO (operands[0]) == REGNO (operands[1])
3718 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3719 || (REG_P (operands[2])
3720 && REGNO (operands[0]) == REGNO (operands[2])
3721 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3722 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3728 switch (GET_CODE (operands[3]))
3731 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3732 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3739 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3740 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3747 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3748 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3755 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3756 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3768 switch (GET_CODE (operands[3]))
3772 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3774 rtx temp = operands[2];
3775 operands[2] = operands[1];
3779 /* know operands[0] == operands[1]. */
3781 if (GET_CODE (operands[2]) == MEM)
3787 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3789 if (STACK_TOP_P (operands[0]))
3790 /* How is it that we are storing to a dead operand[2]?
3791 Well, presumably operands[1] is dead too. We can't
3792 store the result to st(0) as st(0) gets popped on this
3793 instruction. Instead store to operands[2] (which I
3794 think has to be st(1)). st(1) will be popped later.
3795 gcc <= 2.8.1 didn't have this check and generated
3796 assembly code that the Unixware assembler rejected. */
3797 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3799 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3803 if (STACK_TOP_P (operands[0]))
3804 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3806 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3811 if (GET_CODE (operands[1]) == MEM)
3817 if (GET_CODE (operands[2]) == MEM)
3823 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3826 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3827 derived assemblers, confusingly reverse the direction of
3828 the operation for fsub{r} and fdiv{r} when the
3829 destination register is not st(0). The Intel assembler
3830 doesn't have this brain damage. Read !SYSV386_COMPAT to
3831 figure out what the hardware really does. */
3832 if (STACK_TOP_P (operands[0]))
3833 p = "{p\t%0, %2|rp\t%2, %0}";
3835 p = "{rp\t%2, %0|p\t%0, %2}";
3837 if (STACK_TOP_P (operands[0]))
3838 /* As above for fmul/fadd, we can't store to st(0). */
3839 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3841 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3846 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3849 if (STACK_TOP_P (operands[0]))
3850 p = "{rp\t%0, %1|p\t%1, %0}";
3852 p = "{p\t%1, %0|rp\t%0, %1}";
3854 if (STACK_TOP_P (operands[0]))
3855 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3857 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3862 if (STACK_TOP_P (operands[0]))
3864 if (STACK_TOP_P (operands[1]))
3865 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3867 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3870 else if (STACK_TOP_P (operands[1]))
3873 p = "{\t%1, %0|r\t%0, %1}";
3875 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3881 p = "{r\t%2, %0|\t%0, %2}";
3883 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3896 /* Output code for INSN to convert a float to a signed int. OPERANDS
3897 are the insn operands. The output may be [HSD]Imode and the input
3898 operand may be [SDX]Fmode. */
3901 output_fix_trunc (insn, operands)
3905 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3906 int dimode_p = GET_MODE (operands[0]) == DImode;
3909 /* Jump through a hoop or two for DImode, since the hardware has no
3910 non-popping instruction. We used to do this a different way, but
3911 that was somewhat fragile and broke with post-reload splitters. */
3912 if (dimode_p && !stack_top_dies)
3913 output_asm_insn ("fld\t%y1", operands);
3915 if (! STACK_TOP_P (operands[1]))
3918 xops[0] = GEN_INT (12);
3919 xops[1] = adj_offsettable_operand (operands[2], 1);
3920 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3922 xops[2] = operands[0];
3923 if (GET_CODE (operands[0]) != MEM)
3924 xops[2] = operands[3];
3926 output_asm_insn ("fnstcw\t%2", operands);
3927 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3928 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3929 output_asm_insn ("fldcw\t%2", operands);
3930 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3932 if (stack_top_dies || dimode_p)
3933 output_asm_insn ("fistp%z2\t%2", xops);
3935 output_asm_insn ("fist%z2\t%2", xops);
3937 output_asm_insn ("fldcw\t%2", operands);
3939 if (GET_CODE (operands[0]) != MEM)
3943 split_di (operands+0, 1, xops+0, xops+1);
3944 split_di (operands+3, 1, xops+2, xops+3);
3945 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3946 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3948 else if (GET_MODE (operands[0]) == SImode)
3949 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3951 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3957 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3958 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3959 when fucom should be used. */
3962 output_fp_compare (insn, operands, eflags_p, unordered_p)
3965 int eflags_p, unordered_p;
3968 rtx cmp_op0 = operands[0];
3969 rtx cmp_op1 = operands[1];
3974 cmp_op1 = operands[2];
3977 if (! STACK_TOP_P (cmp_op0))
3980 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3982 if (STACK_REG_P (cmp_op1)
3984 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3985 && REGNO (cmp_op1) != FIRST_STACK_REG)
3987 /* If both the top of the 387 stack dies, and the other operand
3988 is also a stack register that dies, then this must be a
3989 `fcompp' float compare */
3993 /* There is no double popping fcomi variant. Fortunately,
3994 eflags is immune from the fstp's cc clobbering. */
3996 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3998 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
4006 return "fucompp\n\tfnstsw\t%0";
4008 return "fcompp\n\tfnstsw\t%0";
4021 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4023 static const char * const alt[24] =
4035 "fcomi\t{%y1, %0|%0, %y1}",
4036 "fcomip\t{%y1, %0|%0, %y1}",
4037 "fucomi\t{%y1, %0|%0, %y1}",
4038 "fucomip\t{%y1, %0|%0, %y1}",
4045 "fcom%z2\t%y2\n\tfnstsw\t%0",
4046 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4047 "fucom%z2\t%y2\n\tfnstsw\t%0",
4048 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4050 "ficom%z2\t%y2\n\tfnstsw\t%0",
4051 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4059 mask = eflags_p << 3;
4060 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4061 mask |= unordered_p << 1;
4062 mask |= stack_top_dies;
4074 /* Output assembler code to FILE to initialize basic-block profiling.
4076 If profile_block_flag == 2
4078 Output code to call the subroutine `__bb_init_trace_func'
4079 and pass two parameters to it. The first parameter is
4080 the address of a block allocated in the object module.
4081 The second parameter is the number of the first basic block
4084 The name of the block is a local symbol made with this statement:
4086 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4088 Of course, since you are writing the definition of
4089 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4090 can take a short cut in the definition of this macro and use the
4091 name that you know will result.
4093 The number of the first basic block of the function is
4094 passed to the macro in BLOCK_OR_LABEL.
4096 If described in a virtual assembler language the code to be
4100 parameter2 <- BLOCK_OR_LABEL
4101 call __bb_init_trace_func
4103 else if profile_block_flag != 0
4105 Output code to call the subroutine `__bb_init_func'
4106 and pass one single parameter to it, which is the same
4107 as the first parameter to `__bb_init_trace_func'.
4109 The first word of this parameter is a flag which will be nonzero if
4110 the object module has already been initialized. So test this word
4111 first, and do not call `__bb_init_func' if the flag is nonzero.
4112 Note: When profile_block_flag == 2 the test need not be done
4113 but `__bb_init_trace_func' *must* be called.
4115 BLOCK_OR_LABEL may be used to generate a label number as a
4116 branch destination in case `__bb_init_func' will not be called.
4118 If described in a virtual assembler language the code to be
4129 ix86_output_function_block_profiler (file, block_or_label)
4133 static int num_func = 0;
4135 char block_table[80], false_label[80];
4137 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4139 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4140 xops[5] = stack_pointer_rtx;
4141 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4143 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4145 switch (profile_block_flag)
4148 xops[2] = GEN_INT (block_or_label);
4149 xops[3] = gen_rtx_MEM (Pmode,
4150 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4151 xops[6] = GEN_INT (8);
4153 output_asm_insn ("push{l}\t%2", xops);
4155 output_asm_insn ("push{l}\t%1", xops);
4158 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4159 output_asm_insn ("push{l}\t%7", xops);
4161 output_asm_insn ("call\t%P3", xops);
4162 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4166 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4168 xops[0] = const0_rtx;
4169 xops[2] = gen_rtx_MEM (Pmode,
4170 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4171 xops[3] = gen_rtx_MEM (Pmode,
4172 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4173 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4174 xops[6] = GEN_INT (4);
4176 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4178 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4179 output_asm_insn ("jne\t%2", xops);
4182 output_asm_insn ("push{l}\t%1", xops);
4185 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4186 output_asm_insn ("push{l}\t%7", xops);
4188 output_asm_insn ("call\t%P3", xops);
4189 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4190 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4196 /* Output assembler code to FILE to increment a counter associated
4197 with basic block number BLOCKNO.
4199 If profile_block_flag == 2
4201 Output code to initialize the global structure `__bb' and
4202 call the function `__bb_trace_func' which will increment the
4205 `__bb' consists of two words. In the first word the number
4206 of the basic block has to be stored. In the second word
4207 the address of a block allocated in the object module
4210 The basic block number is given by BLOCKNO.
4212 The address of the block is given by the label created with
4214 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4216 by FUNCTION_BLOCK_PROFILER.
4218 Of course, since you are writing the definition of
4219 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4220 can take a short cut in the definition of this macro and use the
4221 name that you know will result.
4223 If described in a virtual assembler language the code to be
4226 move BLOCKNO -> (__bb)
4227 move LPBX0 -> (__bb+4)
4228 call __bb_trace_func
4230 Note that function `__bb_trace_func' must not change the
4231 machine state, especially the flag register. To grant
4232 this, you must output code to save and restore registers
4233 either in this macro or in the macros MACHINE_STATE_SAVE
4234 and MACHINE_STATE_RESTORE. The last two macros will be
4235 used in the function `__bb_trace_func', so you must make
4236 sure that the function prologue does not change any
4237 register prior to saving it with MACHINE_STATE_SAVE.
4239 else if profile_block_flag != 0
4241 Output code to increment the counter directly.
4242 Basic blocks are numbered separately from zero within each
4243 compiled object module. The count associated with block number
4244 BLOCKNO is at index BLOCKNO in an array of words; the name of
4245 this array is a local symbol made with this statement:
4247 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4249 Of course, since you are writing the definition of
4250 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4251 can take a short cut in the definition of this macro and use the
4252 name that you know will result.
4254 If described in a virtual assembler language the code to be
4257 inc (LPBX2+4*BLOCKNO)
4261 ix86_output_block_profiler (file, blockno)
4262 FILE *file ATTRIBUTE_UNUSED;
4265 rtx xops[8], cnt_rtx;
4267 char *block_table = counts;
4269 switch (profile_block_flag)
4272 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4274 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4275 xops[2] = GEN_INT (blockno);
4276 xops[3] = gen_rtx_MEM (Pmode,
4277 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4278 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4279 xops[5] = plus_constant (xops[4], 4);
4280 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4281 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4283 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4285 output_asm_insn ("pushf", xops);
4286 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4289 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4290 output_asm_insn ("push{l}\t%7", xops);
4291 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4292 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4293 output_asm_insn ("pop{l}\t%7", xops);
4296 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4297 output_asm_insn ("call\t%P3", xops);
4298 output_asm_insn ("popf", xops);
4303 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4304 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4305 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4308 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4311 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4313 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4314 output_asm_insn ("inc{l}\t%0", xops);
4321 ix86_expand_move (mode, operands)
4322 enum machine_mode mode;
4325 int strict = (reload_in_progress || reload_completed);
4328 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4330 /* Emit insns to move operands[1] into operands[0]. */
4332 if (GET_CODE (operands[0]) == MEM)
4333 operands[1] = force_reg (Pmode, operands[1]);
4336 rtx temp = operands[0];
4337 if (GET_CODE (temp) != REG)
4338 temp = gen_reg_rtx (Pmode);
4339 temp = legitimize_pic_address (operands[1], temp);
4340 if (temp == operands[0])
4347 if (GET_CODE (operands[0]) == MEM
4348 && (GET_MODE (operands[0]) == QImode
4349 || !push_operand (operands[0], mode))
4350 && GET_CODE (operands[1]) == MEM)
4351 operands[1] = force_reg (mode, operands[1]);
4353 if (push_operand (operands[0], mode)
4354 && ! general_no_elim_operand (operands[1], mode))
4355 operands[1] = copy_to_mode_reg (mode, operands[1]);
4357 if (FLOAT_MODE_P (mode))
4359 /* If we are loading a floating point constant to a register,
4360 force the value to memory now, since we'll get better code
4361 out the back end. */
4365 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4366 && register_operand (operands[0], mode))
4367 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4371 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4376 /* Attempt to expand a binary operator. Make the expansion closer to the
4377 actual machine, then just general_operand, which will allow 3 separate
4378 memory references (one output, two input) in a single insn. */
4381 ix86_expand_binary_operator (code, mode, operands)
4383 enum machine_mode mode;
4386 int matching_memory;
4387 rtx src1, src2, dst, op, clob;
4393 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4394 if (GET_RTX_CLASS (code) == 'c'
4395 && (rtx_equal_p (dst, src2)
4396 || immediate_operand (src1, mode)))
4403 /* If the destination is memory, and we do not have matching source
4404 operands, do things in registers. */
4405 matching_memory = 0;
4406 if (GET_CODE (dst) == MEM)
4408 if (rtx_equal_p (dst, src1))
4409 matching_memory = 1;
4410 else if (GET_RTX_CLASS (code) == 'c'
4411 && rtx_equal_p (dst, src2))
4412 matching_memory = 2;
4414 dst = gen_reg_rtx (mode);
4417 /* Both source operands cannot be in memory. */
4418 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4420 if (matching_memory != 2)
4421 src2 = force_reg (mode, src2);
4423 src1 = force_reg (mode, src1);
4426 /* If the operation is not commutable, source 1 cannot be a constant
4427 or non-matching memory. */
4428 if ((CONSTANT_P (src1)
4429 || (!matching_memory && GET_CODE (src1) == MEM))
4430 && GET_RTX_CLASS (code) != 'c')
4431 src1 = force_reg (mode, src1);
4433 /* If optimizing, copy to regs to improve CSE */
4434 if (optimize && ! no_new_pseudos)
4436 if (GET_CODE (dst) == MEM)
4437 dst = gen_reg_rtx (mode);
4438 if (GET_CODE (src1) == MEM)
4439 src1 = force_reg (mode, src1);
4440 if (GET_CODE (src2) == MEM)
4441 src2 = force_reg (mode, src2);
4444 /* Emit the instruction. */
4446 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4447 if (reload_in_progress)
4449 /* Reload doesn't know about the flags register, and doesn't know that
4450 it doesn't want to clobber it. We can only do this with PLUS. */
4457 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4458 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4461 /* Fix up the destination if needed. */
4462 if (dst != operands[0])
4463 emit_move_insn (operands[0], dst);
4466 /* Return TRUE or FALSE depending on whether the binary operator meets the
4467 appropriate constraints. */
4470 ix86_binary_operator_ok (code, mode, operands)
4472 enum machine_mode mode ATTRIBUTE_UNUSED;
4475 /* Both source operands cannot be in memory. */
4476 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4478 /* If the operation is not commutable, source 1 cannot be a constant. */
4479 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4481 /* If the destination is memory, we must have a matching source operand. */
4482 if (GET_CODE (operands[0]) == MEM
4483 && ! (rtx_equal_p (operands[0], operands[1])
4484 || (GET_RTX_CLASS (code) == 'c'
4485 && rtx_equal_p (operands[0], operands[2]))))
4487 /* If the operation is not commutable and the source 1 is memory, we must
4488 have a matching destionation. */
4489 if (GET_CODE (operands[1]) == MEM
4490 && GET_RTX_CLASS (code) != 'c'
4491 && ! rtx_equal_p (operands[0], operands[1]))
4496 /* Attempt to expand a unary operator. Make the expansion closer to the
4497 actual machine, then just general_operand, which will allow 2 separate
4498 memory references (one output, one input) in a single insn. */
4501 ix86_expand_unary_operator (code, mode, operands)
4503 enum machine_mode mode;
4506 int matching_memory;
4507 rtx src, dst, op, clob;
4512 /* If the destination is memory, and we do not have matching source
4513 operands, do things in registers. */
4514 matching_memory = 0;
4515 if (GET_CODE (dst) == MEM)
4517 if (rtx_equal_p (dst, src))
4518 matching_memory = 1;
4520 dst = gen_reg_rtx (mode);
4523 /* When source operand is memory, destination must match. */
4524 if (!matching_memory && GET_CODE (src) == MEM)
4525 src = force_reg (mode, src);
4527 /* If optimizing, copy to regs to improve CSE */
4528 if (optimize && ! no_new_pseudos)
4530 if (GET_CODE (dst) == MEM)
4531 dst = gen_reg_rtx (mode);
4532 if (GET_CODE (src) == MEM)
4533 src = force_reg (mode, src);
4536 /* Emit the instruction. */
4538 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4539 if (reload_in_progress || code == NOT)
4541 /* Reload doesn't know about the flags register, and doesn't know that
4542 it doesn't want to clobber it. */
4549 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4550 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4553 /* Fix up the destination if needed. */
4554 if (dst != operands[0])
4555 emit_move_insn (operands[0], dst);
4558 /* Return TRUE or FALSE depending on whether the unary operator meets the
4559 appropriate constraints. */
4562 ix86_unary_operator_ok (code, mode, operands)
4563 enum rtx_code code ATTRIBUTE_UNUSED;
4564 enum machine_mode mode ATTRIBUTE_UNUSED;
4565 rtx operands[2] ATTRIBUTE_UNUSED;
4567 /* If one of operands is memory, source and destination must match. */
4568 if ((GET_CODE (operands[0]) == MEM
4569 || GET_CODE (operands[1]) == MEM)
4570 && ! rtx_equal_p (operands[0], operands[1]))
4575 /* Return TRUE or FALSE depending on whether the first SET in INSN
4576 has source and destination with matching CC modes, and that the
4577 CC mode is at least as constrained as REQ_MODE. */
4580 ix86_match_ccmode (insn, req_mode)
4582 enum machine_mode req_mode;
4585 enum machine_mode set_mode;
4587 set = PATTERN (insn);
4588 if (GET_CODE (set) == PARALLEL)
4589 set = XVECEXP (set, 0, 0);
4590 if (GET_CODE (set) != SET)
4592 if (GET_CODE (SET_SRC (set)) != COMPARE)
4595 set_mode = GET_MODE (SET_DEST (set));
4599 if (req_mode != CCNOmode
4600 && (req_mode != CCmode
4601 || XEXP (SET_SRC (set), 1) != const0_rtx))
4605 if (req_mode == CCGCmode)
4609 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4613 if (req_mode == CCZmode)
4623 return (GET_MODE (SET_SRC (set)) == set_mode);
4626 /* Generate insn patterns to do an integer compare of OPERANDS. */
4629 ix86_expand_int_compare (code, op0, op1)
4633 enum machine_mode cmpmode;
4636 cmpmode = SELECT_CC_MODE (code, op0, op1);
4637 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4639 /* This is very simple, but making the interface the same as in the
4640 FP case makes the rest of the code easier. */
4641 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4642 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4644 /* Return the test that should be put into the flags user, i.e.
4645 the bcc, scc, or cmov instruction. */
4646 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4649 /* Figure out whether to use ordered or unordered fp comparisons.
4650 Return the appropriate mode to use. */
4653 ix86_fp_compare_mode (code)
4654 enum rtx_code code ATTRIBUTE_UNUSED;
4656 /* ??? In order to make all comparisons reversible, we do all comparisons
4657 non-trapping when compiling for IEEE. Once gcc is able to distinguish
4658 all forms trapping and nontrapping comparisons, we can make inequality
4659 comparisons trapping again, since it results in better code when using
4660 FCOM based compares. */
4661 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
4665 ix86_cc_mode (code, op0, op1)
4669 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4670 return ix86_fp_compare_mode (code);
4673 /* Only zero flag is needed. */
4675 case NE: /* ZF!=0 */
4677 /* Codes needing carry flag. */
4678 case GEU: /* CF=0 */
4679 case GTU: /* CF=0 & ZF=0 */
4680 case LTU: /* CF=1 */
4681 case LEU: /* CF=1 | ZF=1 */
4683 /* Codes possibly doable only with sign flag when
4684 comparing against zero. */
4685 case GE: /* SF=OF or SF=0 */
4686 case LT: /* SF<>OF or SF=1 */
4687 if (op1 == const0_rtx)
4690 /* For other cases Carry flag is not required. */
4692 /* Codes doable only with sign flag when comparing
4693 against zero, but we miss jump instruction for it
4694 so we need to use relational tests agains overflow
4695 that thus needs to be zero. */
4696 case GT: /* ZF=0 & SF=OF */
4697 case LE: /* ZF=1 | SF<>OF */
4698 if (op1 == const0_rtx)
4707 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4710 ix86_use_fcomi_compare (code)
4711 enum rtx_code code ATTRIBUTE_UNUSED;
4713 enum rtx_code swapped_code = swap_condition (code);
4714 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
4715 || (ix86_fp_comparison_cost (swapped_code)
4716 == ix86_fp_comparison_fcomi_cost (swapped_code)));
4719 /* Swap, force into registers, or otherwise massage the two operands
4720 to a fp comparison. The operands are updated in place; the new
4721 comparsion code is returned. */
4723 static enum rtx_code
4724 ix86_prepare_fp_compare_args (code, pop0, pop1)
4728 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4729 rtx op0 = *pop0, op1 = *pop1;
4730 enum machine_mode op_mode = GET_MODE (op0);
4732 /* All of the unordered compare instructions only work on registers.
4733 The same is true of the XFmode compare instructions. The same is
4734 true of the fcomi compare instructions. */
4736 if (fpcmp_mode == CCFPUmode
4737 || op_mode == XFmode
4738 || op_mode == TFmode
4739 || ix86_use_fcomi_compare (code))
4741 op0 = force_reg (op_mode, op0);
4742 op1 = force_reg (op_mode, op1);
4746 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4747 things around if they appear profitable, otherwise force op0
4750 if (standard_80387_constant_p (op0) == 0
4751 || (GET_CODE (op0) == MEM
4752 && ! (standard_80387_constant_p (op1) == 0
4753 || GET_CODE (op1) == MEM)))
4756 tmp = op0, op0 = op1, op1 = tmp;
4757 code = swap_condition (code);
4760 if (GET_CODE (op0) != REG)
4761 op0 = force_reg (op_mode, op0);
4763 if (CONSTANT_P (op1))
4765 if (standard_80387_constant_p (op1))
4766 op1 = force_reg (op_mode, op1);
4768 op1 = validize_mem (force_const_mem (op_mode, op1));
4772 /* Try to rearrange the comparison to make it cheaper. */
4773 if (ix86_fp_comparison_cost (code)
4774 > ix86_fp_comparison_cost (swap_condition (code))
4775 && (GET_CODE (op0) == REG || !reload_completed))
4778 tmp = op0, op0 = op1, op1 = tmp;
4779 code = swap_condition (code);
4780 if (GET_CODE (op0) != REG)
4781 op0 = force_reg (op_mode, op0);
4789 /* Convert comparison codes we use to represent FP comparison to integer
4790 code that will result in proper branch. Return UNKNOWN if no such code
4792 static enum rtx_code
4793 ix86_fp_compare_code_to_integer (code)
4823 /* Split comparison code CODE into comparisons we can do using branch
4824 instructions. BYPASS_CODE is comparison code for branch that will
4825 branch around FIRST_CODE and SECOND_CODE. If some of branches
4826 is not required, set value to NIL.
4827 We never require more than two branches. */
4829 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
4830 enum rtx_code code, *bypass_code, *first_code, *second_code;
4836 /* The fcomi comparison sets flags as follows:
4846 case GT: /* GTU - CF=0 & ZF=0 */
4847 case GE: /* GEU - CF=0 */
4848 case ORDERED: /* PF=0 */
4849 case UNORDERED: /* PF=1 */
4850 case UNEQ: /* EQ - ZF=1 */
4851 case UNLT: /* LTU - CF=1 */
4852 case UNLE: /* LEU - CF=1 | ZF=1 */
4853 case LTGT: /* EQ - ZF=0 */
4855 case LT: /* LTU - CF=1 - fails on unordered */
4857 *bypass_code = UNORDERED;
4859 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
4861 *bypass_code = UNORDERED;
4863 case EQ: /* EQ - ZF=1 - fails on unordered */
4865 *bypass_code = UNORDERED;
4867 case NE: /* NE - ZF=0 - fails on unordered */
4869 *second_code = UNORDERED;
4871 case UNGE: /* GEU - CF=0 - fails on unordered */
4873 *second_code = UNORDERED;
4875 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
4877 *second_code = UNORDERED;
4882 if (!TARGET_IEEE_FP)
4889 /* Return cost of comparison done fcom + arithmetics operations on AX.
4890 All following functions do use number of instructions as an cost metrics.
4891 In future this should be tweaked to compute bytes for optimize_size and
4892 take into account performance of various instructions on various CPUs. */
4894 ix86_fp_comparison_arithmetics_cost (code)
4897 if (!TARGET_IEEE_FP)
4899 /* The cost of code output by ix86_expand_fp_compare. */
4927 /* Return cost of comparison done using fcomi operation.
4928 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4930 ix86_fp_comparison_fcomi_cost (code)
4933 enum rtx_code bypass_code, first_code, second_code;
4934 /* Return arbitarily high cost when instruction is not supported - this
4935 prevents gcc from using it. */
4938 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
4939 return (bypass_code != NIL || second_code != NIL) + 2;
4942 /* Return cost of comparison done using sahf operation.
4943 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4945 ix86_fp_comparison_sahf_cost (code)
4948 enum rtx_code bypass_code, first_code, second_code;
4949 /* Return arbitarily high cost when instruction is not preferred - this
4950 avoids gcc from using it. */
4951 if (!TARGET_USE_SAHF && !optimize_size)
4953 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
4954 return (bypass_code != NIL || second_code != NIL) + 3;
4957 /* Compute cost of the comparison done using any method.
4958 See ix86_fp_comparison_arithmetics_cost for the metrics. */
4960 ix86_fp_comparison_cost (code)
4963 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
4966 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
4967 sahf_cost = ix86_fp_comparison_sahf_cost (code);
4969 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
4970 if (min > sahf_cost)
4972 if (min > fcomi_cost)
4977 /* Generate insn patterns to do a floating point compare of OPERANDS. */
4980 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
4982 rtx op0, op1, scratch;
4986 enum machine_mode fpcmp_mode, intcmp_mode;
4988 int cost = ix86_fp_comparison_cost (code);
4989 enum rtx_code bypass_code, first_code, second_code;
4991 fpcmp_mode = ix86_fp_compare_mode (code);
4992 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4995 *second_test = NULL_RTX;
4997 *bypass_test = NULL_RTX;
4999 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
5001 /* Do fcomi/sahf based test when profitable. */
5002 if ((bypass_code == NIL || bypass_test)
5003 && (second_code == NIL || second_test)
5004 && ix86_fp_comparison_arithmetics_cost (code) > cost)
5008 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5009 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
5015 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5016 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5017 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5018 emit_insn (gen_x86_sahf_1 (scratch));
5021 /* The FP codes work out to act like unsigned. */
5022 intcmp_mode = fpcmp_mode;
5024 if (bypass_code != NIL)
5025 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
5026 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5028 if (second_code != NIL)
5029 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
5030 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5035 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
5036 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
5037 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
5038 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
5040 /* In the unordered case, we have to check C2 for NaN's, which
5041 doesn't happen to work out to anything nice combination-wise.
5042 So do some bit twiddling on the value we've got in AH to come
5043 up with an appropriate set of condition codes. */
5045 intcmp_mode = CCNOmode;
5050 if (code == GT || !TARGET_IEEE_FP)
5052 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5057 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5058 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5059 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
5060 intcmp_mode = CCmode;
5066 if (code == LT && TARGET_IEEE_FP)
5068 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5069 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
5070 intcmp_mode = CCmode;
5075 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5081 if (code == GE || !TARGET_IEEE_FP)
5083 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
5088 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5089 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5096 if (code == LE && TARGET_IEEE_FP)
5098 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5099 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
5100 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5101 intcmp_mode = CCmode;
5106 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
5112 if (code == EQ && TARGET_IEEE_FP)
5114 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5115 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
5116 intcmp_mode = CCmode;
5121 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5128 if (code == NE && TARGET_IEEE_FP)
5130 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
5131 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
5137 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5143 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5147 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
5156 /* Return the test that should be put into the flags user, i.e.
5157 the bcc, scc, or cmov instruction. */
5158 return gen_rtx_fmt_ee (code, VOIDmode,
5159 gen_rtx_REG (intcmp_mode, FLAGS_REG),
5164 ix86_expand_compare (code, second_test, bypass_test)
5166 rtx *second_test, *bypass_test;
5169 op0 = ix86_compare_op0;
5170 op1 = ix86_compare_op1;
5173 *second_test = NULL_RTX;
5175 *bypass_test = NULL_RTX;
5177 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5178 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode),
5179 second_test, bypass_test);
5181 ret = ix86_expand_int_compare (code, op0, op1);
5187 ix86_expand_branch (code, label)
5193 switch (GET_MODE (ix86_compare_op0))
5198 tmp = ix86_expand_compare (code, NULL, NULL);
5199 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5200 gen_rtx_LABEL_REF (VOIDmode, label),
5202 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5209 /* Don't expand the comparison early, so that we get better code
5210 when jump or whoever decides to reverse the comparison. */
5215 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5218 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5219 ix86_compare_op0, ix86_compare_op1);
5220 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5221 gen_rtx_LABEL_REF (VOIDmode, label),
5223 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5225 use_fcomi = ix86_use_fcomi_compare (code);
5226 vec = rtvec_alloc (3 + !use_fcomi);
5227 RTVEC_ELT (vec, 0) = tmp;
5229 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5231 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5234 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5236 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5241 /* Expand DImode branch into multiple compare+branch. */
5243 rtx lo[2], hi[2], label2;
5244 enum rtx_code code1, code2, code3;
5246 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5248 tmp = ix86_compare_op0;
5249 ix86_compare_op0 = ix86_compare_op1;
5250 ix86_compare_op1 = tmp;
5251 code = swap_condition (code);
5253 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5254 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5256 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5257 avoid two branches. This costs one extra insn, so disable when
5258 optimizing for size. */
5260 if ((code == EQ || code == NE)
5262 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5267 if (hi[1] != const0_rtx)
5268 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5269 NULL_RTX, 0, OPTAB_WIDEN);
5272 if (lo[1] != const0_rtx)
5273 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5274 NULL_RTX, 0, OPTAB_WIDEN);
5276 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5277 NULL_RTX, 0, OPTAB_WIDEN);
5279 ix86_compare_op0 = tmp;
5280 ix86_compare_op1 = const0_rtx;
5281 ix86_expand_branch (code, label);
5285 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5286 op1 is a constant and the low word is zero, then we can just
5287 examine the high word. */
5289 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5292 case LT: case LTU: case GE: case GEU:
5293 ix86_compare_op0 = hi[0];
5294 ix86_compare_op1 = hi[1];
5295 ix86_expand_branch (code, label);
5301 /* Otherwise, we need two or three jumps. */
5303 label2 = gen_label_rtx ();
5306 code2 = swap_condition (code);
5307 code3 = unsigned_condition (code);
5311 case LT: case GT: case LTU: case GTU:
5314 case LE: code1 = LT; code2 = GT; break;
5315 case GE: code1 = GT; code2 = LT; break;
5316 case LEU: code1 = LTU; code2 = GTU; break;
5317 case GEU: code1 = GTU; code2 = LTU; break;
5319 case EQ: code1 = NIL; code2 = NE; break;
5320 case NE: code2 = NIL; break;
5328 * if (hi(a) < hi(b)) goto true;
5329 * if (hi(a) > hi(b)) goto false;
5330 * if (lo(a) < lo(b)) goto true;
5334 ix86_compare_op0 = hi[0];
5335 ix86_compare_op1 = hi[1];
5338 ix86_expand_branch (code1, label);
5340 ix86_expand_branch (code2, label2);
5342 ix86_compare_op0 = lo[0];
5343 ix86_compare_op1 = lo[1];
5344 ix86_expand_branch (code3, label);
5347 emit_label (label2);
5356 /* Split branch based on floating point condition. */
5358 ix86_split_fp_branch (condition, op1, op2, target1, target2, tmp)
5359 rtx condition, op1, op2, target1, target2, tmp;
5362 rtx label = NULL_RTX;
5363 enum rtx_code code = GET_CODE (condition);
5365 if (target2 != pc_rtx)
5368 code = reverse_condition_maybe_unordered (code);
5373 condition = ix86_expand_fp_compare (code, op1, op2,
5374 tmp, &second, &bypass);
5375 if (bypass != NULL_RTX)
5377 label = gen_label_rtx ();
5378 emit_jump_insn (gen_rtx_SET
5380 gen_rtx_IF_THEN_ELSE (VOIDmode,
5382 gen_rtx_LABEL_REF (VOIDmode,
5386 /* AMD Athlon and probably other CPUs too have fast bypass path between the
5387 comparison and first branch. The second branch takes longer to execute
5388 so place first branch the worse predicable one if possible. */
5389 if (second != NULL_RTX
5390 && (GET_CODE (second) == UNORDERED || GET_CODE (second) == ORDERED))
5392 rtx tmp = condition;
5396 emit_jump_insn (gen_rtx_SET
5398 gen_rtx_IF_THEN_ELSE (VOIDmode,
5399 condition, target1, target2)));
5400 if (second != NULL_RTX)
5401 emit_jump_insn (gen_rtx_SET
5403 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, target2)));
5404 if (label != NULL_RTX)
5409 ix86_expand_setcc (code, dest)
5413 rtx ret, tmp, tmpreg;
5414 rtx second_test, bypass_test;
5417 if (GET_MODE (ix86_compare_op0) == DImode)
5418 return 0; /* FAIL */
5420 /* Three modes of generation:
5421 0 -- destination does not overlap compare sources:
5422 clear dest first, emit strict_low_part setcc.
5423 1 -- destination does overlap compare sources:
5424 emit subreg setcc, zero extend.
5425 2 -- destination is in QImode:
5431 if (GET_MODE (dest) == QImode)
5433 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5434 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5438 emit_move_insn (dest, const0_rtx);
5440 ret = ix86_expand_compare (code, &second_test, &bypass_test);
5441 PUT_MODE (ret, QImode);
5447 tmp = gen_lowpart (QImode, dest);
5449 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5453 if (!cse_not_expected)
5454 tmp = gen_reg_rtx (QImode);
5456 tmp = gen_lowpart (QImode, dest);
5460 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5461 if (bypass_test || second_test)
5463 rtx test = second_test;
5465 rtx tmp2 = gen_reg_rtx (QImode);
5472 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
5474 PUT_MODE (test, QImode);
5475 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
5478 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
5480 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
5487 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5488 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5489 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5490 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5494 return 1; /* DONE */
5498 ix86_expand_int_movcc (operands)
5501 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5502 rtx compare_seq, compare_op;
5503 rtx second_test, bypass_test;
5505 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5506 In case comparsion is done with immediate, we can convert it to LTU or
5507 GEU by altering the integer. */
5509 if ((code == LEU || code == GTU)
5510 && GET_CODE (ix86_compare_op1) == CONST_INT
5511 && GET_MODE (operands[0]) != HImode
5512 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5513 && GET_CODE (operands[2]) == CONST_INT
5514 && GET_CODE (operands[3]) == CONST_INT)
5520 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5524 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5525 compare_seq = gen_sequence ();
5528 compare_code = GET_CODE (compare_op);
5530 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5531 HImode insns, we'd be swallowed in word prefix ops. */
5533 if (GET_MODE (operands[0]) != HImode
5534 && GET_CODE (operands[2]) == CONST_INT
5535 && GET_CODE (operands[3]) == CONST_INT)
5537 rtx out = operands[0];
5538 HOST_WIDE_INT ct = INTVAL (operands[2]);
5539 HOST_WIDE_INT cf = INTVAL (operands[3]);
5542 if ((compare_code == LTU || compare_code == GEU)
5543 && !second_test && !bypass_test)
5546 /* Detect overlap between destination and compare sources. */
5549 /* To simplify rest of code, restrict to the GEU case. */
5550 if (compare_code == LTU)
5555 compare_code = reverse_condition (compare_code);
5556 code = reverse_condition (code);
5560 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5561 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5562 tmp = gen_reg_rtx (SImode);
5564 emit_insn (compare_seq);
5565 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5577 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5588 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5590 else if (diff == -1 && ct)
5600 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5602 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5609 * andl cf - ct, dest
5614 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5616 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5620 emit_move_insn (out, tmp);
5622 return 1; /* DONE */
5629 tmp = ct, ct = cf, cf = tmp;
5631 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5633 /* We may be reversing unordered compare to normal compare, that
5634 is not valid in general (we may convert non-trapping condition
5635 to trapping one), however on i386 we currently emit all
5636 comparisons unordered. */
5637 compare_code = reverse_condition_maybe_unordered (compare_code);
5638 code = reverse_condition_maybe_unordered (code);
5642 compare_code = reverse_condition (compare_code);
5643 code = reverse_condition (code);
5646 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5647 || diff == 3 || diff == 5 || diff == 9)
5653 * lea cf(dest*(ct-cf)),dest
5657 * This also catches the degenerate setcc-only case.
5663 out = emit_store_flag (out, code, ix86_compare_op0,
5664 ix86_compare_op1, VOIDmode, 0, 1);
5671 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5675 tmp = gen_rtx_PLUS (SImode, tmp, out);
5681 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5687 emit_move_insn (out, tmp);
5692 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5693 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5695 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5696 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5700 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5702 if (out != operands[0])
5703 emit_move_insn (operands[0], out);
5705 return 1; /* DONE */
5709 * General case: Jumpful:
5710 * xorl dest,dest cmpl op1, op2
5711 * cmpl op1, op2 movl ct, dest
5713 * decl dest movl cf, dest
5714 * andl (cf-ct),dest 1:
5719 * This is reasonably steep, but branch mispredict costs are
5720 * high on modern cpus, so consider failing only if optimizing
5723 * %%% Parameterize branch_cost on the tuning architecture, then
5724 * use that. The 80386 couldn't care less about mispredicts.
5727 if (!optimize_size && !TARGET_CMOVE)
5733 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5735 /* We may be reversing unordered compare to normal compare,
5736 that is not valid in general (we may convert non-trapping
5737 condition to trapping one), however on i386 we currently
5738 emit all comparisons unordered. */
5739 compare_code = reverse_condition_maybe_unordered (compare_code);
5740 code = reverse_condition_maybe_unordered (code);
5744 compare_code = reverse_condition (compare_code);
5745 code = reverse_condition (code);
5749 out = emit_store_flag (out, code, ix86_compare_op0,
5750 ix86_compare_op1, VOIDmode, 0, 1);
5752 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5753 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5755 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5756 if (out != operands[0])
5757 emit_move_insn (operands[0], out);
5759 return 1; /* DONE */
5765 /* Try a few things more with specific constants and a variable. */
5768 rtx var, orig_out, out, tmp;
5771 return 0; /* FAIL */
5773 /* If one of the two operands is an interesting constant, load a
5774 constant with the above and mask it in with a logical operation. */
5776 if (GET_CODE (operands[2]) == CONST_INT)
5779 if (INTVAL (operands[2]) == 0)
5780 operands[3] = constm1_rtx, op = and_optab;
5781 else if (INTVAL (operands[2]) == -1)
5782 operands[3] = const0_rtx, op = ior_optab;
5784 return 0; /* FAIL */
5786 else if (GET_CODE (operands[3]) == CONST_INT)
5789 if (INTVAL (operands[3]) == 0)
5790 operands[2] = constm1_rtx, op = and_optab;
5791 else if (INTVAL (operands[3]) == -1)
5792 operands[2] = const0_rtx, op = ior_optab;
5794 return 0; /* FAIL */
5797 return 0; /* FAIL */
5799 orig_out = operands[0];
5800 tmp = gen_reg_rtx (GET_MODE (orig_out));
5803 /* Recurse to get the constant loaded. */
5804 if (ix86_expand_int_movcc (operands) == 0)
5805 return 0; /* FAIL */
5807 /* Mask in the interesting variable. */
5808 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5810 if (out != orig_out)
5811 emit_move_insn (orig_out, out);
5813 return 1; /* DONE */
5817 * For comparison with above,
5827 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5828 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5829 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5830 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5832 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
5834 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5835 emit_move_insn (tmp, operands[3]);
5838 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
5840 rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
5841 emit_move_insn (tmp, operands[2]);
5845 emit_insn (compare_seq);
5846 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5847 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5848 compare_op, operands[2],
5851 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5852 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5857 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5858 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5863 return 1; /* DONE */
5867 ix86_expand_fp_movcc (operands)
5872 rtx compare_op, second_test, bypass_test;
5874 /* The floating point conditional move instructions don't directly
5875 support conditions resulting from a signed integer comparison. */
5877 code = GET_CODE (operands[1]);
5878 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5880 /* The floating point conditional move instructions don't directly
5881 support signed integer comparisons. */
5883 if (!fcmov_comparison_operator (compare_op, VOIDmode))
5885 if (second_test != NULL || bypass_test != NULL)
5887 tmp = gen_reg_rtx (QImode);
5888 ix86_expand_setcc (code, tmp);
5890 ix86_compare_op0 = tmp;
5891 ix86_compare_op1 = const0_rtx;
5892 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
5894 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
5896 tmp = gen_reg_rtx (GET_MODE (operands[0]));
5897 emit_move_insn (tmp, operands[3]);
5900 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
5902 tmp = gen_reg_rtx (GET_MODE (operands[0]));
5903 emit_move_insn (tmp, operands[2]);
5907 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5908 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5913 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5914 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5919 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5920 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5928 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5929 works for floating pointer parameters and nonoffsetable memories.
5930 For pushes, it returns just stack offsets; the values will be saved
5931 in the right order. Maximally three parts are generated. */
5934 ix86_split_to_parts (operand, parts, mode)
5937 enum machine_mode mode;
5939 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
5941 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5943 if (size < 2 || size > 3)
5946 /* Optimize constant pool reference to immediates. This is used by fp moves,
5947 that force all constants to memory to allow combining. */
5949 if (GET_CODE (operand) == MEM
5950 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5951 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5952 operand = get_pool_constant (XEXP (operand, 0));
5954 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5956 /* The only non-offsetable memories we handle are pushes. */
5957 if (! push_operand (operand, VOIDmode))
5960 PUT_MODE (operand, SImode);
5961 parts[0] = parts[1] = parts[2] = operand;
5966 split_di (&operand, 1, &parts[0], &parts[1]);
5969 if (REG_P (operand))
5971 if (!reload_completed)
5973 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5974 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5976 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5978 else if (offsettable_memref_p (operand))
5980 PUT_MODE (operand, SImode);
5982 parts[1] = adj_offsettable_operand (operand, 4);
5984 parts[2] = adj_offsettable_operand (operand, 8);
5986 else if (GET_CODE (operand) == CONST_DOUBLE)
5991 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5996 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5997 parts[2] = GEN_INT (l[2]);
6000 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
6005 parts[1] = GEN_INT (l[1]);
6006 parts[0] = GEN_INT (l[0]);
6016 /* Emit insns to perform a move or push of DI, DF, and XF values.
6017 Return false when normal moves are needed; true when all required
6018 insns have been emitted. Operands 2-4 contain the input values
6019 int the correct order; operands 5-7 contain the output values. */
6022 ix86_split_long_move (operands1)
6031 /* Make our own copy to avoid clobbering the operands. */
6032 operands[0] = copy_rtx (operands1[0]);
6033 operands[1] = copy_rtx (operands1[1]);
6035 /* The only non-offsettable memory we handle is push. */
6036 if (push_operand (operands[0], VOIDmode))
6038 else if (GET_CODE (operands[0]) == MEM
6039 && ! offsettable_memref_p (operands[0]))
6042 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
6043 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
6045 /* When emitting push, take care for source operands on the stack. */
6046 if (push && GET_CODE (operands[1]) == MEM
6047 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
6050 part[1][1] = part[1][2];
6051 part[1][0] = part[1][1];
6054 /* We need to do copy in the right order in case an address register
6055 of the source overlaps the destination. */
6056 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
6058 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
6060 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6063 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
6066 /* Collision in the middle part can be handled by reordering. */
6067 if (collisions == 1 && size == 3
6068 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
6071 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
6072 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
6075 /* If there are more collisions, we can't handle it by reordering.
6076 Do an lea to the last part and use only one colliding move. */
6077 else if (collisions > 1)
6080 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
6081 XEXP (part[1][0], 0)));
6082 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
6083 part[1][1] = adj_offsettable_operand (part[1][0], 4);
6085 part[1][2] = adj_offsettable_operand (part[1][0], 8);
6093 /* We use only first 12 bytes of TFmode value, but for pushing we
6094 are required to adjust stack as if we were pushing real 16byte
6096 if (GET_MODE (operands1[0]) == TFmode)
6097 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
6099 emit_insn (gen_push (part[1][2]));
6101 emit_insn (gen_push (part[1][1]));
6102 emit_insn (gen_push (part[1][0]));
6106 /* Choose correct order to not overwrite the source before it is copied. */
6107 if ((REG_P (part[0][0])
6108 && REG_P (part[1][1])
6109 && (REGNO (part[0][0]) == REGNO (part[1][1])
6111 && REGNO (part[0][0]) == REGNO (part[1][2]))))
6113 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
6117 operands1[2] = part[0][2];
6118 operands1[3] = part[0][1];
6119 operands1[4] = part[0][0];
6120 operands1[5] = part[1][2];
6121 operands1[6] = part[1][1];
6122 operands1[7] = part[1][0];
6126 operands1[2] = part[0][1];
6127 operands1[3] = part[0][0];
6128 operands1[5] = part[1][1];
6129 operands1[6] = part[1][0];
6136 operands1[2] = part[0][0];
6137 operands1[3] = part[0][1];
6138 operands1[4] = part[0][2];
6139 operands1[5] = part[1][0];
6140 operands1[6] = part[1][1];
6141 operands1[7] = part[1][2];
6145 operands1[2] = part[0][0];
6146 operands1[3] = part[0][1];
6147 operands1[5] = part[1][0];
6148 operands1[6] = part[1][1];
6156 ix86_split_ashldi (operands, scratch)
6157 rtx *operands, scratch;
6159 rtx low[2], high[2];
6162 if (GET_CODE (operands[2]) == CONST_INT)
6164 split_di (operands, 2, low, high);
6165 count = INTVAL (operands[2]) & 63;
6169 emit_move_insn (high[0], low[1]);
6170 emit_move_insn (low[0], const0_rtx);
6173 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
6177 if (!rtx_equal_p (operands[0], operands[1]))
6178 emit_move_insn (operands[0], operands[1]);
6179 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
6180 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
6185 if (!rtx_equal_p (operands[0], operands[1]))
6186 emit_move_insn (operands[0], operands[1]);
6188 split_di (operands, 1, low, high);
6190 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
6191 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
6193 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6195 if (! no_new_pseudos)
6196 scratch = force_reg (SImode, const0_rtx);
6198 emit_move_insn (scratch, const0_rtx);
6200 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
6204 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
6209 ix86_split_ashrdi (operands, scratch)
6210 rtx *operands, scratch;
6212 rtx low[2], high[2];
6215 if (GET_CODE (operands[2]) == CONST_INT)
6217 split_di (operands, 2, low, high);
6218 count = INTVAL (operands[2]) & 63;
6222 emit_move_insn (low[0], high[1]);
6224 if (! reload_completed)
6225 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
6228 emit_move_insn (high[0], low[0]);
6229 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
6233 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
6237 if (!rtx_equal_p (operands[0], operands[1]))
6238 emit_move_insn (operands[0], operands[1]);
6239 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6240 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
6245 if (!rtx_equal_p (operands[0], operands[1]))
6246 emit_move_insn (operands[0], operands[1]);
6248 split_di (operands, 1, low, high);
6250 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6251 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
6253 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6255 if (! no_new_pseudos)
6256 scratch = gen_reg_rtx (SImode);
6257 emit_move_insn (scratch, high[0]);
6258 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
6259 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6263 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
6268 ix86_split_lshrdi (operands, scratch)
6269 rtx *operands, scratch;
6271 rtx low[2], high[2];
6274 if (GET_CODE (operands[2]) == CONST_INT)
6276 split_di (operands, 2, low, high);
6277 count = INTVAL (operands[2]) & 63;
6281 emit_move_insn (low[0], high[1]);
6282 emit_move_insn (high[0], const0_rtx);
6285 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6289 if (!rtx_equal_p (operands[0], operands[1]))
6290 emit_move_insn (operands[0], operands[1]);
6291 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6292 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6297 if (!rtx_equal_p (operands[0], operands[1]))
6298 emit_move_insn (operands[0], operands[1]);
6300 split_di (operands, 1, low, high);
6302 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6303 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6305 /* Heh. By reversing the arguments, we can reuse this pattern. */
6306 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6308 if (! no_new_pseudos)
6309 scratch = force_reg (SImode, const0_rtx);
6311 emit_move_insn (scratch, const0_rtx);
6313 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6317 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6321 /* Expand the appropriate insns for doing strlen if not just doing
6324 out = result, initialized with the start address
6325 align_rtx = alignment of the address.
6326 scratch = scratch register, initialized with the startaddress when
6327 not aligned, otherwise undefined
6329 This is just the body. It needs the initialisations mentioned above and
6330 some address computing at the end. These things are done in i386.md. */
6333 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6334 rtx out, align_rtx, scratch;
6338 rtx align_2_label = NULL_RTX;
6339 rtx align_3_label = NULL_RTX;
6340 rtx align_4_label = gen_label_rtx ();
6341 rtx end_0_label = gen_label_rtx ();
6343 rtx tmpreg = gen_reg_rtx (SImode);
6346 if (GET_CODE (align_rtx) == CONST_INT)
6347 align = INTVAL (align_rtx);
6349 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6351 /* Is there a known alignment and is it less than 4? */
6354 /* Is there a known alignment and is it not 2? */
6357 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6358 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6360 /* Leave just the 3 lower bits. */
6361 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6362 NULL_RTX, 0, OPTAB_WIDEN);
6364 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6365 SImode, 1, 0, align_4_label);
6366 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6367 SImode, 1, 0, align_2_label);
6368 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6369 SImode, 1, 0, align_3_label);
6373 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6374 check if is aligned to 4 - byte. */
6376 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6377 NULL_RTX, 0, OPTAB_WIDEN);
6379 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6380 SImode, 1, 0, align_4_label);
6383 mem = gen_rtx_MEM (QImode, out);
6385 /* Now compare the bytes. */
6387 /* Compare the first n unaligned byte on a byte per byte basis. */
6388 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6389 QImode, 1, 0, end_0_label);
6391 /* Increment the address. */
6392 emit_insn (gen_addsi3 (out, out, const1_rtx));
6394 /* Not needed with an alignment of 2 */
6397 emit_label (align_2_label);
6399 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6400 QImode, 1, 0, end_0_label);
6402 emit_insn (gen_addsi3 (out, out, const1_rtx));
6404 emit_label (align_3_label);
6407 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6408 QImode, 1, 0, end_0_label);
6410 emit_insn (gen_addsi3 (out, out, const1_rtx));
6413 /* Generate loop to check 4 bytes at a time. It is not a good idea to
6414 align this loop. It gives only huge programs, but does not help to
6416 emit_label (align_4_label);
6418 mem = gen_rtx_MEM (SImode, out);
6419 emit_move_insn (scratch, mem);
6420 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6422 /* This formula yields a nonzero result iff one of the bytes is zero.
6423 This saves three branches inside loop and many cycles. */
6425 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6426 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6427 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6428 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6429 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6430 SImode, 1, 0, align_4_label);
6434 rtx reg = gen_reg_rtx (SImode);
6435 emit_move_insn (reg, tmpreg);
6436 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6438 /* If zero is not in the first two bytes, move two bytes forward. */
6439 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6440 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6441 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6442 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6443 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6446 /* Emit lea manually to avoid clobbering of flags. */
6447 emit_insn (gen_rtx_SET (SImode, reg,
6448 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6450 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6451 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6452 emit_insn (gen_rtx_SET (VOIDmode, out,
6453 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6460 rtx end_2_label = gen_label_rtx ();
6461 /* Is zero in the first two bytes? */
6463 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6464 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6465 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6466 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6467 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6469 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6470 JUMP_LABEL (tmp) = end_2_label;
6472 /* Not in the first two. Move two bytes forward. */
6473 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6474 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6476 emit_label (end_2_label);
6480 /* Avoid branch in fixing the byte. */
6481 tmpreg = gen_lowpart (QImode, tmpreg);
6482 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6483 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6485 emit_label (end_0_label);
6488 /* Clear stack slot assignments remembered from previous functions.
6489 This is called from INIT_EXPANDERS once before RTL is emitted for each
6493 ix86_init_machine_status (p)
6496 p->machine = (struct machine_function *)
6497 xcalloc (1, sizeof (struct machine_function));
6500 /* Mark machine specific bits of P for GC. */
6502 ix86_mark_machine_status (p)
6505 struct machine_function *machine = p->machine;
6506 enum machine_mode mode;
6512 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6513 mode = (enum machine_mode) ((int) mode + 1))
6514 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6515 ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
6519 ix86_free_machine_status (p)
6526 /* Return a MEM corresponding to a stack slot with mode MODE.
6527 Allocate a new slot if necessary.
6529 The RTL for a function can have several slots available: N is
6530 which slot to use. */
6533 assign_386_stack_local (mode, n)
6534 enum machine_mode mode;
6537 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6540 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6541 ix86_stack_locals[(int) mode][n]
6542 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6544 return ix86_stack_locals[(int) mode][n];
6547 /* Calculate the length of the memory address in the instruction
6548 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6551 memory_address_length (addr)
6554 struct ix86_address parts;
6555 rtx base, index, disp;
6558 if (GET_CODE (addr) == PRE_DEC
6559 || GET_CODE (addr) == POST_INC)
6562 if (! ix86_decompose_address (addr, &parts))
6566 index = parts.index;
6570 /* Register Indirect. */
6571 if (base && !index && !disp)
6573 /* Special cases: ebp and esp need the two-byte modrm form. */
6574 if (addr == stack_pointer_rtx
6575 || addr == arg_pointer_rtx
6576 || addr == frame_pointer_rtx
6577 || addr == hard_frame_pointer_rtx)
6581 /* Direct Addressing. */
6582 else if (disp && !base && !index)
6587 /* Find the length of the displacement constant. */
6590 if (GET_CODE (disp) == CONST_INT
6591 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6597 /* An index requires the two-byte modrm form. */
6605 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
6606 expect that insn have 8bit immediate alternative. */
6608 ix86_attr_length_immediate_default (insn, shortform)
6614 extract_insn_cached (insn);
6615 for (i = recog_data.n_operands - 1; i >= 0; --i)
6616 if (CONSTANT_P (recog_data.operand[i]))
6621 && GET_CODE (recog_data.operand[i]) == CONST_INT
6622 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6626 switch (get_attr_mode (insn))
6638 fatal_insn ("Unknown insn mode", insn);
6644 /* Compute default value for "length_address" attribute. */
6646 ix86_attr_length_address_default (insn)
6650 extract_insn_cached (insn);
6651 for (i = recog_data.n_operands - 1; i >= 0; --i)
6652 if (GET_CODE (recog_data.operand[i]) == MEM)
6654 return memory_address_length (XEXP (recog_data.operand[i], 0));
6660 /* Return the maximum number of instructions a cpu can issue. */
6667 case PROCESSOR_PENTIUM:
6671 case PROCESSOR_PENTIUMPRO:
6679 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6680 by DEP_INSN and nothing set by DEP_INSN. */
6683 ix86_flags_dependant (insn, dep_insn, insn_type)
6685 enum attr_type insn_type;
6689 /* Simplify the test for uninteresting insns. */
6690 if (insn_type != TYPE_SETCC
6691 && insn_type != TYPE_ICMOV
6692 && insn_type != TYPE_FCMOV
6693 && insn_type != TYPE_IBR)
6696 if ((set = single_set (dep_insn)) != 0)
6698 set = SET_DEST (set);
6701 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6702 && XVECLEN (PATTERN (dep_insn), 0) == 2
6703 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6704 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6706 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6707 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6712 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6715 /* This test is true if the dependant insn reads the flags but
6716 not any other potentially set register. */
6717 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6720 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6726 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6727 address with operands set by DEP_INSN. */
6730 ix86_agi_dependant (insn, dep_insn, insn_type)
6732 enum attr_type insn_type;
6736 if (insn_type == TYPE_LEA)
6738 addr = PATTERN (insn);
6739 if (GET_CODE (addr) == SET)
6741 else if (GET_CODE (addr) == PARALLEL
6742 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6743 addr = XVECEXP (addr, 0, 0);
6746 addr = SET_SRC (addr);
6751 extract_insn_cached (insn);
6752 for (i = recog_data.n_operands - 1; i >= 0; --i)
6753 if (GET_CODE (recog_data.operand[i]) == MEM)
6755 addr = XEXP (recog_data.operand[i], 0);
6762 return modified_in_p (addr, dep_insn);
6766 ix86_adjust_cost (insn, link, dep_insn, cost)
6767 rtx insn, link, dep_insn;
6770 enum attr_type insn_type, dep_insn_type;
6771 enum attr_memory memory;
6773 int dep_insn_code_number;
6775 /* Anti and output depenancies have zero cost on all CPUs. */
6776 if (REG_NOTE_KIND (link) != 0)
6779 dep_insn_code_number = recog_memoized (dep_insn);
6781 /* If we can't recognize the insns, we can't really do anything. */
6782 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6785 insn_type = get_attr_type (insn);
6786 dep_insn_type = get_attr_type (dep_insn);
6788 /* Prologue and epilogue allocators can have a false dependency on ebp.
6789 This results in one cycle extra stall on Pentium prologue scheduling,
6790 so handle this important case manually. */
6791 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6792 && dep_insn_type == TYPE_ALU
6793 && !reg_mentioned_p (stack_pointer_rtx, insn))
6798 case PROCESSOR_PENTIUM:
6799 /* Address Generation Interlock adds a cycle of latency. */
6800 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6803 /* ??? Compares pair with jump/setcc. */
6804 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6807 /* Floating point stores require value to be ready one cycle ealier. */
6808 if (insn_type == TYPE_FMOV
6809 && get_attr_memory (insn) == MEMORY_STORE
6810 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6814 case PROCESSOR_PENTIUMPRO:
6815 /* Since we can't represent delayed latencies of load+operation,
6816 increase the cost here for non-imov insns. */
6817 if (dep_insn_type != TYPE_IMOV
6818 && dep_insn_type != TYPE_FMOV
6819 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6820 || memory == MEMORY_BOTH))
6823 /* INT->FP conversion is expensive. */
6824 if (get_attr_fp_int_src (dep_insn))
6827 /* There is one cycle extra latency between an FP op and a store. */
6828 if (insn_type == TYPE_FMOV
6829 && (set = single_set (dep_insn)) != NULL_RTX
6830 && (set2 = single_set (insn)) != NULL_RTX
6831 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6832 && GET_CODE (SET_DEST (set2)) == MEM)
6837 /* The esp dependency is resolved before the instruction is really
6839 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6840 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6843 /* Since we can't represent delayed latencies of load+operation,
6844 increase the cost here for non-imov insns. */
6845 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6846 || memory == MEMORY_BOTH)
6847 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6849 /* INT->FP conversion is expensive. */
6850 if (get_attr_fp_int_src (dep_insn))
6854 case PROCESSOR_ATHLON:
6855 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6856 || memory == MEMORY_BOTH)
6858 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6873 struct ppro_sched_data
6876 int issued_this_cycle;
6881 ix86_safe_length (insn)
6884 if (recog_memoized (insn) >= 0)
6885 return get_attr_length(insn);
6891 ix86_safe_length_prefix (insn)
6894 if (recog_memoized (insn) >= 0)
6895 return get_attr_length(insn);
6900 static enum attr_memory
6901 ix86_safe_memory (insn)
6904 if (recog_memoized (insn) >= 0)
6905 return get_attr_memory(insn);
6907 return MEMORY_UNKNOWN;
6910 static enum attr_pent_pair
6911 ix86_safe_pent_pair (insn)
6914 if (recog_memoized (insn) >= 0)
6915 return get_attr_pent_pair(insn);
6917 return PENT_PAIR_NP;
6920 static enum attr_ppro_uops
6921 ix86_safe_ppro_uops (insn)
6924 if (recog_memoized (insn) >= 0)
6925 return get_attr_ppro_uops (insn);
6927 return PPRO_UOPS_MANY;
6931 ix86_dump_ppro_packet (dump)
6934 if (ix86_sched_data.ppro.decode[0])
6936 fprintf (dump, "PPRO packet: %d",
6937 INSN_UID (ix86_sched_data.ppro.decode[0]));
6938 if (ix86_sched_data.ppro.decode[1])
6939 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6940 if (ix86_sched_data.ppro.decode[2])
6941 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6946 /* We're beginning a new block. Initialize data structures as necessary. */
6949 ix86_sched_init (dump, sched_verbose)
6950 FILE *dump ATTRIBUTE_UNUSED;
6951 int sched_verbose ATTRIBUTE_UNUSED;
6953 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6956 /* Shift INSN to SLOT, and shift everything else down. */
6959 ix86_reorder_insn (insnp, slot)
6966 insnp[0] = insnp[1];
6967 while (++insnp != slot);
6972 /* Find an instruction with given pairability and minimal amount of cycles
6973 lost by the fact that the CPU waits for both pipelines to finish before
6974 reading next instructions. Also take care that both instructions together
6975 can not exceed 7 bytes. */
6978 ix86_pent_find_pair (e_ready, ready, type, first)
6981 enum attr_pent_pair type;
6984 int mincycles, cycles;
6985 enum attr_pent_pair tmp;
6986 enum attr_memory memory;
6987 rtx *insnp, *bestinsnp = NULL;
6989 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6992 memory = ix86_safe_memory (first);
6993 cycles = result_ready_cost (first);
6994 mincycles = INT_MAX;
6996 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6997 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6998 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
7000 enum attr_memory second_memory;
7001 int secondcycles, currentcycles;
7003 second_memory = ix86_safe_memory (*insnp);
7004 secondcycles = result_ready_cost (*insnp);
7005 currentcycles = abs (cycles - secondcycles);
7007 if (secondcycles >= 1 && cycles >= 1)
7009 /* Two read/modify/write instructions together takes two
7011 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
7014 /* Read modify/write instruction followed by read/modify
7015 takes one cycle longer. */
7016 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
7017 && tmp != PENT_PAIR_UV
7018 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
7021 if (currentcycles < mincycles)
7022 bestinsnp = insnp, mincycles = currentcycles;
7028 /* Subroutines of ix86_sched_reorder. */
7031 ix86_sched_reorder_pentium (ready, e_ready)
7035 enum attr_pent_pair pair1, pair2;
7038 /* This wouldn't be necessary if Haifa knew that static insn ordering
7039 is important to which pipe an insn is issued to. So we have to make
7040 some minor rearrangements. */
7042 pair1 = ix86_safe_pent_pair (*e_ready);
7044 /* If the first insn is non-pairable, let it be. */
7045 if (pair1 == PENT_PAIR_NP)
7048 pair2 = PENT_PAIR_NP;
7051 /* If the first insn is UV or PV pairable, search for a PU
7053 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
7055 insnp = ix86_pent_find_pair (e_ready-1, ready,
7056 PENT_PAIR_PU, *e_ready);
7058 pair2 = PENT_PAIR_PU;
7061 /* If the first insn is PU or UV pairable, search for a PV
7063 if (pair2 == PENT_PAIR_NP
7064 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
7066 insnp = ix86_pent_find_pair (e_ready-1, ready,
7067 PENT_PAIR_PV, *e_ready);
7069 pair2 = PENT_PAIR_PV;
7072 /* If the first insn is pairable, search for a UV
7074 if (pair2 == PENT_PAIR_NP)
7076 insnp = ix86_pent_find_pair (e_ready-1, ready,
7077 PENT_PAIR_UV, *e_ready);
7079 pair2 = PENT_PAIR_UV;
7082 if (pair2 == PENT_PAIR_NP)
7085 /* Found something! Decide if we need to swap the order. */
7086 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
7087 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
7088 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
7089 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
7090 ix86_reorder_insn (insnp, e_ready);
7092 ix86_reorder_insn (insnp, e_ready - 1);
7096 ix86_sched_reorder_ppro (ready, e_ready)
7101 enum attr_ppro_uops cur_uops;
7102 int issued_this_cycle;
7106 /* At this point .ppro.decode contains the state of the three
7107 decoders from last "cycle". That is, those insns that were
7108 actually independent. But here we're scheduling for the
7109 decoder, and we may find things that are decodable in the
7112 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
7113 issued_this_cycle = 0;
7116 cur_uops = ix86_safe_ppro_uops (*insnp);
7118 /* If the decoders are empty, and we've a complex insn at the
7119 head of the priority queue, let it issue without complaint. */
7120 if (decode[0] == NULL)
7122 if (cur_uops == PPRO_UOPS_MANY)
7128 /* Otherwise, search for a 2-4 uop unsn to issue. */
7129 while (cur_uops != PPRO_UOPS_FEW)
7133 cur_uops = ix86_safe_ppro_uops (*--insnp);
7136 /* If so, move it to the head of the line. */
7137 if (cur_uops == PPRO_UOPS_FEW)
7138 ix86_reorder_insn (insnp, e_ready);
7140 /* Issue the head of the queue. */
7141 issued_this_cycle = 1;
7142 decode[0] = *e_ready--;
7145 /* Look for simple insns to fill in the other two slots. */
7146 for (i = 1; i < 3; ++i)
7147 if (decode[i] == NULL)
7149 if (ready >= e_ready)
7153 cur_uops = ix86_safe_ppro_uops (*insnp);
7154 while (cur_uops != PPRO_UOPS_ONE)
7158 cur_uops = ix86_safe_ppro_uops (*--insnp);
7161 /* Found one. Move it to the head of the queue and issue it. */
7162 if (cur_uops == PPRO_UOPS_ONE)
7164 ix86_reorder_insn (insnp, e_ready);
7165 decode[i] = *e_ready--;
7166 issued_this_cycle++;
7170 /* ??? Didn't find one. Ideally, here we would do a lazy split
7171 of 2-uop insns, issue one and queue the other. */
7175 if (issued_this_cycle == 0)
7176 issued_this_cycle = 1;
7177 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
7180 /* We are about to being issuing insns for this clock cycle.
7181 Override the default sort algorithm to better slot instructions. */
7183 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
7184 FILE *dump ATTRIBUTE_UNUSED;
7185 int sched_verbose ATTRIBUTE_UNUSED;
7188 int clock_var ATTRIBUTE_UNUSED;
7190 rtx *e_ready = ready + n_ready - 1;
7200 case PROCESSOR_PENTIUM:
7201 ix86_sched_reorder_pentium (ready, e_ready);
7204 case PROCESSOR_PENTIUMPRO:
7205 ix86_sched_reorder_ppro (ready, e_ready);
7210 return ix86_issue_rate ();
7213 /* We are about to issue INSN. Return the number of insns left on the
7214 ready queue that can be issued this cycle. */
7217 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
7227 return can_issue_more - 1;
7229 case PROCESSOR_PENTIUMPRO:
7231 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
7233 if (uops == PPRO_UOPS_MANY)
7236 ix86_dump_ppro_packet (dump);
7237 ix86_sched_data.ppro.decode[0] = insn;
7238 ix86_sched_data.ppro.decode[1] = NULL;
7239 ix86_sched_data.ppro.decode[2] = NULL;
7241 ix86_dump_ppro_packet (dump);
7242 ix86_sched_data.ppro.decode[0] = NULL;
7244 else if (uops == PPRO_UOPS_FEW)
7247 ix86_dump_ppro_packet (dump);
7248 ix86_sched_data.ppro.decode[0] = insn;
7249 ix86_sched_data.ppro.decode[1] = NULL;
7250 ix86_sched_data.ppro.decode[2] = NULL;
7254 for (i = 0; i < 3; ++i)
7255 if (ix86_sched_data.ppro.decode[i] == NULL)
7257 ix86_sched_data.ppro.decode[i] = insn;
7265 ix86_dump_ppro_packet (dump);
7266 ix86_sched_data.ppro.decode[0] = NULL;
7267 ix86_sched_data.ppro.decode[1] = NULL;
7268 ix86_sched_data.ppro.decode[2] = NULL;
7272 return --ix86_sched_data.ppro.issued_this_cycle;
7276 /* Compute the alignment given to a constant that is being placed in memory.
7277 EXP is the constant and ALIGN is the alignment that the object would
7279 The value of this function is used instead of that alignment to align
7283 ix86_constant_alignment (exp, align)
7287 if (TREE_CODE (exp) == REAL_CST)
7289 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7291 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7294 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7301 /* Compute the alignment for a static variable.
7302 TYPE is the data type, and ALIGN is the alignment that
7303 the object would ordinarily have. The value of this function is used
7304 instead of that alignment to align the object. */
7307 ix86_data_alignment (type, align)
7311 if (AGGREGATE_TYPE_P (type)
7313 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7314 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7315 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7318 if (TREE_CODE (type) == ARRAY_TYPE)
7320 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7322 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7325 else if (TREE_CODE (type) == COMPLEX_TYPE)
7328 if (TYPE_MODE (type) == DCmode && align < 64)
7330 if (TYPE_MODE (type) == XCmode && align < 128)
7333 else if ((TREE_CODE (type) == RECORD_TYPE
7334 || TREE_CODE (type) == UNION_TYPE
7335 || TREE_CODE (type) == QUAL_UNION_TYPE)
7336 && TYPE_FIELDS (type))
7338 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7340 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7343 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7344 || TREE_CODE (type) == INTEGER_TYPE)
7346 if (TYPE_MODE (type) == DFmode && align < 64)
7348 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7355 /* Compute the alignment for a local variable.
7356 TYPE is the data type, and ALIGN is the alignment that
7357 the object would ordinarily have. The value of this macro is used
7358 instead of that alignment to align the object. */
7361 ix86_local_alignment (type, align)
7365 if (TREE_CODE (type) == ARRAY_TYPE)
7367 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7369 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7372 else if (TREE_CODE (type) == COMPLEX_TYPE)
7374 if (TYPE_MODE (type) == DCmode && align < 64)
7376 if (TYPE_MODE (type) == XCmode && align < 128)
7379 else if ((TREE_CODE (type) == RECORD_TYPE
7380 || TREE_CODE (type) == UNION_TYPE
7381 || TREE_CODE (type) == QUAL_UNION_TYPE)
7382 && TYPE_FIELDS (type))
7384 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7386 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7389 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7390 || TREE_CODE (type) == INTEGER_TYPE)
7393 if (TYPE_MODE (type) == DFmode && align < 64)
7395 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7401 #define def_builtin(NAME, TYPE, CODE) \
7402 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
7403 struct builtin_description
7405 enum insn_code icode;
7407 enum ix86_builtins code;
7408 enum rtx_code comparison;
7412 static struct builtin_description bdesc_comi[] =
7414 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7415 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7416 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7417 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7418 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7419 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7420 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7421 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7422 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7423 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7424 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7425 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
7428 static struct builtin_description bdesc_2arg[] =
7431 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7432 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7433 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7434 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7435 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7436 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7437 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7438 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7440 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7441 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7442 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7443 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7444 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7445 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7446 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7447 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7448 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7449 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7450 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7451 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7452 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7453 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7454 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7455 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7456 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7457 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7458 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7459 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7460 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7461 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7462 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7463 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7465 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7466 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7467 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7468 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7470 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7471 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7472 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7473 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7475 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7476 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7477 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7478 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7479 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
7482 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7483 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7484 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7485 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7486 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7487 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7489 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7490 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7491 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7492 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7493 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7494 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7495 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7496 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7498 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7499 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7500 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7502 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7503 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7504 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7505 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7507 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7508 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7510 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7511 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7512 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7513 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7514 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7515 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7517 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7518 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7519 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7520 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7522 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7523 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7524 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7525 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7526 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7527 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
7530 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7531 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7532 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7534 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7535 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7537 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7538 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7539 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7540 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7541 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7542 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7544 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7545 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7546 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7547 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7548 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7549 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7551 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7552 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7553 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7554 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7556 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7557 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
7561 static struct builtin_description bdesc_1arg[] =
7563 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7564 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7566 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7567 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7568 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7570 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7571 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7572 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7573 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7577 /* Expand all the target specific builtins. This is not called if TARGET_MMX
7578 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
7581 ix86_init_builtins ()
7583 struct builtin_description * d;
7585 tree endlink = void_list_node;
7587 tree pchar_type_node = build_pointer_type (char_type_node);
7588 tree pfloat_type_node = build_pointer_type (float_type_node);
7589 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7590 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7593 tree int_ftype_v4sf_v4sf
7594 = build_function_type (integer_type_node,
7595 tree_cons (NULL_TREE, V4SF_type_node,
7596 tree_cons (NULL_TREE,
7599 tree v4si_ftype_v4sf_v4sf
7600 = build_function_type (V4SI_type_node,
7601 tree_cons (NULL_TREE, V4SF_type_node,
7602 tree_cons (NULL_TREE,
7605 /* MMX/SSE/integer conversions. */
7606 tree int_ftype_v4sf_int
7607 = build_function_type (integer_type_node,
7608 tree_cons (NULL_TREE, V4SF_type_node,
7609 tree_cons (NULL_TREE,
7613 = build_function_type (integer_type_node,
7614 tree_cons (NULL_TREE, V4SF_type_node,
7617 = build_function_type (integer_type_node,
7618 tree_cons (NULL_TREE, V8QI_type_node,
7621 = build_function_type (integer_type_node,
7622 tree_cons (NULL_TREE, V2SI_type_node,
7625 = build_function_type (V2SI_type_node,
7626 tree_cons (NULL_TREE, integer_type_node,
7628 tree v4sf_ftype_v4sf_int
7629 = build_function_type (integer_type_node,
7630 tree_cons (NULL_TREE, V4SF_type_node,
7631 tree_cons (NULL_TREE, integer_type_node,
7633 tree v4sf_ftype_v4sf_v2si
7634 = build_function_type (V4SF_type_node,
7635 tree_cons (NULL_TREE, V4SF_type_node,
7636 tree_cons (NULL_TREE, V2SI_type_node,
7638 tree int_ftype_v4hi_int
7639 = build_function_type (integer_type_node,
7640 tree_cons (NULL_TREE, V4HI_type_node,
7641 tree_cons (NULL_TREE, integer_type_node,
7643 tree v4hi_ftype_v4hi_int_int
7644 = build_function_type (V4HI_type_node,
7645 tree_cons (NULL_TREE, V4HI_type_node,
7646 tree_cons (NULL_TREE, integer_type_node,
7647 tree_cons (NULL_TREE,
7650 /* Miscellaneous. */
7651 tree v8qi_ftype_v4hi_v4hi
7652 = build_function_type (V8QI_type_node,
7653 tree_cons (NULL_TREE, V4HI_type_node,
7654 tree_cons (NULL_TREE, V4HI_type_node,
7656 tree v4hi_ftype_v2si_v2si
7657 = build_function_type (V4HI_type_node,
7658 tree_cons (NULL_TREE, V2SI_type_node,
7659 tree_cons (NULL_TREE, V2SI_type_node,
7661 tree v4sf_ftype_v4sf_v4sf_int
7662 = build_function_type (V4SF_type_node,
7663 tree_cons (NULL_TREE, V4SF_type_node,
7664 tree_cons (NULL_TREE, V4SF_type_node,
7665 tree_cons (NULL_TREE,
7668 tree v4hi_ftype_v8qi_v8qi
7669 = build_function_type (V4HI_type_node,
7670 tree_cons (NULL_TREE, V8QI_type_node,
7671 tree_cons (NULL_TREE, V8QI_type_node,
7673 tree v2si_ftype_v4hi_v4hi
7674 = build_function_type (V2SI_type_node,
7675 tree_cons (NULL_TREE, V4HI_type_node,
7676 tree_cons (NULL_TREE, V4HI_type_node,
7678 tree v4hi_ftype_v4hi_int
7679 = build_function_type (V4HI_type_node,
7680 tree_cons (NULL_TREE, V4HI_type_node,
7681 tree_cons (NULL_TREE, integer_type_node,
7683 tree di_ftype_di_int
7684 = build_function_type (long_long_unsigned_type_node,
7685 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7686 tree_cons (NULL_TREE, integer_type_node,
7688 tree v8qi_ftype_v8qi_di
7689 = build_function_type (V8QI_type_node,
7690 tree_cons (NULL_TREE, V8QI_type_node,
7691 tree_cons (NULL_TREE,
7692 long_long_integer_type_node,
7694 tree v4hi_ftype_v4hi_di
7695 = build_function_type (V4HI_type_node,
7696 tree_cons (NULL_TREE, V4HI_type_node,
7697 tree_cons (NULL_TREE,
7698 long_long_integer_type_node,
7700 tree v2si_ftype_v2si_di
7701 = build_function_type (V2SI_type_node,
7702 tree_cons (NULL_TREE, V2SI_type_node,
7703 tree_cons (NULL_TREE,
7704 long_long_integer_type_node,
7706 tree void_ftype_void
7707 = build_function_type (void_type_node, endlink);
7708 tree void_ftype_pchar_int
7709 = build_function_type (void_type_node,
7710 tree_cons (NULL_TREE, pchar_type_node,
7711 tree_cons (NULL_TREE, integer_type_node,
7713 tree void_ftype_unsigned
7714 = build_function_type (void_type_node,
7715 tree_cons (NULL_TREE, unsigned_type_node,
7717 tree unsigned_ftype_void
7718 = build_function_type (unsigned_type_node, endlink);
7720 = build_function_type (long_long_unsigned_type_node, endlink);
7722 = build_function_type (intTI_type_node, endlink);
7723 tree v2si_ftype_v4sf
7724 = build_function_type (V2SI_type_node,
7725 tree_cons (NULL_TREE, V4SF_type_node,
7728 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7729 tree_cons (NULL_TREE, V8QI_type_node,
7730 tree_cons (NULL_TREE,
7733 tree void_ftype_v8qi_v8qi_pchar
7734 = build_function_type (void_type_node, maskmovq_args);
7735 tree v4sf_ftype_pfloat
7736 = build_function_type (V4SF_type_node,
7737 tree_cons (NULL_TREE, pfloat_type_node,
7739 tree v4sf_ftype_float
7740 = build_function_type (V4SF_type_node,
7741 tree_cons (NULL_TREE, float_type_node,
7743 tree v4sf_ftype_float_float_float_float
7744 = build_function_type (V4SF_type_node,
7745 tree_cons (NULL_TREE, float_type_node,
7746 tree_cons (NULL_TREE, float_type_node,
7747 tree_cons (NULL_TREE,
7749 tree_cons (NULL_TREE,
7752 /* @@@ the type is bogus */
7753 tree v4sf_ftype_v4sf_pv2si
7754 = build_function_type (V4SF_type_node,
7755 tree_cons (NULL_TREE, V4SF_type_node,
7756 tree_cons (NULL_TREE, pv2si_type_node,
7758 tree v4sf_ftype_pv2si_v4sf
7759 = build_function_type (V4SF_type_node,
7760 tree_cons (NULL_TREE, V4SF_type_node,
7761 tree_cons (NULL_TREE, pv2si_type_node,
7763 tree void_ftype_pfloat_v4sf
7764 = build_function_type (void_type_node,
7765 tree_cons (NULL_TREE, pfloat_type_node,
7766 tree_cons (NULL_TREE, V4SF_type_node,
7768 tree void_ftype_pdi_di
7769 = build_function_type (void_type_node,
7770 tree_cons (NULL_TREE, pdi_type_node,
7771 tree_cons (NULL_TREE,
7772 long_long_unsigned_type_node,
7774 /* Normal vector unops. */
7775 tree v4sf_ftype_v4sf
7776 = build_function_type (V4SF_type_node,
7777 tree_cons (NULL_TREE, V4SF_type_node,
7780 /* Normal vector binops. */
7781 tree v4sf_ftype_v4sf_v4sf
7782 = build_function_type (V4SF_type_node,
7783 tree_cons (NULL_TREE, V4SF_type_node,
7784 tree_cons (NULL_TREE, V4SF_type_node,
7786 tree v8qi_ftype_v8qi_v8qi
7787 = build_function_type (V8QI_type_node,
7788 tree_cons (NULL_TREE, V8QI_type_node,
7789 tree_cons (NULL_TREE, V8QI_type_node,
7791 tree v4hi_ftype_v4hi_v4hi
7792 = build_function_type (V4HI_type_node,
7793 tree_cons (NULL_TREE, V4HI_type_node,
7794 tree_cons (NULL_TREE, V4HI_type_node,
7796 tree v2si_ftype_v2si_v2si
7797 = build_function_type (V2SI_type_node,
7798 tree_cons (NULL_TREE, V2SI_type_node,
7799 tree_cons (NULL_TREE, V2SI_type_node,
7802 = build_function_type (intTI_type_node,
7803 tree_cons (NULL_TREE, intTI_type_node,
7804 tree_cons (NULL_TREE, intTI_type_node,
7807 = build_function_type (long_long_unsigned_type_node,
7808 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7809 tree_cons (NULL_TREE,
7810 long_long_unsigned_type_node,
7813 /* Add all builtins that are more or less simple operations on two
7815 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7817 /* Use one of the operands; the target can have a different mode for
7818 mask-generating compares. */
7819 enum machine_mode mode;
7824 mode = insn_data[d->icode].operand[1].mode;
7826 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7832 type = v4sf_ftype_v4sf_v4sf;
7835 type = v8qi_ftype_v8qi_v8qi;
7838 type = v4hi_ftype_v4hi_v4hi;
7841 type = v2si_ftype_v2si_v2si;
7844 type = ti_ftype_ti_ti;
7847 type = di_ftype_di_di;
7854 /* Override for comparisons. */
7855 if (d->icode == CODE_FOR_maskcmpv4sf3
7856 || d->icode == CODE_FOR_maskncmpv4sf3
7857 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7858 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7859 type = v4si_ftype_v4sf_v4sf;
7861 def_builtin (d->name, type, d->code);
7864 /* Add the remaining MMX insns with somewhat more complicated types. */
7865 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
7866 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
7867 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
7868 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
7869 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
7870 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
7871 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
7872 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
7873 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
7875 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
7876 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
7877 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
7879 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
7880 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
7882 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
7883 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
7885 /* Everything beyond this point is SSE only. */
7889 /* comi/ucomi insns. */
7890 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
7891 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
7893 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
7894 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
7895 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
7897 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
7898 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
7899 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
7900 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
7901 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
7902 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
7904 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
7905 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
7907 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
7909 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
7910 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
7911 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
7912 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
7913 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
7914 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
7916 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
7917 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
7918 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
7919 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
7921 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
7922 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
7923 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
7924 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
7926 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
7927 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
7929 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
7931 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
7932 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
7933 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
7934 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
7935 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
7936 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
7938 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
7940 /* Composite intrinsics. */
7941 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
7942 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
7943 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
7944 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
7945 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
7946 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
7947 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
7950 /* Errors in the source file can cause expand_expr to return const0_rtx
7951 where we expect a vector. To avoid crashing, use one of the vector
7952 clear instructions. */
7954 safe_vector_operand (x, mode)
7956 enum machine_mode mode;
7958 if (x != const0_rtx)
7960 x = gen_reg_rtx (mode);
7962 if (VALID_MMX_REG_MODE (mode))
7963 emit_insn (gen_mmx_clrdi (mode == DImode ? x
7964 : gen_rtx_SUBREG (DImode, x, 0)));
7966 emit_insn (gen_sse_clrti (mode == TImode ? x
7967 : gen_rtx_SUBREG (TImode, x, 0)));
7971 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
7974 ix86_expand_binop_builtin (icode, arglist, target)
7975 enum insn_code icode;
7980 tree arg0 = TREE_VALUE (arglist);
7981 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7982 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7983 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7984 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7985 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7986 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
7988 if (VECTOR_MODE_P (mode0))
7989 op0 = safe_vector_operand (op0, mode0);
7990 if (VECTOR_MODE_P (mode1))
7991 op1 = safe_vector_operand (op1, mode1);
7994 || GET_MODE (target) != tmode
7995 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7996 target = gen_reg_rtx (tmode);
7998 /* In case the insn wants input operands in modes different from
7999 the result, abort. */
8000 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
8003 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8004 op0 = copy_to_mode_reg (mode0, op0);
8005 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8006 op1 = copy_to_mode_reg (mode1, op1);
8008 pat = GEN_FCN (icode) (target, op0, op1);
8015 /* Subroutine of ix86_expand_builtin to take care of stores. */
8018 ix86_expand_store_builtin (icode, arglist, shuffle)
8019 enum insn_code icode;
8024 tree arg0 = TREE_VALUE (arglist);
8025 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8026 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8027 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8028 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
8029 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
8031 if (VECTOR_MODE_P (mode1))
8032 op1 = safe_vector_operand (op1, mode1);
8034 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8035 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8036 op1 = copy_to_mode_reg (mode1, op1);
8038 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
8039 pat = GEN_FCN (icode) (op0, op1);
8045 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
8048 ix86_expand_unop_builtin (icode, arglist, target, do_load)
8049 enum insn_code icode;
8055 tree arg0 = TREE_VALUE (arglist);
8056 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8057 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8058 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8061 || GET_MODE (target) != tmode
8062 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8063 target = gen_reg_rtx (tmode);
8065 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8068 if (VECTOR_MODE_P (mode0))
8069 op0 = safe_vector_operand (op0, mode0);
8071 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8072 op0 = copy_to_mode_reg (mode0, op0);
8075 pat = GEN_FCN (icode) (target, op0);
8082 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
8083 sqrtss, rsqrtss, rcpss. */
8086 ix86_expand_unop1_builtin (icode, arglist, target)
8087 enum insn_code icode;
8092 tree arg0 = TREE_VALUE (arglist);
8093 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8094 enum machine_mode tmode = insn_data[icode].operand[0].mode;
8095 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
8098 || GET_MODE (target) != tmode
8099 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8100 target = gen_reg_rtx (tmode);
8102 if (VECTOR_MODE_P (mode0))
8103 op0 = safe_vector_operand (op0, mode0);
8105 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8106 op0 = copy_to_mode_reg (mode0, op0);
8108 pat = GEN_FCN (icode) (target, op0, op0);
8115 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
8118 ix86_expand_sse_compare (d, arglist, target)
8119 struct builtin_description *d;
8124 tree arg0 = TREE_VALUE (arglist);
8125 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8126 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8127 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8129 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
8130 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
8131 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
8132 enum rtx_code comparison = d->comparison;
8134 if (VECTOR_MODE_P (mode0))
8135 op0 = safe_vector_operand (op0, mode0);
8136 if (VECTOR_MODE_P (mode1))
8137 op1 = safe_vector_operand (op1, mode1);
8139 /* Swap operands if we have a comparison that isn't available in
8143 target = gen_reg_rtx (tmode);
8144 emit_move_insn (target, op1);
8147 comparison = swap_condition (comparison);
8150 || GET_MODE (target) != tmode
8151 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
8152 target = gen_reg_rtx (tmode);
8154 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
8155 op0 = copy_to_mode_reg (mode0, op0);
8156 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
8157 op1 = copy_to_mode_reg (mode1, op1);
8159 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8160 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
8167 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
8170 ix86_expand_sse_comi (d, arglist, target)
8171 struct builtin_description *d;
8176 tree arg0 = TREE_VALUE (arglist);
8177 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8178 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8179 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8181 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
8182 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
8183 enum rtx_code comparison = d->comparison;
8185 if (VECTOR_MODE_P (mode0))
8186 op0 = safe_vector_operand (op0, mode0);
8187 if (VECTOR_MODE_P (mode1))
8188 op1 = safe_vector_operand (op1, mode1);
8190 /* Swap operands if we have a comparison that isn't available in
8197 comparison = swap_condition (comparison);
8200 target = gen_reg_rtx (SImode);
8201 emit_move_insn (target, const0_rtx);
8202 target = gen_rtx_SUBREG (QImode, target, 0);
8204 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
8205 op0 = copy_to_mode_reg (mode0, op0);
8206 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
8207 op1 = copy_to_mode_reg (mode1, op1);
8209 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
8210 pat = GEN_FCN (d->icode) (op0, op1, op2);
8214 emit_insn (gen_setcc_2 (target, op2));
8219 /* Expand an expression EXP that calls a built-in function,
8220 with result going to TARGET if that's convenient
8221 (and in mode MODE if that's convenient).
8222 SUBTARGET may be used as the target for computing one of EXP's operands.
8223 IGNORE is nonzero if the value is to be ignored. */
8226 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
8229 rtx subtarget ATTRIBUTE_UNUSED;
8230 enum machine_mode mode ATTRIBUTE_UNUSED;
8231 int ignore ATTRIBUTE_UNUSED;
8233 struct builtin_description *d;
8235 enum insn_code icode;
8236 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8237 tree arglist = TREE_OPERAND (exp, 1);
8238 tree arg0, arg1, arg2, arg3;
8239 rtx op0, op1, op2, pat;
8240 enum machine_mode tmode, mode0, mode1, mode2;
8241 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8245 case IX86_BUILTIN_EMMS:
8246 emit_insn (gen_emms ());
8249 case IX86_BUILTIN_SFENCE:
8250 emit_insn (gen_sfence ());
8253 case IX86_BUILTIN_M_FROM_INT:
8254 target = gen_reg_rtx (DImode);
8255 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8256 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
8259 case IX86_BUILTIN_M_TO_INT:
8260 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8261 op0 = copy_to_mode_reg (DImode, op0);
8262 target = gen_reg_rtx (SImode);
8263 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
8266 case IX86_BUILTIN_PEXTRW:
8267 icode = CODE_FOR_mmx_pextrw;
8268 arg0 = TREE_VALUE (arglist);
8269 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8270 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8271 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8272 tmode = insn_data[icode].operand[0].mode;
8273 mode0 = insn_data[icode].operand[1].mode;
8274 mode1 = insn_data[icode].operand[2].mode;
8276 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8277 op0 = copy_to_mode_reg (mode0, op0);
8278 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8280 /* @@@ better error message */
8281 error ("selector must be an immediate");
8285 || GET_MODE (target) != tmode
8286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8287 target = gen_reg_rtx (tmode);
8288 pat = GEN_FCN (icode) (target, op0, op1);
8294 case IX86_BUILTIN_PINSRW:
8295 icode = CODE_FOR_mmx_pinsrw;
8296 arg0 = TREE_VALUE (arglist);
8297 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8298 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8299 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8300 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8301 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8302 tmode = insn_data[icode].operand[0].mode;
8303 mode0 = insn_data[icode].operand[1].mode;
8304 mode1 = insn_data[icode].operand[2].mode;
8305 mode2 = insn_data[icode].operand[3].mode;
8307 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8308 op0 = copy_to_mode_reg (mode0, op0);
8309 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8310 op1 = copy_to_mode_reg (mode1, op1);
8311 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8313 /* @@@ better error message */
8314 error ("selector must be an immediate");
8318 || GET_MODE (target) != tmode
8319 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8320 target = gen_reg_rtx (tmode);
8321 pat = GEN_FCN (icode) (target, op0, op1, op2);
8327 case IX86_BUILTIN_MASKMOVQ:
8328 icode = CODE_FOR_mmx_maskmovq;
8329 /* Note the arg order is different from the operand order. */
8330 arg1 = TREE_VALUE (arglist);
8331 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8332 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8333 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8334 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8335 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8336 mode0 = insn_data[icode].operand[0].mode;
8337 mode1 = insn_data[icode].operand[1].mode;
8338 mode2 = insn_data[icode].operand[2].mode;
8340 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8341 op0 = copy_to_mode_reg (mode0, op0);
8342 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8343 op1 = copy_to_mode_reg (mode1, op1);
8344 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8345 op2 = copy_to_mode_reg (mode2, op2);
8346 pat = GEN_FCN (icode) (op0, op1, op2);
8352 case IX86_BUILTIN_SQRTSS:
8353 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8354 case IX86_BUILTIN_RSQRTSS:
8355 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8356 case IX86_BUILTIN_RCPSS:
8357 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8359 case IX86_BUILTIN_LOADAPS:
8360 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8362 case IX86_BUILTIN_LOADUPS:
8363 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8365 case IX86_BUILTIN_STOREAPS:
8366 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8367 case IX86_BUILTIN_STOREUPS:
8368 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8370 case IX86_BUILTIN_LOADSS:
8371 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8373 case IX86_BUILTIN_STORESS:
8374 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8376 case IX86_BUILTIN_LOADHPS:
8377 case IX86_BUILTIN_LOADLPS:
8378 icode = (fcode == IX86_BUILTIN_LOADHPS
8379 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8380 arg0 = TREE_VALUE (arglist);
8381 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8382 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8383 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8384 tmode = insn_data[icode].operand[0].mode;
8385 mode0 = insn_data[icode].operand[1].mode;
8386 mode1 = insn_data[icode].operand[2].mode;
8388 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8389 op0 = copy_to_mode_reg (mode0, op0);
8390 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8392 || GET_MODE (target) != tmode
8393 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8394 target = gen_reg_rtx (tmode);
8395 pat = GEN_FCN (icode) (target, op0, op1);
8401 case IX86_BUILTIN_STOREHPS:
8402 case IX86_BUILTIN_STORELPS:
8403 icode = (fcode == IX86_BUILTIN_STOREHPS
8404 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8405 arg0 = TREE_VALUE (arglist);
8406 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8407 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8408 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8409 mode0 = insn_data[icode].operand[1].mode;
8410 mode1 = insn_data[icode].operand[2].mode;
8412 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8413 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8414 op1 = copy_to_mode_reg (mode1, op1);
8416 pat = GEN_FCN (icode) (op0, op0, op1);
8422 case IX86_BUILTIN_MOVNTPS:
8423 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8424 case IX86_BUILTIN_MOVNTQ:
8425 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8427 case IX86_BUILTIN_LDMXCSR:
8428 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8429 target = assign_386_stack_local (SImode, 0);
8430 emit_move_insn (target, op0);
8431 emit_insn (gen_ldmxcsr (target));
8434 case IX86_BUILTIN_STMXCSR:
8435 target = assign_386_stack_local (SImode, 0);
8436 emit_insn (gen_stmxcsr (target));
8437 return copy_to_mode_reg (SImode, target);
8439 case IX86_BUILTIN_PREFETCH:
8440 icode = CODE_FOR_prefetch;
8441 arg0 = TREE_VALUE (arglist);
8442 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8443 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8444 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8445 mode0 = insn_data[icode].operand[0].mode;
8446 mode1 = insn_data[icode].operand[1].mode;
8448 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8450 /* @@@ better error message */
8451 error ("selector must be an immediate");
8455 op0 = copy_to_mode_reg (Pmode, op0);
8456 pat = GEN_FCN (icode) (op0, op1);
8462 case IX86_BUILTIN_SHUFPS:
8463 icode = CODE_FOR_sse_shufps;
8464 arg0 = TREE_VALUE (arglist);
8465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8466 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8467 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8468 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8469 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8470 tmode = insn_data[icode].operand[0].mode;
8471 mode0 = insn_data[icode].operand[1].mode;
8472 mode1 = insn_data[icode].operand[2].mode;
8473 mode2 = insn_data[icode].operand[3].mode;
8475 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8476 op0 = copy_to_mode_reg (mode0, op0);
8477 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8478 op1 = copy_to_mode_reg (mode1, op1);
8479 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8481 /* @@@ better error message */
8482 error ("mask must be an immediate");
8486 || GET_MODE (target) != tmode
8487 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8488 target = gen_reg_rtx (tmode);
8489 pat = GEN_FCN (icode) (target, op0, op1, op2);
8495 case IX86_BUILTIN_PSHUFW:
8496 icode = CODE_FOR_mmx_pshufw;
8497 arg0 = TREE_VALUE (arglist);
8498 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8499 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8500 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8501 tmode = insn_data[icode].operand[0].mode;
8502 mode0 = insn_data[icode].operand[2].mode;
8503 mode1 = insn_data[icode].operand[3].mode;
8505 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8506 op0 = copy_to_mode_reg (mode0, op0);
8507 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8509 /* @@@ better error message */
8510 error ("mask must be an immediate");
8514 || GET_MODE (target) != tmode
8515 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8516 target = gen_reg_rtx (tmode);
8517 pat = GEN_FCN (icode) (target, target, op0, op1);
8523 /* Composite intrinsics. */
8524 case IX86_BUILTIN_SETPS1:
8525 target = assign_386_stack_local (SFmode, 0);
8526 arg0 = TREE_VALUE (arglist);
8527 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8528 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8529 op0 = gen_reg_rtx (V4SFmode);
8530 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8531 XEXP (target, 0))));
8532 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8535 case IX86_BUILTIN_SETPS:
8536 target = assign_386_stack_local (V4SFmode, 0);
8537 op0 = change_address (target, SFmode, XEXP (target, 0));
8538 arg0 = TREE_VALUE (arglist);
8539 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8540 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8541 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8542 emit_move_insn (op0,
8543 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8544 emit_move_insn (adj_offsettable_operand (op0, 4),
8545 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8546 emit_move_insn (adj_offsettable_operand (op0, 8),
8547 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8548 emit_move_insn (adj_offsettable_operand (op0, 12),
8549 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8550 op0 = gen_reg_rtx (V4SFmode);
8551 emit_insn (gen_sse_movaps (op0, target));
8554 case IX86_BUILTIN_CLRPS:
8555 target = gen_reg_rtx (TImode);
8556 emit_insn (gen_sse_clrti (target));
8559 case IX86_BUILTIN_LOADRPS:
8560 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8561 gen_reg_rtx (V4SFmode), 1);
8562 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8565 case IX86_BUILTIN_LOADPS1:
8566 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8567 gen_reg_rtx (V4SFmode), 1);
8568 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8571 case IX86_BUILTIN_STOREPS1:
8572 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8573 case IX86_BUILTIN_STORERPS:
8574 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8576 case IX86_BUILTIN_MMX_ZERO:
8577 target = gen_reg_rtx (DImode);
8578 emit_insn (gen_mmx_clrdi (target));
8585 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8586 if (d->code == fcode)
8588 /* Compares are treated specially. */
8589 if (d->icode == CODE_FOR_maskcmpv4sf3
8590 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8591 || d->icode == CODE_FOR_maskncmpv4sf3
8592 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8593 return ix86_expand_sse_compare (d, arglist, target);
8595 return ix86_expand_binop_builtin (d->icode, arglist, target);
8598 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8599 if (d->code == fcode)
8600 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8602 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8603 if (d->code == fcode)
8604 return ix86_expand_sse_comi (d, arglist, target);
8606 /* @@@ Should really do something sensible here. */
8610 /* Store OPERAND to the memory after reload is completed. This means
8611 that we can't easilly use assign_stack_local. */
8613 ix86_force_to_memory (mode, operand)
8614 enum machine_mode mode;
8617 if (!reload_completed)
8624 split_di (&operand, 1, operands, operands+1);
8626 gen_rtx_SET (VOIDmode,
8627 gen_rtx_MEM (SImode,
8628 gen_rtx_PRE_DEC (Pmode,
8629 stack_pointer_rtx)),
8632 gen_rtx_SET (VOIDmode,
8633 gen_rtx_MEM (SImode,
8634 gen_rtx_PRE_DEC (Pmode,
8635 stack_pointer_rtx)),
8640 /* It is better to store HImodes as SImodes. */
8641 if (!TARGET_PARTIAL_REG_STALL)
8642 operand = gen_lowpart (SImode, operand);
8646 gen_rtx_SET (VOIDmode,
8647 gen_rtx_MEM (GET_MODE (operand),
8648 gen_rtx_PRE_DEC (SImode,
8649 stack_pointer_rtx)),
8655 return gen_rtx_MEM (mode, stack_pointer_rtx);
8658 /* Free operand from the memory. */
8660 ix86_free_from_memory (mode)
8661 enum machine_mode mode;
8663 /* Use LEA to deallocate stack space. In peephole2 it will be converted
8664 to pop or add instruction if registers are available. */
8665 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8666 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8667 GEN_INT (mode == DImode
8669 : mode == HImode && TARGET_PARTIAL_REG_STALL