1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
45 #ifndef CHECK_STACK_LIMIT
46 #define CHECK_STACK_LIMIT -1
49 /* Processor costs (relative to an add) */
50 struct processor_costs i386_cost = { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8} /* cost of storing fp registers in SFmode, DFmode and XFmode */
71 struct processor_costs i486_cost = { /* 486 specific costs */
72 1, /* cost of an add instruction */
73 1, /* cost of a lea instruction */
74 3, /* variable shift costs */
75 2, /* constant shift costs */
76 12, /* cost of starting a multiply */
77 1, /* cost of multiply per each bit set */
78 40, /* cost of a divide/mod */
79 15, /* "large" insn */
81 4, /* cost for loading QImode using movzbl */
82 {2, 4, 2}, /* cost of loading integer registers
83 in QImode, HImode and SImode.
84 Relative to reg-reg move (2). */
85 {2, 4, 2}, /* cost of storing integer registers */
86 2, /* cost of reg,reg fld/fst */
87 {8, 8, 8}, /* cost of loading fp registers
88 in SFmode, DFmode and XFmode */
89 {8, 8, 8} /* cost of storing fp registers in SFmode, DFmode and XFmode */
92 struct processor_costs pentium_cost = {
93 1, /* cost of an add instruction */
94 1, /* cost of a lea instruction */
95 4, /* variable shift costs */
96 1, /* constant shift costs */
97 11, /* cost of starting a multiply */
98 0, /* cost of multiply per each bit set */
99 25, /* cost of a divide/mod */
100 8, /* "large" insn */
102 6, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {2, 2, 6}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {4, 4, 6} /* cost of storing fp registers in SFmode, DFmode and XFmode */
113 struct processor_costs pentiumpro_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 1, /* variable shift costs */
117 1, /* constant shift costs */
118 4, /* cost of starting a multiply */
119 0, /* cost of multiply per each bit set */
120 17, /* cost of a divide/mod */
121 8, /* "large" insn */
123 2, /* cost for loading QImode using movzbl */
124 {4, 4, 4}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 2, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6} /* cost of storing fp registers in SFmode, DFmode and XFmode */
134 struct processor_costs k6_cost = {
135 1, /* cost of an add instruction */
136 2, /* cost of a lea instruction */
137 1, /* variable shift costs */
138 1, /* constant shift costs */
139 3, /* cost of starting a multiply */
140 0, /* cost of multiply per each bit set */
141 18, /* cost of a divide/mod */
142 8, /* "large" insn */
144 3, /* cost for loading QImode using movzbl */
145 {4, 5, 4}, /* cost of loading integer registers
146 in QImode, HImode and SImode.
147 Relative to reg-reg move (2). */
148 {2, 3, 2}, /* cost of storing integer registers */
149 4, /* cost of reg,reg fld/fst */
150 {6, 6, 6}, /* cost of loading fp registers
151 in SFmode, DFmode and XFmode */
152 {4, 4, 4} /* cost of storing fp registers in SFmode, DFmode and XFmode */
155 struct processor_costs athlon_cost = {
156 1, /* cost of an add instruction */
157 2, /* cost of a lea instruction */
158 1, /* variable shift costs */
159 1, /* constant shift costs */
160 5, /* cost of starting a multiply */
161 0, /* cost of multiply per each bit set */
162 42, /* cost of a divide/mod */
163 8, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {4, 5, 4}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 3, 2}, /* cost of storing integer registers */
170 4, /* cost of reg,reg fld/fst */
171 {6, 6, 20}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {4, 4, 16} /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 struct processor_costs *ix86_cost = &pentium_cost;
178 /* Processor feature/optimization bitmasks.  One bit per processor,
   using the PROCESSOR_* enumeration values as bit positions.  */
179 #define m_386 (1<<PROCESSOR_I386)
180 #define m_486 (1<<PROCESSOR_I486)
181 #define m_PENT (1<<PROCESSOR_PENTIUM)
182 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
183 #define m_K6 (1<<PROCESSOR_K6)
184 #define m_ATHLON (1<<PROCESSOR_ATHLON)
/* Each x86_* tunable below is a mask over the m_* bits above: the
   feature or heuristic is enabled when tuning for a processor whose
   bit is set.  A leading ~ means "every processor except those".
   ~(0) enables for all processors; 0 disables everywhere.  */
186 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
187 const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
188 const int x86_zero_extend_with_and = m_486 | m_PENT;
189 const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
190 const int x86_double_with_add = ~m_386;
191 const int x86_use_bit_test = m_386;
192 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
193 const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
194 const int x86_use_any_reg = m_486;
195 const int x86_cmove = m_PPRO | m_ATHLON;
196 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
197 const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
198 const int x86_partial_reg_stall = m_PPRO;
199 const int x86_use_loop = m_K6;
200 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
201 const int x86_use_mov0 = m_K6;
202 const int x86_use_cltd = ~(m_PENT | m_K6);
203 const int x86_read_modify_write = ~m_PENT;
204 const int x86_read_modify = ~(m_PENT | m_PPRO);
205 const int x86_split_long_moves = m_PPRO;
206 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
207 const int x86_single_stringop = m_386;
208 const int x86_qimode_math = ~(0);
209 const int x86_promote_qi_regs = 0;
210 const int x86_himode_math = ~(m_PPRO);
211 const int x86_promote_hi_regs = m_PPRO;
/* Preferences for using sub/add on %esp versus push/pop when adjusting
   the stack by 4 or 8 bytes (presumably consulted by the prologue/
   epilogue and push/pop patterns — confirm against i386.md users).  */
212 const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
213 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
214 const int x86_add_esp_4 = m_ATHLON | m_K6;
215 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
216 const int x86_integer_DFmode_moves = ~m_ATHLON;
217 const int x86_partial_reg_dependency = m_ATHLON;
218 const int x86_memory_mismatch_stall = m_ATHLON;
/* Build a MEM of the given MODE addressed by the hard frame pointer.  */
220 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
/* Hard register names indexed by register number, one table per access
   width; the initializer macros presumably come from i386.h — the QI
   variants name the 8-bit low/high registers.  */
222 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
223 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
224 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
226 /* Array of the smallest class containing reg number REGNO, indexed by
227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
232 AREG, DREG, CREG, BREG,
234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
240 /* flags, fpsr, dirflag, frame */
241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
242 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
244 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
248 /* The "default" register map. */
250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
254 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
259 /* Define the register numbers to be used in Dwarf debugging information.
260 The SVR4 reference port C compiler uses the following register numbers
261 in its Dwarf output code:
262 0 for %eax (gcc regno = 0)
263 1 for %ecx (gcc regno = 2)
264 2 for %edx (gcc regno = 1)
265 3 for %ebx (gcc regno = 3)
266 4 for %esp (gcc regno = 7)
267 5 for %ebp (gcc regno = 6)
268 6 for %esi (gcc regno = 4)
269 7 for %edi (gcc regno = 5)
270 The following three DWARF register numbers are never generated by
271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
272 believes these numbers have these meanings.
273 8 for %eip (no gcc equivalent)
274 9 for %eflags (gcc regno = 17)
275 10 for %trapno (no gcc equivalent)
276 It is not at all clear how we should number the FP stack registers
277 for the x86 architecture. If the version of SDB on x86/svr4 were
278 a bit less brain dead with respect to floating-point then we would
279 have a precedent to follow with respect to DWARF register numbers
280 for x86 FP registers, but the SDB on x86/svr4 is so completely
281 broken with respect to FP registers that it is hardly worth thinking
282 of it as something to strive for compatibility with.
283 The version of x86/svr4 SDB I have at the moment does (partially)
284 seem to believe that DWARF register number 11 is associated with
285 the x86 register %st(0), but that's about all. Higher DWARF
286 register numbers don't seem to be associated with anything in
287 particular, and even for DWARF regno 11, SDB only seems to under-
288 stand that it should say that a variable lives in %st(0) (when
289 asked via an `=' command) if we said it was in DWARF regno 11,
290 but SDB still prints garbage when asked for the value of the
291 variable in question (via a `/' command).
292 (Also note that the labels SDB prints for various FP stack regs
293 when doing an `x' command are all wrong.)
294 Note that these problems generally don't affect the native SVR4
295 C compiler because it doesn't allow the use of -O with -g and
296 because when it is *not* optimizing, it allocates a memory
297 location for each floating-point variable, and the memory
298 location is what gets described in the DWARF AT_location
299 attribute for the variable in question.
300 Regardless of the severe mental illness of the x86/svr4 SDB, we
301 do something sensible here and we use the following DWARF
302 register numbers. Note that these are all stack-top-relative
304 11 for %st(0) (gcc regno = 8)
305 12 for %st(1) (gcc regno = 9)
306 13 for %st(2) (gcc regno = 10)
307 14 for %st(3) (gcc regno = 11)
308 15 for %st(4) (gcc regno = 12)
309 16 for %st(5) (gcc regno = 13)
310 17 for %st(6) (gcc regno = 14)
311 18 for %st(7) (gcc regno = 15)
313 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
317 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
322 /* Test and compare insns in i386.md store the information needed to
323 generate branch and scc insns here. */
325 struct rtx_def *ix86_compare_op0 = NULL_RTX;
326 struct rtx_def *ix86_compare_op1 = NULL_RTX;
328 #define MAX_386_STACK_LOCALS 2
330 /* Define the structure for the machine field in struct function. */
331 struct machine_function
333 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
336 #define ix86_stack_locals (cfun->machine->stack_locals)
338 /* which cpu are we scheduling for */
339 enum processor_type ix86_cpu;
341 /* which instruction set architecture to use. */
344 /* Strings to hold which cpu and instruction set architecture to use. */
345 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
346 const char *ix86_arch_string; /* for -march=<xxx> */
348 /* Register allocation order */
349 const char *ix86_reg_alloc_order;
350 static char regs_allocated[FIRST_PSEUDO_REGISTER];
352 /* # of registers to use to pass arguments. */
353 const char *ix86_regparm_string;
355 /* ix86_regparm_string as a number */
358 /* Alignment to use for loops and jumps: */
360 /* Power of two alignment for loops. */
361 const char *ix86_align_loops_string;
363 /* Power of two alignment for non-loop jumps. */
364 const char *ix86_align_jumps_string;
366 /* Power of two alignment for stack boundary in bytes. */
367 const char *ix86_preferred_stack_boundary_string;
369 /* Preferred alignment for stack boundary in bits. */
370 int ix86_preferred_stack_boundary;
372 /* Values 1-5: see jump.c */
373 int ix86_branch_cost;
374 const char *ix86_branch_cost_string;
376 /* Power of two alignment for functions. */
377 int ix86_align_funcs;
378 const char *ix86_align_funcs_string;
380 /* Power of two alignment for loops. */
381 int ix86_align_loops;
383 /* Power of two alignment for non-loop jumps. */
384 int ix86_align_jumps;
386 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
387 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
389 static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
390 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
391 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
393 static rtx gen_push PARAMS ((rtx));
394 static int memory_address_length PARAMS ((rtx addr));
395 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
396 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
397 static int ix86_safe_length PARAMS ((rtx));
398 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
399 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
400 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
401 static void ix86_dump_ppro_packet PARAMS ((FILE *));
402 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
403 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
405 static void ix86_init_machine_status PARAMS ((struct function *));
406 static void ix86_mark_machine_status PARAMS ((struct function *));
407 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
408 static int ix86_safe_length_prefix PARAMS ((rtx));
409 static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
410 int *, int *, int *));
411 static int ix86_nsaved_regs PARAMS((void));
412 static void ix86_emit_save_regs PARAMS((void));
413 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
414 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
415 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
416 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
417 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
421 rtx base, index, disp;
425 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
427 struct builtin_description;
428 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
430 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
432 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
433 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
434 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
435 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
436 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
438 /* Sometimes certain combinations of command options do not make
439 sense on a particular target machine. You can define a macro
440 `OVERRIDE_OPTIONS' to take account of this. This macro, if
441 defined, is executed once just after all the command options have
444 Don't use this macro to turn on various extra optimizations for
445 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
450 /* Comes from final.c -- no real reason to change it. */
451 #define MAX_CODE_ALIGN 16
455 struct processor_costs *cost; /* Processor costs */
456 int target_enable; /* Target flags to enable. */
457 int target_disable; /* Target flags to disable. */
458 int align_loop; /* Default alignments. */
463 const processor_target_table[PROCESSOR_max] =
465 {&i386_cost, 0, 0, 2, 2, 2, 1},
466 {&i486_cost, 0, 0, 4, 4, 4, 1},
467 {&pentium_cost, 0, 0, -4, -4, -4, 1},
468 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
469 {&k6_cost, 0, 0, -5, -5, 4, 1},
470 {&athlon_cost, 0, 0, 4, -4, 4, 1}
475 const char *name; /* processor name or nickname. */
476 enum processor_type processor;
478 const processor_alias_table[] =
480 {"i386", PROCESSOR_I386},
481 {"i486", PROCESSOR_I486},
482 {"i586", PROCESSOR_PENTIUM},
483 {"pentium", PROCESSOR_PENTIUM},
484 {"i686", PROCESSOR_PENTIUMPRO},
485 {"pentiumpro", PROCESSOR_PENTIUMPRO},
486 {"k6", PROCESSOR_K6},
487 {"athlon", PROCESSOR_ATHLON},
490 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
492 #ifdef SUBTARGET_OVERRIDE_OPTIONS
493 SUBTARGET_OVERRIDE_OPTIONS;
496 ix86_arch = PROCESSOR_I386;
497 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
499 if (ix86_arch_string != 0)
502 for (i = 0; i < pta_size; i++)
503 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
505 ix86_arch = processor_alias_table[i].processor;
506 /* Default cpu tuning to the architecture. */
507 ix86_cpu = ix86_arch;
511 error ("bad value (%s) for -march= switch", ix86_arch_string);
514 if (ix86_cpu_string != 0)
517 for (i = 0; i < pta_size; i++)
518 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
520 ix86_cpu = processor_alias_table[i].processor;
524 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
527 ix86_cost = processor_target_table[ix86_cpu].cost;
528 target_flags |= processor_target_table[ix86_cpu].target_enable;
529 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
531 /* Arrange to set up i386_stack_locals for all functions. */
532 init_machine_status = ix86_init_machine_status;
533 mark_machine_status = ix86_mark_machine_status;
535 /* Validate registers in register allocation order. */
536 if (ix86_reg_alloc_order)
539 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
545 case 'a': regno = 0; break;
546 case 'd': regno = 1; break;
547 case 'c': regno = 2; break;
548 case 'b': regno = 3; break;
549 case 'S': regno = 4; break;
550 case 'D': regno = 5; break;
551 case 'B': regno = 6; break;
553 default: fatal ("Register '%c' is unknown", ch);
556 if (regs_allocated[regno])
557 fatal ("Register '%c' already specified in allocation order", ch);
559 regs_allocated[regno] = 1;
563 /* Validate -mregparm= value. */
564 if (ix86_regparm_string)
566 ix86_regparm = atoi (ix86_regparm_string);
567 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
568 fatal ("-mregparm=%d is not between 0 and %d",
569 ix86_regparm, REGPARM_MAX);
572 /* Validate -malign-loops= value, or provide default. */
573 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
574 if (ix86_align_loops_string)
576 ix86_align_loops = atoi (ix86_align_loops_string);
577 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
578 fatal ("-malign-loops=%d is not between 0 and %d",
579 ix86_align_loops, MAX_CODE_ALIGN);
582 /* Validate -malign-jumps= value, or provide default. */
583 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
584 if (ix86_align_jumps_string)
586 ix86_align_jumps = atoi (ix86_align_jumps_string);
587 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
588 fatal ("-malign-jumps=%d is not between 0 and %d",
589 ix86_align_jumps, MAX_CODE_ALIGN);
592 /* Validate -malign-functions= value, or provide default. */
593 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
594 if (ix86_align_funcs_string)
596 ix86_align_funcs = atoi (ix86_align_funcs_string);
597 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
598 fatal ("-malign-functions=%d is not between 0 and %d",
599 ix86_align_funcs, MAX_CODE_ALIGN);
602 /* Validate -mpreferred-stack-boundary= value, or provide default.
603 The default of 128 bits is for Pentium III's SSE __m128. */
604 ix86_preferred_stack_boundary = 128;
605 if (ix86_preferred_stack_boundary_string)
607 int i = atoi (ix86_preferred_stack_boundary_string);
609 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
610 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
613 /* Validate -mbranch-cost= value, or provide default. */
614 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
615 if (ix86_branch_cost_string)
617 ix86_branch_cost = atoi (ix86_branch_cost_string);
618 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
619 fatal ("-mbranch-cost=%d is not between 0 and 5",
623 /* Keep nonleaf frame pointers. */
624 if (TARGET_OMIT_LEAF_FRAME_POINTER)
625 flag_omit_frame_pointer = 1;
627 /* If we're doing fast math, we don't care about comparison order
628 wrt NaNs. This lets us use a shorter comparison sequence. */
630 target_flags &= ~MASK_IEEE_FP;
632 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
635 target_flags |= MASK_MMX;
638 /* A C statement (sans semicolon) to choose the order in which to
639 allocate hard registers for pseudo-registers local to a basic
642 Store the desired register order in the array `reg_alloc_order'.
643 Element 0 should be the register to allocate first; element 1, the
644 next register; and so on.
646 The macro body should not assume anything about the contents of
647 `reg_alloc_order' before execution of the macro.
649 On most machines, it is not necessary to define this macro. */
652 order_regs_for_local_alloc ()
656 /* User specified the register allocation order. */
658 if (ix86_reg_alloc_order)
660 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
666 case 'a': regno = 0; break;
667 case 'd': regno = 1; break;
668 case 'c': regno = 2; break;
669 case 'b': regno = 3; break;
670 case 'S': regno = 4; break;
671 case 'D': regno = 5; break;
672 case 'B': regno = 6; break;
675 reg_alloc_order[order++] = regno;
678 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
680 if (! regs_allocated[i])
681 reg_alloc_order[order++] = i;
685 /* If user did not specify a register allocation order, use natural order. */
688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
689 reg_alloc_order[i] = i;
694 optimization_options (level, size)
696 int size ATTRIBUTE_UNUSED;
698 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
699 make the problem with not enough registers even worse. */
700 #ifdef INSN_SCHEDULING
702 flag_schedule_insns = 0;
706 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
707 attribute for DECL. The attributes in ATTRIBUTES have previously been
711 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
712 tree decl ATTRIBUTE_UNUSED;
713 tree attributes ATTRIBUTE_UNUSED;
714 tree identifier ATTRIBUTE_UNUSED;
715 tree args ATTRIBUTE_UNUSED;
720 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
721 attribute for TYPE. The attributes in ATTRIBUTES have previously been
725 ix86_valid_type_attribute_p (type, attributes, identifier, args)
727 tree attributes ATTRIBUTE_UNUSED;
731 if (TREE_CODE (type) != FUNCTION_TYPE
732 && TREE_CODE (type) != METHOD_TYPE
733 && TREE_CODE (type) != FIELD_DECL
734 && TREE_CODE (type) != TYPE_DECL)
737 /* Stdcall attribute says callee is responsible for popping arguments
738 if they are not variable. */
739 if (is_attribute_p ("stdcall", identifier))
740 return (args == NULL_TREE);
742 /* Cdecl attribute says the callee is a normal C declaration. */
743 if (is_attribute_p ("cdecl", identifier))
744 return (args == NULL_TREE);
746 /* Regparm attribute specifies how many integer arguments are to be
747 passed in registers. */
748 if (is_attribute_p ("regparm", identifier))
752 if (! args || TREE_CODE (args) != TREE_LIST
753 || TREE_CHAIN (args) != NULL_TREE
754 || TREE_VALUE (args) == NULL_TREE)
757 cst = TREE_VALUE (args);
758 if (TREE_CODE (cst) != INTEGER_CST)
761 if (compare_tree_int (cst, REGPARM_MAX) > 0)
770 /* Return 0 if the attributes for two types are incompatible, 1 if they
771 are compatible, and 2 if they are nearly compatible (which causes a
772 warning to be generated). */
775 ix86_comp_type_attributes (type1, type2)
779 /* Check for mismatch of non-default calling convention. */
780 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
782 if (TREE_CODE (type1) != FUNCTION_TYPE)
785 /* Check for mismatched return types (cdecl vs stdcall). */
786 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
787 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
792 /* Value is the number of bytes of arguments automatically
793 popped when returning from a subroutine call.
794 FUNDECL is the declaration node of the function (as a tree),
795 FUNTYPE is the data type of the function (as a tree),
796 or for a library call it is an identifier node for the subroutine name.
797 SIZE is the number of bytes of arguments passed on the stack.
799 On the 80386, the RTD insn may be used to pop them if the number
800 of args is fixed, but if the number is variable then the caller
801 must pop them all. RTD can't be used for library calls now
802 because the library is compiled with the Unix compiler.
803 Use of RTD is a selectable option, since it is incompatible with
804 standard Unix calling sequences. If the option is not selected,
805 the caller must always pop the args.
807 The attribute stdcall is equivalent to RTD on a per module basis. */
810 ix86_return_pops_args (fundecl, funtype, size)
815 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
817 /* Cdecl functions override -mrtd, and never pop the stack. */
818 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
820 /* Stdcall functions will pop the stack if not variable args. */
821 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
825 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
826 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
831 /* Lose any fake structure return argument. */
832 if (aggregate_value_p (TREE_TYPE (funtype)))
833 return GET_MODE_SIZE (Pmode);
838 /* Argument support functions. */
840 /* Initialize a variable CUM of type CUMULATIVE_ARGS
841 for a call to a function whose data type is FNTYPE.
842 For a library call, FNTYPE is 0. */
845 init_cumulative_args (cum, fntype, libname)
846 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
847 tree fntype; /* tree ptr for function decl */
848 rtx libname; /* SYMBOL_REF of library name or 0 */
850 static CUMULATIVE_ARGS zero_cum;
851 tree param, next_param;
853 if (TARGET_DEBUG_ARG)
855 fprintf (stderr, "\ninit_cumulative_args (");
857 fprintf (stderr, "fntype code = %s, ret code = %s",
858 tree_code_name[(int) TREE_CODE (fntype)],
859 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
861 fprintf (stderr, "no fntype");
864 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
869 /* Set up the number of registers to use for passing arguments. */
870 cum->nregs = ix86_regparm;
873 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
876 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
879 /* Determine if this function has variable arguments. This is
880 indicated by the last argument being 'void_type_mode' if there
881 are no variable arguments. If there are variable arguments, then
882 we won't pass anything in registers */
886 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
887 param != 0; param = next_param)
889 next_param = TREE_CHAIN (param);
890 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
895 if (TARGET_DEBUG_ARG)
896 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
901 /* Update the data in CUM to advance over an argument
902 of mode MODE and data type TYPE.
903 (TYPE is null for libcalls where that information may not be available.) */
906 function_arg_advance (cum, mode, type, named)
907 CUMULATIVE_ARGS *cum; /* current arg information */
908 enum machine_mode mode; /* current arg mode */
909 tree type; /* type of the argument or 0 if lib support */
910 int named; /* whether or not the argument was named */
913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
914 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
916 if (TARGET_DEBUG_ARG)
918 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
919 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
934 /* Define where to put the arguments to a function.
935 Value is zero to push the argument on the stack,
936 or a hard register in which to store the argument.
938 MODE is the argument's machine mode.
939 TYPE is the data type of the argument (as a tree).
940 This is null for libcalls where that information may
942 CUM is a variable of type CUMULATIVE_ARGS which gives info about
943 the preceding args and about the function being called.
944 NAMED is nonzero if this argument is a named parameter
945 (otherwise it is an extra parameter matching an ellipsis). */
948 function_arg (cum, mode, type, named)
949 CUMULATIVE_ARGS *cum; /* current arg information */
950 enum machine_mode mode; /* current arg mode */
951 tree type; /* type of the argument or 0 if lib support */
952 int named; /* != 0 for normal args, == 0 for ... args */
956 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
957 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
961 /* For now, pass fp/complex values on the stack. */
970 if (words <= cum->nregs)
971 ret = gen_rtx_REG (mode, cum->regno);
975 if (TARGET_DEBUG_ARG)
978 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
979 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
982 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
984 fprintf (stderr, ", stack");
986 fprintf (stderr, " )\n");
993 /* Return nonzero if OP is (const_int 1), else return zero. */
996 const_int_1_operand (op, mode)
998 enum machine_mode mode ATTRIBUTE_UNUSED;
1000 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1003 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1004 reference and a constant. */
1007 symbolic_operand (op, mode)
1009 enum machine_mode mode ATTRIBUTE_UNUSED;
1011 switch (GET_CODE (op))
1019 if (GET_CODE (op) == SYMBOL_REF
1020 || GET_CODE (op) == LABEL_REF
1021 || (GET_CODE (op) == UNSPEC
1022 && XINT (op, 1) >= 6
1023 && XINT (op, 1) <= 7))
1025 if (GET_CODE (op) != PLUS
1026 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1030 if (GET_CODE (op) == SYMBOL_REF
1031 || GET_CODE (op) == LABEL_REF)
1033 /* Only @GOTOFF gets offsets. */
1034 if (GET_CODE (op) != UNSPEC
1035 || XINT (op, 1) != 7)
1038 op = XVECEXP (op, 0, 0);
1039 if (GET_CODE (op) == SYMBOL_REF
1040 || GET_CODE (op) == LABEL_REF)
1049 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1052 pic_symbolic_operand (op, mode)
1054 enum machine_mode mode ATTRIBUTE_UNUSED;
1056 if (GET_CODE (op) == CONST)
1059 if (GET_CODE (op) == UNSPEC)
1061 if (GET_CODE (op) != PLUS
1062 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1065 if (GET_CODE (op) == UNSPEC)
1071 /* Test for a valid operand for a call instruction. Don't allow the
1072 arg pointer register or virtual regs since they may decay into
1073 reg + const, which the patterns can't handle. */
1076 call_insn_operand (op, mode)
1078 enum machine_mode mode ATTRIBUTE_UNUSED;
1080 /* Disallow indirect through a virtual register. This leads to
1081 compiler aborts when trying to eliminate them. */
1082 if (GET_CODE (op) == REG
1083 && (op == arg_pointer_rtx
1084 || op == frame_pointer_rtx
1085 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1086 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1089 /* Disallow `call 1234'. Due to varying assembler lameness this
1090 gets either rejected or translated to `call .+1234'. */
1091 if (GET_CODE (op) == CONST_INT)
1094 /* Explicitly allow SYMBOL_REF even if pic. */
1095 if (GET_CODE (op) == SYMBOL_REF)
1098 /* Half-pic doesn't allow anything but registers and constants.
1099 We've just taken care of the latter. */
1101 return register_operand (op, Pmode);
1103 /* Otherwise we can allow any general_operand in the address. */
1104 return general_operand (op, Pmode);
/* Return 1 if OP is a constant call address: a SYMBOL_REF, possibly
   offset by a CONST_INT (i.e. (const (plus symbol const_int))). */
1108 constant_call_address_operand (op, mode)
1110 enum machine_mode mode ATTRIBUTE_UNUSED;
1112 if (GET_CODE (op) == CONST
1113 && GET_CODE (XEXP (op, 0)) == PLUS
1114 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1115 op = XEXP (XEXP (op, 0), 0);
1116 return GET_CODE (op) == SYMBOL_REF;
1119 /* Match exactly zero and one. */
/* Uses the mode-dependent canonical zero rtx, so this also matches
   floating-point zero in FP modes. */
1122 const0_operand (op, mode)
1124 enum machine_mode mode;
1126 return op == CONST0_RTX (mode);
/* Match exactly the integer constant one; MODE is ignored. */
1130 const1_operand (op, mode)
1132 enum machine_mode mode ATTRIBUTE_UNUSED;
1134 return op == const1_rtx;
1137 /* Match 2, 4, or 8. Used for leal multiplicands. */
/* These are the only scale factors encodable in the SIB byte. */
1140 const248_operand (op, mode)
1142 enum machine_mode mode ATTRIBUTE_UNUSED;
1144 return (GET_CODE (op) == CONST_INT
1145 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1148 /* True if this is a constant appropriate for an increment or decrement. */
/* Besides +1/-1, also accept the unsigned all-ones value of MODE,
   which is -1 when truncated to that mode. */
1151 incdec_operand (op, mode)
1153 enum machine_mode mode;
1155 if (op == const1_rtx || op == constm1_rtx)
1157 if (GET_CODE (op) != CONST_INT)
1159 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1161 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1163 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1168 /* Return false if this is the stack pointer, or any other fake
1169 register eliminable to the stack pointer. Otherwise, this is
1172 This is used to prevent esp from being used as an index reg.
1173 Which would only happen in pathological cases. */
1176 reg_no_sp_operand (op, mode)
1178 enum machine_mode mode;
/* Strip a SUBREG so the comparison sees the underlying hard/fake reg. */
1181 if (GET_CODE (t) == SUBREG)
1183 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1186 return register_operand (op, mode);
/* Return 1 if OP is an MMX register; MODE is ignored. */
1190 mmx_reg_operand (op, mode)
1192 enum machine_mode mode ATTRIBUTE_UNUSED;
1194 return MMX_REG_P (op);
1197 /* Return false if this is any eliminable register. Otherwise
/* ... general_operand.  Rejects the arg/frame pointers and the virtual
   registers that eliminate to them. */
1201 general_no_elim_operand (op, mode)
1203 enum machine_mode mode;
1206 if (GET_CODE (t) == SUBREG)
1208 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1209 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1210 || t == virtual_stack_dynamic_rtx)
1213 return general_operand (op, mode);
1216 /* Return false if this is any eliminable register. Otherwise
1217 register_operand or const_int. */
1220 nonmemory_no_elim_operand (op, mode)
1222 enum machine_mode mode;
/* Same eliminable-register screen as general_no_elim_operand, but the
   fallback accepts only registers and integer constants (no memory). */
1225 if (GET_CODE (t) == SUBREG)
1227 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1228 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1229 || t == virtual_stack_dynamic_rtx)
1232 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1235 /* Return true if op is a Q_REGS class register. */
/* Q_REGS are the byte-addressable registers (%eax/%ebx/%ecx/%edx). */
1238 q_regs_operand (op, mode)
1240 enum machine_mode mode;
1242 if (mode != VOIDmode && GET_MODE (op) != mode)
1244 if (GET_CODE (op) == SUBREG)
1245 op = SUBREG_REG (op);
1246 return QI_REG_P (op);
1249 /* Return true if op is a NON_Q_REGS class register. */
1252 non_q_regs_operand (op, mode)
1254 enum machine_mode mode;
1256 if (mode != VOIDmode && GET_MODE (op) != mode)
1258 if (GET_CODE (op) == SUBREG)
1259 op = SUBREG_REG (op);
1260 return NON_QI_REG_P (op);
1263 /* Return 1 if OP is a comparison operator that can use the condition code
1264 generated by a logical operation, which characteristicly does not set
1265 overflow or carry. To be used with CCNOmode. */
1268 no_comparison_operator (op, mode)
1270 enum machine_mode mode;
1272 if (mode != VOIDmode && GET_MODE (op) != mode)
1275 switch (GET_CODE (op))
/* Unsigned comparisons rely on the carry flag, which logical ops
   leave clear; they are rejected here. */
1279 case LEU: case LTU: case GEU: case GTU:
1287 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
/* ... instructions.  Only EQ/LT/LE/UNORDERED have direct SSE compare
   encodings; other codes must be synthesized by the caller. */
1290 sse_comparison_operator (op, mode)
1292 enum machine_mode mode ATTRIBUTE_UNUSED;
1294 enum rtx_code code = GET_CODE (op);
1295 return code == EQ || code == LT || code == LE || code == UNORDERED;
1297 /* Return 1 if OP is a valid comparison operator in valid mode. */
1299 ix86_comparison_operator (op, mode)
1301 enum machine_mode mode;
1303 enum machine_mode inmode;
1304 if (mode != VOIDmode && GET_MODE (op) != mode)
1306 switch (GET_CODE (op))
/* Which comparison codes are legal depends on the CC mode produced by
   the compare that feeds this operator. */
1311 inmode = GET_MODE (XEXP (op, 0));
1312 if (inmode == CCmode || inmode == CCGCmode
1313 || inmode == CCGOCmode || inmode == CCNOmode)
/* Unsigned and ordered comparisons need the full flags (CCmode only). */
1316 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1317 inmode = GET_MODE (XEXP (op, 0));
1318 if (inmode == CCmode)
1322 inmode = GET_MODE (XEXP (op, 0));
1323 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1331 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1334 fcmov_comparison_operator (op, mode)
1336 enum machine_mode mode;
1338 enum machine_mode inmode = GET_MODE (XEXP (op, 0));
1339 if (mode != VOIDmode && GET_MODE (op) != mode)
1341 switch (GET_CODE (op))
/* fcmov only tests the unsigned/ordered flag combinations, and only
   when the compare was done in an FP condition-code mode. */
1345 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1346 if (inmode == CCFPmode || inmode == CCFPUmode)
1354 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1357 promotable_binary_operator (op, mode)
1359 enum machine_mode mode ATTRIBUTE_UNUSED;
1361 switch (GET_CODE (op))
1364 /* Modern CPUs have same latency for HImode and SImode multiply,
1365 but 386 and 486 do HImode multiply faster. */
1366 return ix86_cpu > PROCESSOR_I486;
1378 /* Nearly general operand, but accept any const_double, since we wish
1379 to be able to drop them into memory rather than have them get pulled
/* ... into registers. */
1383 cmp_fp_expander_operand (op, mode)
1385 enum machine_mode mode;
1387 if (mode != VOIDmode && mode != GET_MODE (op))
1389 if (GET_CODE (op) == CONST_DOUBLE)
1391 return general_operand (op, mode);
1394 /* Match an SI or HImode register for a zero_extract. */
1397 ext_register_operand (op, mode)
1399 enum machine_mode mode ATTRIBUTE_UNUSED;
1401 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1403 return register_operand (op, VOIDmode);
1406 /* Return 1 if this is a valid binary floating-point operation.
1407 OP is the expression matched, and MODE is its mode. */
1410 binary_fp_operator (op, mode)
1412 enum machine_mode mode;
1414 if (mode != VOIDmode && mode != GET_MODE (op))
1417 switch (GET_CODE (op))
/* The operator itself must be carried out in a float mode. */
1423 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Return 1 if OP is a MULT rtx; MODE is ignored. */
1431 mult_operator(op, mode)
1433 enum machine_mode mode ATTRIBUTE_UNUSED;
1435 return GET_CODE (op) == MULT;
/* Return 1 if OP is a DIV rtx; MODE is ignored. */
1439 div_operator(op, mode)
1441 enum machine_mode mode ATTRIBUTE_UNUSED;
1443 return GET_CODE (op) == DIV;
/* Return 1 if OP is a binary operator of rtx class 'c' (commutative
   arithmetic) or '2' (non-commutative binary) in MODE. */
1447 arith_or_logical_operator (op, mode)
1449 enum machine_mode mode;
1451 return ((mode == VOIDmode || GET_MODE (op) == mode)
1452 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1453 || GET_RTX_CLASS (GET_CODE (op)) == '2'))
1456 /* Returns 1 if OP is memory operand with a displacement. */
1459 memory_displacement_operand (op, mode)
1461 enum machine_mode mode;
1463 struct ix86_address parts;
1465 if (! memory_operand (op, mode))
/* Decompose the address; the disp field is non-null iff the address
   carries a displacement term. */
1468 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1471 return parts.disp != NULL_RTX;
1474 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1475 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1477 ??? It seems likely that this will only work because cmpsi is an
1478 expander, and no actual insns use this. */
1481 cmpsi_operand (op, mode)
1483 enum machine_mode mode;
1485 if (general_operand (op, mode))
/* Additionally accept the exact shape produced by testqi_ext_ccno_0:
   (and (zero_extract x 8 8) const_int) in SImode. */
1488 if (GET_CODE (op) == AND
1489 && GET_MODE (op) == SImode
1490 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1491 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1492 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1493 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1494 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1495 && GET_CODE (XEXP (op, 1)) == CONST_INT)
1501 /* Returns 1 if OP is memory operand that can not be represented by the
/* ... shortest (zero-length address) encoding. */
1505 long_memory_operand (op, mode)
1507 enum machine_mode mode;
1509 if (! memory_operand (op, mode))
1512 return memory_address_length (op) != 0;
1515 /* Return nonzero if the rtx is known aligned. */
1518 aligned_operand (op, mode)
1520 enum machine_mode mode;
1522 struct ix86_address parts;
1524 if (!general_operand (op, mode))
1527 /* Registers and immediate operands are always "aligned". */
1528 if (GET_CODE (op) != MEM)
1531 /* Don't even try to do any aligned optimizations with volatiles. */
1532 if (MEM_VOLATILE_P (op))
1537 /* Pushes and pops are only valid on the stack pointer. */
1538 if (GET_CODE (op) == PRE_DEC
1539 || GET_CODE (op) == POST_INC)
1542 /* Decode the address. */
1543 if (! ix86_decompose_address (op, &parts))
1546 /* Look for some component that isn't known to be aligned. */
/* Index and base must each have at least 32-bit (4-byte) known
   pointer alignment, and any displacement must be a multiple of 4. */
1550 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1555 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1560 if (GET_CODE (parts.disp) != CONST_INT
1561 || (INTVAL (parts.disp) & 3) != 0)
1565 /* Didn't find one -- this must be an aligned address. */
1569 /* Return true if the constant is something that can be loaded with
1570 a special instruction. Only handle 0.0 and 1.0; others are less
/* ... worthwhile (fldz/fld1 are the cheap loads). */
1574 standard_80387_constant_p (x)
1577 if (GET_CODE (x) != CONST_DOUBLE)
1580 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
/* Guard the REAL_VALUE comparisons with a float trap handler so an
   exceptional constant cannot crash the compiler. */
1586 if (setjmp (handler))
1589 set_float_handler (handler);
1590 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
/* fldz must not be used for -0.0, hence the minus-zero check. */
1591 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1592 is1 = REAL_VALUES_EQUAL (d, dconst1);
1593 set_float_handler (NULL_PTR);
1601 /* Note that on the 80387, other constants, such as pi,
1602 are much slower to load as standard constants
1603 than to load from doubles in memory! */
1604 /* ??? Not true on K6: all constants are equal cost. */
1611 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the rtx using its format string: 'E' entries are
   vectors (recurse over elements), 'e' entries are sub-expressions. */
1614 symbolic_reference_mentioned_p (op)
1617 register const char *fmt;
1620 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1623 fmt = GET_RTX_FORMAT (GET_CODE (op));
1624 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1630 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1631 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1635 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1642 /* Return 1 if it is appropriate to emit `ret' instructions in the
1643 body of a function. Do this only if the epilogue is simple, needing a
1644 couple of insns. Prior to reloading, we can't tell how many registers
1645 must be saved, so return 0 then. Return 0 if there is no frame
1646 marker to de-allocate.
1648 If NON_SAVING_SETJMP is defined and true, then it is not possible
1649 for the epilogue to be simple, so return 0. This is a special case
1650 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1651 until final, but jump_optimize may need to know sooner if a
/* ... simple return is ok. */
1655 ix86_can_use_return_insn_p ()
1657 HOST_WIDE_INT tsize;
1660 #ifdef NON_SAVING_SETJMP
1661 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1664 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1665 if (profile_block_flag == 2)
1669 if (! reload_completed || frame_pointer_needed)
1672 /* Don't allow more than 32 pop, since that's all we can do
1673 with one instruction. */
1674 if (current_function_pops_args
1675 && current_function_args_size >= 32768)
/* `ret' alone is only usable when there is no frame and no saved
   registers to unwind. */
1678 tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
1679 return tsize == 0 && nregs == 0;
/* Shared state between load_pic_register (which names the PIC thunk
   label) and asm_output_function_prefix (which emits it once). */
1682 static const char *pic_label_name;
1683 static int pic_label_output;
1685 /* This function generates code for -fpic that loads %ebx with
1686 the return address of the caller and then returns. */
1689 asm_output_function_prefix (file, name)
1691 const char *name ATTRIBUTE_UNUSED;
1694 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1695 || current_function_uses_const_pool)
1696 xops[0] = pic_offset_table_rtx;
1697 xops[1] = stack_pointer_rtx;
1699 /* Deep branch prediction favors having a return for every call. */
1700 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
/* Emit the thunk body only once per translation unit
   (pic_label_output latches after the first emission). */
1702 if (!pic_label_output)
1704 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1705 internal (non-global) label that's being emitted, it didn't make
1706 sense to have .type information for local labels. This caused
1707 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1708 me debug info for a label that you're declaring non-global?) this
1709 was changed to call ASM_OUTPUT_LABEL() instead. */
1711 ASM_OUTPUT_LABEL (file, pic_label_name);
/* Thunk body: load the return address from the top of stack into the
   PIC register, then return. */
1713 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1714 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1715 output_asm_insn ("ret", xops);
1717 pic_label_output = 1;
/* Emit insns to load the PIC register with the address of the GOT.
   With deep branch prediction, call a per-file thunk (see
   asm_output_function_prefix); otherwise use the classic
   call-next-insn / pop sequence. */
1723 load_pic_register ()
1727 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1729 if (TARGET_DEEP_BRANCH_PREDICTION)
/* Lazily create the thunk's label name; it persists via ggc_strdup. */
1731 if (pic_label_name == NULL)
1734 ASM_GENERATE_INTERNAL_LABEL (buf, "LPR", 0);
1735 pic_label_name = ggc_strdup (buf);
1737 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
1741 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
1744 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
1746 if (! TARGET_DEEP_BRANCH_PREDICTION)
1747 emit_insn (gen_popsi1 (pic_offset_table_rtx));
1749 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
1752 /* Generate an SImode "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) -- the RTL form of pushl. */
1758 return gen_rtx_SET (VOIDmode,
1759 gen_rtx_MEM (SImode,
1760 gen_rtx_PRE_DEC (SImode,
1761 stack_pointer_rtx)),
1765 /* Return number of registers to be saved on the stack. */
/* Counts call-saved registers that are live, plus the PIC register when
   this function uses the GOT or constant pool.  The scan stops at the
   frame pointer when one is needed, since it is saved separately. */
1771 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1772 || current_function_uses_const_pool)
1773 int limit = (frame_pointer_needed
1774 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1777 for (regno = limit - 1; regno >= 0; regno--)
1778 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1779 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1786 /* Return the offset between two registers, one to be eliminated, and the other
1787 its replacement, at the start of a routine. */
1790 ix86_initial_elimination_offset (from, to)
1797 /* Stack grows downward:
1803 saved frame pointer if frame_pointer_needed
1804 <- HARD_FRAME_POINTER
1814 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1815 /* Skip saved PC and previous frame pointer.
1816 Executed only when frame_pointer_needed. */
1818 else if (from == FRAME_POINTER_REGNUM
1819 && to == HARD_FRAME_POINTER_REGNUM)
/* Saved registers plus pre-frame padding separate the soft frame
   pointer from the hard one. */
1821 ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
1822 padding1 += nregs * UNITS_PER_WORD;
1827 /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination. */
/* frame_size covers the saved return address, and the saved %ebp too
   when a frame pointer is in use (8 vs 4 bytes). */
1828 int frame_size = frame_pointer_needed ? 8 : 4;
1829 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
1830 &nregs, &padding1, (int *) 0);
1832 if (to != STACK_POINTER_REGNUM)
1834 else if (from == ARG_POINTER_REGNUM)
1835 return tsize + nregs * UNITS_PER_WORD + frame_size;
1836 else if (from != FRAME_POINTER_REGNUM)
1839 return tsize - padding1;
1843 /* Compute the size of local storage taking into consideration the
1844 desired stack alignment which is to be maintained. Also determine
1845 the number of registers saved below the local storage.
1847 PADDING1 returns padding before stack frame and PADDING2 returns
1848 padding after stack frame;
/* Any of the three output pointers may be null if the caller does not
   need that value. */
1851 static HOST_WIDE_INT
1852 ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
1854 int *nregs_on_stack;
1861 HOST_WIDE_INT total_size;
1862 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1864 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1866 nregs = ix86_nsaved_regs ();
/* Start past the return address (4) plus saved %ebp when present. */
1869 offset = frame_pointer_needed ? 8 : 4;
1871 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1872 since i386 port is the only one using those features that may break easily. */
1874 if (size && !stack_alignment_needed)
1876 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
1878 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1880 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1882 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1885 if (stack_alignment_needed < 4)
1886 stack_alignment_needed = 4;
1888 offset += nregs * UNITS_PER_WORD;
1890 if (ACCUMULATE_OUTGOING_ARGS)
1891 total_size += current_function_outgoing_args_size;
1893 total_size += offset;
1895 /* Align start of frame for local function. */
1896 padding1 = ((offset + stack_alignment_needed - 1)
1897 & -stack_alignment_needed) - offset;
1898 total_size += padding1;
1900 /* Align stack boundary. */
1901 padding2 = ((total_size + preferred_alignment - 1)
1902 & -preferred_alignment) - total_size;
1904 if (ACCUMULATE_OUTGOING_ARGS)
1905 padding2 += current_function_outgoing_args_size;
/* Report the requested outputs through the optional pointers. */
1908 *nregs_on_stack = nregs;
1910 *rpadding1 = padding1;
1912 *rpadding2 = padding2;
1914 return size + padding1 + padding2;
1917 /* Emit code to save registers in the prologue. */
1920 ix86_emit_save_regs ()
1925 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1926 || current_function_uses_const_pool)
1927 limit = (frame_pointer_needed
1928 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
/* Must mirror the register selection in ix86_nsaved_regs so the count
   and the pushes stay in sync. */
1930 for (regno = limit - 1; regno >= 0; regno--)
1931 if ((regs_ever_live[regno] && !call_used_regs[regno])
1932 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1934 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
/* Mark the push so dwarf2 unwind info records it. */
1935 RTX_FRAME_RELATED_P (insn) = 1;
1939 /* Expand the prologue into a bunch of separate insns. */
1942 ix86_expand_prologue ()
1944 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
1947 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1948 || current_function_uses_const_pool)
1950 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1951 slower on all targets. Also sdb doesn't like it. */
1953 if (frame_pointer_needed)
1955 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1956 RTX_FRAME_RELATED_P (insn) = 1;
1958 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1959 RTX_FRAME_RELATED_P (insn) = 1;
1962 ix86_emit_save_regs ();
/* Allocate the local frame.  Small frames (or no stack probing) can
   just adjust %esp; otherwise call _alloca to probe each page. */
1966 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1968 if (frame_pointer_needed)
1969 insn = emit_insn (gen_pro_epilogue_adjust_stack
1970 (stack_pointer_rtx, stack_pointer_rtx,
1971 GEN_INT (-tsize), hard_frame_pointer_rtx));
1973 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1975 RTX_FRAME_RELATED_P (insn) = 1;
1979 /* ??? Is this only valid for Win32? */
/* Stack probing path: pass the frame size in %eax and call _alloca. */
1983 arg0 = gen_rtx_REG (SImode, 0);
1984 emit_move_insn (arg0, GEN_INT (tsize));
1986 sym = gen_rtx_MEM (FUNCTION_MODE,
1987 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1988 insn = emit_call_insn (gen_call (sym, const0_rtx));
1990 CALL_INSN_FUNCTION_USAGE (insn)
1991 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1992 CALL_INSN_FUNCTION_USAGE (insn));
1995 #ifdef SUBTARGET_PROLOGUE
2000 load_pic_register ();
2002 /* If we are profiling, make sure no instructions are scheduled before
2003 the call to mcount. However, if -fpic, the above call will have
/* ... done that already, so the blockage is only needed otherwise. */
2005 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2006 emit_insn (gen_blockage ());
2009 /* Emit code to add TSIZE to esp value. Use POP instruction when
/* ... profitable (per the comment fragment above; body elided here). */
2013 ix86_emit_epilogue_esp_adjustment (tsize)
2016 /* If a frame pointer is present, we must be sure to tie the sp
2017 to the fp so that we don't mis-schedule. */
2018 if (frame_pointer_needed)
2019 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2022 hard_frame_pointer_rtx));
2024 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2028 /* Emit code to restore saved registers using MOV insns. First register
2029 is restored from POINTER + OFFSET. */
2031 ix86_emit_restore_regs_using_mov (pointer, offset)
2036 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2037 || current_function_uses_const_pool)
2038 int limit = (frame_pointer_needed
2039 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
/* Ascending scan: the inverse order of the descending pushes in
   ix86_emit_save_regs, so offsets line up with the save area. */
2041 for (regno = 0; regno < limit; regno++)
2042 if ((regs_ever_live[regno] && !call_used_regs[regno])
2043 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2045 emit_move_insn (gen_rtx_REG (SImode, regno),
2046 adj_offsettable_operand (gen_rtx_MEM (SImode,
2053 /* Restore function stack, frame, and registers. */
/* EMIT_RETURN is false for sibcall epilogues, which must not end in a
   return instruction (see the check near the bottom). */
2056 ix86_expand_epilogue (emit_return)
2062 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2063 || current_function_uses_const_pool)
2064 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2065 HOST_WIDE_INT offset;
2066 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2067 (int *) 0, (int *) 0);
2069 /* Calculate start of saved registers relative to ebp. */
2070 offset = -nregs * UNITS_PER_WORD;
2072 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2073 if (profile_block_flag == 2)
2075 FUNCTION_BLOCK_PROFILER_EXIT;
2079 /* If we're only restoring one register and sp is not valid then
2080 using a move instruction to restore the register since it's
2081 less work than reloading sp and popping the register.
2083 The default code result in stack adjustment using add/lea instruction,
2084 while this code results in LEAVE instruction (or discrete equivalent),
2085 so it is profitable in some other cases as well. Especially when there
2086 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2087 and there is exactly one register to pop. This heuristic may need some
2088 tuning in future. */
2089 if ((!sp_valid && nregs <= 1)
2090 || (frame_pointer_needed && !nregs && tsize)
2091 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2094 /* Restore registers. We can use ebp or esp to address the memory
2095 locations. If both are available, default to ebp, since offsets
2096 are known to be small. Only exception is esp pointing directly to the
2097 end of block of saved registers, where we may simplify addressing
2100 if (!frame_pointer_needed || (sp_valid && !tsize))
2101 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2103 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2105 if (!frame_pointer_needed)
2106 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2107 /* If not an i386, mov & pop is faster than "leave". */
2108 else if (TARGET_USE_LEAVE || optimize_size)
2109 emit_insn (gen_leave ());
/* Discrete equivalent of leave: point %esp at the saved %ebp slot
   and pop it. */
2112 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2113 hard_frame_pointer_rtx,
2115 hard_frame_pointer_rtx));
2116 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2121 /* First step is to deallocate the stack frame so that we can
2122 pop the registers. */
2125 if (!frame_pointer_needed)
2127 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2128 hard_frame_pointer_rtx,
2130 hard_frame_pointer_rtx));
2133 ix86_emit_epilogue_esp_adjustment (tsize);
/* Pop the call-saved registers in ascending order (reverse of the
   descending pushes in the prologue). */
2135 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2136 if ((regs_ever_live[regno] && !call_used_regs[regno])
2137 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2138 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2141 /* Sibcall epilogues don't want a return instruction. */
2145 if (current_function_pops_args && current_function_args_size)
2147 rtx popc = GEN_INT (current_function_pops_args);
2149 /* i386 can only pop 64K bytes. If asked to pop more, pop
2150 return address, do explicit add, and jump indirectly to the
2153 if (current_function_pops_args >= 65536)
/* %ecx is clobberable here: it is not used to pass return values. */
2155 rtx ecx = gen_rtx_REG (SImode, 2);
2157 emit_insn (gen_popsi1 (ecx));
2158 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2159 emit_jump_insn (gen_return_indirect_internal (ecx));
2162 emit_jump_insn (gen_return_pop_internal (popc));
2165 emit_jump_insn (gen_return_internal ());
2168 /* Extract the parts of an RTL expression that is a valid memory address
2169 for an instruction. Return false if the structure of the address is
/* ... grossly off (OUT receives base/index/disp/scale on success). */
2173 ix86_decompose_address (addr, out)
2175 struct ix86_address *out;
2177 rtx base = NULL_RTX;
2178 rtx index = NULL_RTX;
2179 rtx disp = NULL_RTX;
2180 HOST_WIDE_INT scale = 1;
2181 rtx scale_rtx = NULL_RTX;
2183 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2185 else if (GET_CODE (addr) == PLUS)
2187 rtx op0 = XEXP (addr, 0);
2188 rtx op1 = XEXP (addr, 1);
2189 enum rtx_code code0 = GET_CODE (op0);
2190 enum rtx_code code1 = GET_CODE (op1);
2192 if (code0 == REG || code0 == SUBREG)
2194 if (code1 == REG || code1 == SUBREG)
2195 index = op0, base = op1; /* index + base */
2197 base = op0, disp = op1; /* base + displacement */
2199 else if (code0 == MULT)
2201 index = XEXP (op0, 0);
2202 scale_rtx = XEXP (op0, 1);
2203 if (code1 == REG || code1 == SUBREG)
2204 base = op1; /* index*scale + base */
2206 disp = op1; /* index*scale + disp */
2208 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2210 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2211 scale_rtx = XEXP (XEXP (op0, 0), 1);
2212 base = XEXP (op0, 1);
2215 else if (code0 == PLUS)
2217 index = XEXP (op0, 0); /* index + base + disp */
2218 base = XEXP (op0, 1);
2224 else if (GET_CODE (addr) == MULT)
2226 index = XEXP (addr, 0); /* index*scale */
2227 scale_rtx = XEXP (addr, 1);
2229 else if (GET_CODE (addr) == ASHIFT)
2233 /* We're called for lea too, which implements ashift on occasion. */
2234 index = XEXP (addr, 0);
2235 tmp = XEXP (addr, 1);
2236 if (GET_CODE (tmp) != CONST_INT)
/* Shift count becomes the scale exponent; only 1/2/4/8 (count 0-3)
   are representable. */
2238 scale = INTVAL (tmp);
2239 if ((unsigned HOST_WIDE_INT) scale > 3)
2244 disp = addr; /* displacement */
2246 /* Extract the integral value of scale. */
2249 if (GET_CODE (scale_rtx) != CONST_INT)
2251 scale = INTVAL (scale_rtx);
2254 /* Allow arg pointer and stack pointer as index if there is not scaling */
/* %esp cannot be an index in the SIB encoding; swapping base and index
   is legal only with scale 1. */
2255 if (base && index && scale == 1
2256 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2257 || index == stack_pointer_rtx))
2264 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2265 if ((base == hard_frame_pointer_rtx
2266 || base == frame_pointer_rtx
2267 || base == arg_pointer_rtx) && !disp)
2270 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2271 Avoid this by transforming to [%esi+0]. */
2272 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2273 && base && !index && !disp
2275 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2278 /* Special case: encode reg+reg instead of reg*2. */
2279 if (!base && index && scale && scale == 2)
2280 base = index, scale = 1;
2282 /* Special case: scaling cannot be encoded without base or displacement. */
2283 if (!base && !disp && index && scale != 1)
2294 /* Return cost of the memory address x.
2295 For i386, it is better to use a complex address than let gcc copy
2296 the address into a reg and make a new pseudo. But not if the address
2297 requires to two regs - that would mean more pseudos with longer
/* ... lifetimes. */
2300 ix86_address_cost (x)
2303 struct ix86_address parts;
2306 if (!ix86_decompose_address (x, &parts))
2309 /* More complex memory references are better. */
2310 if (parts.disp && parts.disp != const0_rtx)
2313 /* Attempt to minimize number of registers in the address. */
/* Pseudo (not-yet-allocated) registers count against the address. */
2315 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2317 && (!REG_P (parts.index)
2318 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2322 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2324 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2325 && parts.base != parts.index)
2328 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
2329 since its predecode logic can't detect the length of instructions
2330 and it degenerates to vector decoded. Increase cost of such
2331 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2332 to split such addresses or even refuse such addresses at all.
2334 Following addressing modes are affected:
2339 The first and last case may be avoidable by explicitly coding the zero in
2340 memory address, but I don't have AMD-K6 machine handy to check this
/* ... theory (comment continues past elided lines). */
2344 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2345 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2346 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2352 /* If X is a machine specific address (i.e. a symbol or label being
2353 referenced as a displacement from the GOT implemented using an
2354 UNSPEC), then return the base term. Otherwise return X. */
2357 ix86_find_base_term (x)
2362 if (GET_CODE (x) != PLUS
2363 || XEXP (x, 0) != pic_offset_table_rtx
2364 || GET_CODE (XEXP (x, 1)) != CONST)
2367 term = XEXP (XEXP (x, 1), 0);
/* Strip an optional integer offset from the @GOTOFF term. */
2369 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2370 term = XEXP (term, 0);
/* Only unspec 7 (@GOTOFF) with a single operand qualifies. */
2372 if (GET_CODE (term) != UNSPEC
2373 || XVECLEN (term, 0) != 1
2374 || XINT (term, 1) != 7)
2377 term = XVECEXP (term, 0, 0);
2379 if (GET_CODE (term) != SYMBOL_REF
2380 && GET_CODE (term) != LABEL_REF)
2386 /* Determine if a given CONST RTX is a valid memory displacement
/* ... under PIC: a @GOT (unspec 6) or @GOTOFF (unspec 7) reference,
   optionally offset by a CONST_INT, wrapping a symbol or label. */
2390 legitimate_pic_address_disp_p (disp)
2393 if (GET_CODE (disp) != CONST)
2395 disp = XEXP (disp, 0);
2397 if (GET_CODE (disp) == PLUS)
2399 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2401 disp = XEXP (disp, 0);
2404 if (GET_CODE (disp) != UNSPEC
2405 || XVECLEN (disp, 0) != 1)
2408 /* Must be @GOT or @GOTOFF. */
2409 if (XINT (disp, 1) != 6
2410 && XINT (disp, 1) != 7)
2413 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2414 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2420 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2421 memory address for an instruction. The MODE argument is the machine mode
2422 for the MEM expression that wants to use this address.
2424 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2425 convert common non-canonical forms to canonical form so that they will
/* ... be recognized.  On failure the `reason' string and `reason_rtx'
   are reported under TARGET_DEBUG_ADDR. */
2429 legitimate_address_p (mode, addr, strict)
2430 enum machine_mode mode;
2434 struct ix86_address parts;
2435 rtx base, index, disp;
2436 HOST_WIDE_INT scale;
2437 const char *reason = NULL;
2438 rtx reason_rtx = NULL_RTX;
2440 if (TARGET_DEBUG_ADDR)
2443 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2444 GET_MODE_NAME (mode), strict);
2448 if (! ix86_decompose_address (addr, &parts))
2450 reason = "decomposition failed";
2455 index = parts.index;
2457 scale = parts.scale;
2459 /* Validate base register.
2461 Don't allow SUBREG's here, it can lead to spill failures when the base
2462 is one word out of a two word structure, which is represented internally
/* ... as a DImode int. */
2469 if (GET_CODE (base) != REG)
2471 reason = "base is not a register";
2475 if (GET_MODE (base) != Pmode)
2477 reason = "base is not in Pmode";
/* STRICT requires hard registers (post-reload); non-strict also
   accepts pseudos. */
2481 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2482 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2484 reason = "base is not valid";
2489 /* Validate index register.
2491 Don't allow SUBREG's here, it can lead to spill failures when the index
2492 is one word out of a two word structure, which is represented internally
/* ... as a DImode int. */
2499 if (GET_CODE (index) != REG)
2501 reason = "index is not a register";
2505 if (GET_MODE (index) != Pmode)
2507 reason = "index is not in Pmode";
2511 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2512 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2514 reason = "index is not valid";
2519 /* Validate scale factor. */
2522 reason_rtx = GEN_INT (scale);
2525 reason = "scale without index";
2529 if (scale != 2 && scale != 4 && scale != 8)
2531 reason = "scale is not a valid multiplier";
2536 /* Validate displacement. */
2541 if (!CONSTANT_ADDRESS_P (disp))
2543 reason = "displacement is not constant";
2547 if (GET_CODE (disp) == CONST_DOUBLE)
2549 reason = "displacement is a const_double";
2553 if (flag_pic && SYMBOLIC_CONST (disp))
2555 if (! legitimate_pic_address_disp_p (disp))
2557 reason = "displacement is an invalid pic construct";
2561 /* This code used to verify that a symbolic pic displacement
2562 includes the pic_offset_table_rtx register.
2564 While this is good idea, unfortunately these constructs may
2565 be created by "adds using lea" optimization for incorrect
/* ... code (full example elided in this listing). */
2574 This code is nonsensical, but results in addressing
2575 GOT table with pic_offset_table_rtx base. We can't
2576 just refuse it easily, since it gets matched by
2577 "addsi3" pattern, that later gets split to lea in the
2578 case output register differs from input. While this
2579 can be handled by separate addsi pattern for this case
2580 that never results in lea, this seems to be easier and
2581 correct fix for crash to disable this test. */
2583 else if (HALF_PIC_P ())
2585 if (! HALF_PIC_ADDRESS_P (disp)
2586 || (base != NULL_RTX || index != NULL_RTX))
2588 reason = "displacement is an invalid half-pic reference";
2594 /* Everything looks valid. */
2595 if (TARGET_DEBUG_ADDR)
2596 fprintf (stderr, "Success.\n");
2600 if (TARGET_DEBUG_ADDR)
2602 fprintf (stderr, "Error: %s\n", reason);
2603 debug_rtx (reason_rtx);
2608 /* Return an unique alias set for the GOT. */
2610 static HOST_WIDE_INT
2611 ix86_GOT_alias_set ()
/* SET starts at -1 as a "not yet allocated" sentinel; presumably an
   elided guard calls new_alias_set only on the first invocation and
   the cached value is returned thereafter -- TODO confirm against
   the full source.  */
2613 static HOST_WIDE_INT set = -1;
2615 set = new_alias_set ();
2619 /* Return a legitimate reference for ORIG (an address) using the
2620 register REG. If REG is 0, a new pseudo is generated.
2622 There are two types of references that must be handled:
2624 1. Global data references must load the address from the GOT, via
2625 the PIC reg. An insn is emitted to do this load, and the reg is
2628 2. Static data references, constant pool addresses, and code labels
2629 compute the address as an offset from the GOT, whose base is in
2630 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2631 differentiate them from global data objects. The returned
2632 address is the PIC reg + an unspec constant.
2634 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2635 reg also appears in the address. */
2638 legitimize_pic_address (orig, reg)
/* Local symbols, constant-pool entries and labels: address them as
   pic_reg + @GOTOFF (UNSPEC number 7).  */
2646 if (GET_CODE (addr) == LABEL_REF
2647 || (GET_CODE (addr) == SYMBOL_REF
2648 && (CONSTANT_POOL_ADDRESS_P (addr)
2649 || SYMBOL_REF_FLAG (addr))))
2651 /* This symbol may be referenced via a displacement from the PIC
2652 base address (@GOTOFF). */
2654 current_function_uses_pic_offset_table = 1;
2655 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2656 new = gen_rtx_CONST (Pmode, new);
2657 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2661 emit_move_insn (reg, new);
2665 else if (GET_CODE (addr) == SYMBOL_REF)
2667 /* This symbol must be referenced via a load from the
2668 Global Offset Table (@GOT). */
2670 current_function_uses_pic_offset_table = 1;
/* Build a MEM of (pic_reg + @GOT unspec 6).  The GOT slot does not
   change during the function's lifetime, hence RTX_UNCHANGING_P and
   the dedicated alias set.  */
2671 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2672 new = gen_rtx_CONST (Pmode, new);
2673 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2674 new = gen_rtx_MEM (Pmode, new);
2675 RTX_UNCHANGING_P (new) = 1;
2676 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2679 reg = gen_reg_rtx (Pmode);
2680 emit_move_insn (reg, new);
/* Composite constants: strip a CONST wrapper, pass through already
   legitimized UNSPECs, and handle PLUS expressions below.  */
2685 if (GET_CODE (addr) == CONST)
2687 addr = XEXP (addr, 0);
2688 if (GET_CODE (addr) == UNSPEC)
2690 /* Check that the unspec is one of the ones we generate? */
2692 else if (GET_CODE (addr) != PLUS)
2695 if (GET_CODE (addr) == PLUS)
2697 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2699 /* Check first to see if this is a constant offset from a @GOTOFF
2700 symbol reference. */
2701 if ((GET_CODE (op0) == LABEL_REF
2702 || (GET_CODE (op0) == SYMBOL_REF
2703 && (CONSTANT_POOL_ADDRESS_P (op0)
2704 || SYMBOL_REF_FLAG (op0))))
2705 && GET_CODE (op1) == CONST_INT)
2707 current_function_uses_pic_offset_table = 1;
2708 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2709 new = gen_rtx_PLUS (Pmode, new, op1);
2710 new = gen_rtx_CONST (Pmode, new);
2711 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2715 emit_move_insn (reg, new);
/* Otherwise legitimize the two halves of the PLUS recursively and
   recombine, folding a trailing constant term when possible.  */
2721 base = legitimize_pic_address (XEXP (addr, 0), reg);
2722 new = legitimize_pic_address (XEXP (addr, 1),
2723 base == reg ? NULL_RTX : reg);
2725 if (GET_CODE (new) == CONST_INT)
2726 new = plus_constant (base, INTVAL (new));
2729 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2731 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2732 new = XEXP (new, 1);
2734 new = gen_rtx_PLUS (Pmode, base, new);
2742 /* Try machine-dependent ways of modifying an illegitimate address
2743 to be legitimate. If we find one, return the new, valid address.
2744 This macro is used in only one place: `memory_address' in explow.c.
2746 OLDX is the address as it was before break_out_memory_refs was called.
2747 In some cases it is useful to look at this to decide what needs to be done.
2749 MODE and WIN are passed so that this macro can use
2750 GO_IF_LEGITIMATE_ADDRESS.
2752 It is always safe for this macro to do nothing. It exists to recognize
2753 opportunities to optimize the output.
2755 For the 80386, we handle X+REG by loading X into a register R and
2756 using R+REG. R will go in a general reg and indexing will be used.
2757 However, if REG is a broken-out memory address or multiplication,
2758 nothing needs to be done because REG can certainly go in a general reg.
2760 When -fpic is used, special handling is needed for symbolic references.
2761 See comments by legitimize_pic_address in i386.c for details. */
2764 legitimize_address (x, oldx, mode)
2766 register rtx oldx ATTRIBUTE_UNUSED;
2767 enum machine_mode mode;
2772 if (TARGET_DEBUG_ADDR)
2774 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2775 GET_MODE_NAME (mode));
/* Symbolic addresses under PIC are handled entirely by
   legitimize_pic_address.  */
2779 if (flag_pic && SYMBOLIC_CONST (x))
2780 return legitimize_pic_address (x, 0);
2782 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2783 if (GET_CODE (x) == ASHIFT
2784 && GET_CODE (XEXP (x, 1)) == CONST_INT
2785 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2788 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2789 GEN_INT (1 << log));
2792 if (GET_CODE (x) == PLUS)
2794 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2796 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2797 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2798 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2801 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2802 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2803 GEN_INT (1 << log));
2806 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2807 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2808 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2811 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2812 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2813 GEN_INT (1 << log));
2816 /* Put multiply first if it isn't already. */
2817 if (GET_CODE (XEXP (x, 1)) == MULT)
2819 rtx tmp = XEXP (x, 0);
2820 XEXP (x, 0) = XEXP (x, 1);
2825 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2826 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2827 created by virtual register instantiation, register elimination, and
2828 similar optimizations. */
2829 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2832 x = gen_rtx_PLUS (Pmode,
2833 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2834 XEXP (XEXP (x, 1), 0)),
2835 XEXP (XEXP (x, 1), 1));
2839 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2840 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2841 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2842 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2843 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2844 && CONSTANT_P (XEXP (x, 1)))
2847 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT; the other
   (symbolic) part becomes OTHER and absorbs the integer offset.  */
2849 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2851 constant = XEXP (x, 1);
2852 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2854 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2856 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2857 other = XEXP (x, 1);
2865 x = gen_rtx_PLUS (Pmode,
2866 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2867 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2868 plus_constant (other, INTVAL (constant)));
/* If any canonicalization above helped, stop as soon as the address
   has become legitimate (non-strict check: pre-reload).  */
2872 if (changed && legitimate_address_p (mode, x, FALSE))
2875 if (GET_CODE (XEXP (x, 0)) == MULT)
2878 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2881 if (GET_CODE (XEXP (x, 1)) == MULT)
2884 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2888 && GET_CODE (XEXP (x, 1)) == REG
2889 && GET_CODE (XEXP (x, 0)) == REG)
2892 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2895 x = legitimize_pic_address (x, 0);
2898 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half of the PLUS into a fresh
   pseudo so the result is reg+reg.  */
2901 if (GET_CODE (XEXP (x, 0)) == REG)
2903 register rtx temp = gen_reg_rtx (Pmode);
2904 register rtx val = force_operand (XEXP (x, 1), temp);
2906 emit_move_insn (temp, val);
2912 else if (GET_CODE (XEXP (x, 1)) == REG)
2914 register rtx temp = gen_reg_rtx (Pmode);
2915 register rtx val = force_operand (XEXP (x, 0), temp);
2917 emit_move_insn (temp, val);
2927 /* Print an integer constant expression in assembler syntax. Addition
2928 and subtraction are the only arithmetic that may appear in these
2929 expressions. FILE is the stdio stream to write to, X is the rtx, and
2930 CODE is the operand print code from the output string. */
2933 output_pic_addr_const (file, x, code)
2940 switch (GET_CODE (x))
/* SYMBOL_REF: print the name; global functions ('P' code, no
   SYMBOL_REF_FLAG) get a @PLT relocation suffix.  */
2950 assemble_name (file, XSTR (x, 0));
2951 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2952 fputs ("@PLT", file);
2959 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2960 assemble_name (asm_out_file, buf);
2964 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2968 /* This used to output parentheses around the expression,
2969 but that does not work on the 386 (either ATT or BSD assembler). */
2970 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE used as an integer (VOIDmode): print as hex when it
   does not fit a positive 32-bit value, decimal otherwise.  */
2974 if (GET_MODE (x) == VOIDmode)
2976 /* We can use %d if the number is <32 bits and positive. */
2977 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2978 fprintf (file, "0x%lx%08lx",
2979 (unsigned long) CONST_DOUBLE_HIGH (x),
2980 (unsigned long) CONST_DOUBLE_LOW (x));
2982 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2985 /* We can't handle floating point constants;
2986 PRINT_OPERAND must handle them. */
2987 output_operand_lossage ("floating constant misused");
2991 /* Some assemblers need integer constants to appear first. */
2992 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2994 output_pic_addr_const (file, XEXP (x, 0), code);
2996 output_pic_addr_const (file, XEXP (x, 1), code);
2998 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3000 output_pic_addr_const (file, XEXP (x, 1), code);
3002 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket the expression; bracket style follows the
   assembler dialect (AT&T vs Intel).  */
3009 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3010 output_pic_addr_const (file, XEXP (x, 0), code);
3012 output_pic_addr_const (file, XEXP (x, 1), code);
3013 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
/* UNSPEC: the PIC relocations built by legitimize_pic_address --
   print the wrapped symbol followed by its relocation suffix.  */
3017 if (XVECLEN (x, 0) != 1)
3019 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3020 switch (XINT (x, 1))
3023 fputs ("@GOT", file);
3026 fputs ("@GOTOFF", file);
3029 fputs ("@PLT", file);
3032 output_operand_lossage ("invalid UNSPEC as operand");
3038 output_operand_lossage ("invalid expression as operand");
3042 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3043 We need to handle our special PIC relocations. */
3046 i386_dwarf_output_addr_const (file, x)
/* Emit the integer directive, then the constant -- through the PIC
   printer when PIC relocations may be present (presumably the elided
   condition tests flag_pic -- TODO confirm).  */
3050 fprintf (file, "%s", INT_ASM_OP);
3052 output_pic_addr_const (file, x, '\0');
3054 output_addr_const (file, x);
3058 /* In the name of slightly smaller debug output, and to cater to
3059 general assembler losage, recognize PIC+GOTOFF and turn it back
3060 into a direct symbol reference. */
3063 i386_simplify_dwarf_addr (orig_x)
/* Only (plus (reg) (const ...)) can be a PIC reference; anything
   else is returned unchanged (by the elided fallthrough).  */
3068 if (GET_CODE (x) != PLUS
3069 || GET_CODE (XEXP (x, 0)) != REG
3070 || GET_CODE (XEXP (x, 1)) != CONST)
/* Unwrap the CONST; UNSPEC 6 (@GOT) or 7 (@GOTOFF) holds the bare
   symbol in its operand vector.  */
3073 x = XEXP (XEXP (x, 1), 0);
3074 if (GET_CODE (x) == UNSPEC
3075 && (XINT (x, 1) == 6
3076 || XINT (x, 1) == 7))
3077 return XVECEXP (x, 0, 0);
/* Same, with an integer offset added to the unspec: rebuild
   symbol + offset without the PIC register.  */
3079 if (GET_CODE (x) == PLUS
3080 && GET_CODE (XEXP (x, 0)) == UNSPEC
3081 && GET_CODE (XEXP (x, 1)) == CONST_INT
3082 && (XINT (XEXP (x, 0), 1) == 6
3083 || XINT (XEXP (x, 0), 1) == 7))
3084 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
/* Write the assembler condition-code suffix for comparison CODE in
   CC mode MODE to FILE.  REVERSE inverts the condition first; FP
   selects spellings compatible with fcmov/old assemblers.  */
3090 put_condition_code (code, mode, reverse, fp, file)
3092 enum machine_mode mode;
3099 code = reverse_condition (code);
/* Signed orderings are only meaningful in the full-flags CC modes.  */
3110 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3115 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3116 Those same assemblers have the same but opposite losage on cmov. */
3119 suffix = fp ? "nbe" : "a";
3122 if (mode == CCNOmode || mode == CCGOCmode)
3124 else if (mode == CCmode || mode == CCGCmode)
3135 if (mode == CCNOmode || mode == CCGOCmode)
3137 else if (mode == CCmode || mode == CCGCmode)
3146 suffix = fp ? "nb" : "ae";
3149 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3167 fputs (suffix, file);
/* Print the assembler name of hard register X to FILE.  CODE selects
   the register size/flavor ('b' = byte, 'k' = SImode, 'y' = "st(0)"
   form, 'h' = high byte, 'm' = mmx); otherwise the size is taken
   from X's machine mode.  */
3171 print_reg (x, code, file)
/* These soft registers never appear in assembler output.  */
3176 if (REGNO (x) == ARG_POINTER_REGNUM
3177 || REGNO (x) == FRAME_POINTER_REGNUM
3178 || REGNO (x) == FLAGS_REG
3179 || REGNO (x) == FPSR_REG)
/* AT&T syntax (or empty label prefix) puts '%' before the name.  */
3182 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3187 else if (code == 'b')
3189 else if (code == 'k')
3191 else if (code == 'y')
3193 else if (code == 'h')
3195 else if (code == 'm' || MMX_REG_P (x))
/* No explicit size code: derive it from the operand's mode.  */
3198 code = GET_MODE_SIZE (GET_MODE (x));
3203 fputs (hi_reg_name[REGNO (x)], file);
3206 if (STACK_TOP_P (x))
3208 fputs ("st(0)", file);
3220 fputs (hi_reg_name[REGNO (x)], file);
3223 fputs (qi_reg_name[REGNO (x)], file);
3226 fputs (qi_high_reg_name[REGNO (x)], file);
3234 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3235 C -- print opcode suffix for set/cmov insn.
3236 c -- like C, but print reversed condition
3237 R -- print the prefix for register names.
3238 z -- print the opcode suffix for the size of the current operand.
3239 * -- print a star (in certain assembler syntax)
3240 A -- print an absolute memory reference.
3241 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3242 s -- print a shift double count, followed by the assemblers argument
3244 b -- print the QImode name of the register for the indicated operand.
3245 %b0 would print %al if operands[0] is reg 0.
3246 w -- likewise, print the HImode name of the register.
3247 k -- likewise, print the SImode name of the register.
3248 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3249 y -- print "st(0)" instead of "st" as a register.
3250 m -- print "st(n)" as an mmx register. */
/* Central operand printer: dispatch on CODE first (the elided switch
   cases correspond to the codes documented above), then fall through
   to print X itself by its rtx class.  */
3253 print_operand (file, x, code)
3263 if (ASSEMBLER_DIALECT == 0)
3268 if (ASSEMBLER_DIALECT == 0)
3270 else if (ASSEMBLER_DIALECT == 1)
3272 /* Intel syntax. For absolute addresses, registers should not
3273 be surrounded by braces. */
3274 if (GET_CODE (x) != REG)
3277 PRINT_OPERAND (file, x, 0);
3283 PRINT_OPERAND (file, x, 0);
3288 if (ASSEMBLER_DIALECT == 0)
3293 if (ASSEMBLER_DIALECT == 0)
3298 if (ASSEMBLER_DIALECT == 0)
3303 if (ASSEMBLER_DIALECT == 0)
3308 if (ASSEMBLER_DIALECT == 0)
3313 if (ASSEMBLER_DIALECT == 0)
3318 /* 387 opcodes don't get size suffixes if the operands are
3321 if (STACK_REG_P (x))
3324 /* this is the size of op from size of operand */
3325 switch (GET_MODE_SIZE (GET_MODE (x)))
3328 #ifdef HAVE_GAS_FILDS_FISTS
3334 if (GET_MODE (x) == SFmode)
3349 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3351 #ifdef GAS_MNEMONICS
/* 's' code: shift-double count; constants are printed explicitly,
   a cl register count is implied otherwise.  */
3377 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3379 PRINT_OPERAND (file, x, 0);
/* Condition-code suffixes: (reverse, fp) pairs for C/F and the
   reversed variants c/f.  */
3385 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3388 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3391 /* Like above, but reverse condition */
3393 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3396 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3402 sprintf (str, "invalid operand code `%c'", code);
3403 output_operand_lossage (str);
/* No special code (or fallthrough): print X by its own shape.  */
3408 if (GET_CODE (x) == REG)
3410 PRINT_REG (x, code, file);
3413 else if (GET_CODE (x) == MEM)
3415 /* No `byte ptr' prefix for call instructions. */
3416 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3419 switch (GET_MODE_SIZE (GET_MODE (x)))
3421 case 1: size = "BYTE"; break;
3422 case 2: size = "WORD"; break;
3423 case 4: size = "DWORD"; break;
3424 case 8: size = "QWORD"; break;
3425 case 12: size = "XWORD"; break;
3426 case 16: size = "XMMWORD"; break;
3431 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3434 else if (code == 'w')
3436 else if (code == 'k')
3440 fputs (" PTR ", file);
3444 if (flag_pic && CONSTANT_ADDRESS_P (x))
3445 output_pic_addr_const (file, x, code);
/* SFmode constants are emitted as their 32-bit target image.  */
3450 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3455 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3456 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3458 if (ASSEMBLER_DIALECT == 0)
3460 fprintf (file, "0x%lx", l);
3463 /* These float cases don't actually occur as immediate operands. */
3464 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3469 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3470 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3471 fprintf (file, "%s", dstr);
3474 else if (GET_CODE (x) == CONST_DOUBLE
3475 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3481 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3482 fprintf (file, "%s", dstr);
/* Immediates: '$' prefix in AT&T syntax; symbolic immediates get
   "OFFSET FLAT:" in Intel syntax.  */
3488 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3490 if (ASSEMBLER_DIALECT == 0)
3493 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3494 || GET_CODE (x) == LABEL_REF)
3496 if (ASSEMBLER_DIALECT == 0)
3499 fputs ("OFFSET FLAT:", file);
3502 if (GET_CODE (x) == CONST_INT)
3503 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3505 output_pic_addr_const (file, x, code);
3507 output_addr_const (file, x);
3511 /* Print a memory operand whose address is ADDR. */
3514 print_operand_address (file, addr)
3518 struct ix86_address parts;
3519 rtx base, index, disp;
/* Reuse the legitimizer's decomposition so output always matches
   what legitimate_address_p accepted.  */
3522 if (! ix86_decompose_address (addr, &parts))
3526 index = parts.index;
3528 scale = parts.scale;
/* Absolute (displacement-only) address.  */
3530 if (!base && !index)
3532 /* Displacement only requires special attention. */
3534 if (GET_CODE (disp) == CONST_INT)
3536 if (ASSEMBLER_DIALECT != 0)
3538 if (USER_LABEL_PREFIX[0] == 0)
3540 fputs ("ds:", file);
3542 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr))$
3545 output_pic_addr_const (file, addr, 0);
3547 output_addr_const (file, addr);
/* AT&T syntax: disp(base,index,scale).  */
3551 if (ASSEMBLER_DIALECT == 0)
3556 output_pic_addr_const (file, disp, 0);
3557 else if (GET_CODE (disp) == LABEL_REF)
3558 output_asm_label (disp);
3560 output_addr_const (file, disp);
3565 PRINT_REG (base, 0, file);
3569 PRINT_REG (index, 0, file);
3571 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp], with the symbolic part of
   DISP printed before the brackets and the integer offset inside.  */
3577 rtx offset = NULL_RTX;
3581 /* Pull out the offset of a symbol; print any symbol itself. */
3582 if (GET_CODE (disp) == CONST
3583 && GET_CODE (XEXP (disp, 0)) == PLUS
3584 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3586 offset = XEXP (XEXP (disp, 0), 1);
3587 disp = gen_rtx_CONST (VOIDmode,
3588 XEXP (XEXP (disp, 0), 0));
3592 output_pic_addr_const (file, disp, 0);
3593 else if (GET_CODE (disp) == LABEL_REF)
3594 output_asm_label (disp);
3595 else if (GET_CODE (disp) == CONST_INT)
3598 output_addr_const (file, disp);
3604 PRINT_REG (base, 0, file);
3607 if (INTVAL (offset) >= 0)
3609 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3613 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3620 PRINT_REG (index, 0, file);
3622 fprintf (file, "*%d", scale);
3629 /* Split one or more DImode RTL references into pairs of SImode
3630 references. The RTL can be REG, offsettable MEM, integer constant, or
3631 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3632 split and "num" is its length. lo_half and hi_half are output arrays
3633 that parallel "operands". */
3636 split_di (operands, num, lo_half, hi_half)
3639 rtx lo_half[], hi_half[];
3643 rtx op = operands[num];
/* Constants split via the generic helper; before reload we can use
   lowpart/highpart on pseudos too.  */
3644 if (CONSTANT_P (op))
3645 split_double (op, &lo_half[num], &hi_half[num]);
3646 else if (! reload_completed)
3648 lo_half[num] = gen_lowpart (SImode, op);
3649 hi_half[num] = gen_highpart (SImode, op);
/* After reload: a hard DImode register occupies two consecutive
   SImode hard registers.  */
3651 else if (GET_CODE (op) == REG)
3653 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3654 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
/* Memory: the high word lives 4 bytes past the low word.  */
3656 else if (offsettable_memref_p (op))
3658 rtx lo_addr = XEXP (op, 0);
3659 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3660 lo_half[num] = change_address (op, SImode, lo_addr);
3661 hi_half[num] = change_address (op, SImode, hi_addr);
3668 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3669 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3670 is the expression of the binary operation. The output may either be
3671 emitted here, or returned to the caller, like all output_* functions.
3673 There is no guarantee that the operands are the same mode, as they
3674 might be within FLOAT or FLOAT_EXTEND expressions. */
3676 #ifndef SYSV386_COMPAT
3677 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3678 wants to fix the assemblers because that causes incompatibility
3679 with gcc. No-one wants to fix gcc because that causes
3680 incompatibility with assemblers... You can use the option of
3681 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3682 #define SYSV386_COMPAT 1
3686 output_387_binary_op (insn, operands)
3690 static char buf[30];
3693 #ifdef ENABLE_CHECKING
3694 /* Even if we do not want to check the inputs, this documents input
3695 constraints. Which helps in understanding the following code. */
3696 if (STACK_REG_P (operands[0])
3697 && ((REG_P (operands[1])
3698 && REGNO (operands[0]) == REGNO (operands[1])
3699 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3700 || (REG_P (operands[2])
3701 && REGNO (operands[0]) == REGNO (operands[2])
3702 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3703 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the base mnemonic; integer-mode operands use the fi* forms
   (fiadd/fisub/fimul/fidiv).  */
3709 switch (GET_CODE (operands[3]))
3712 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3713 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3720 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3721 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3728 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3729 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3736 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3737 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* Choose the operand/suffix template.  Commutative ops (MULT, PLUS)
   may have their operands swapped freely.  */
3749 switch (GET_CODE (operands[3]))
3753 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3755 rtx temp = operands[2];
3756 operands[2] = operands[1];
3760 /* know operands[0] == operands[1]. */
3762 if (GET_CODE (operands[2]) == MEM)
3768 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3770 if (STACK_TOP_P (operands[0]))
3771 /* How is it that we are storing to a dead operand[2]?
3772 Well, presumably operands[1] is dead too. We can't
3773 store the result to st(0) as st(0) gets popped on this
3774 instruction. Instead store to operands[2] (which I
3775 think has to be st(1)). st(1) will be popped later.
3776 gcc <= 2.8.1 didn't have this check and generated
3777 assembly code that the Unixware assembler rejected. */
3778 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3780 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3784 if (STACK_TOP_P (operands[0]))
3785 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3787 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* MINUS/DIV: not commutative -- the reversed (r) forms and the
   SYSV386_COMPAT assembler quirk must both be honored.  */
3792 if (GET_CODE (operands[1]) == MEM)
3798 if (GET_CODE (operands[2]) == MEM)
3804 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3807 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3808 derived assemblers, confusingly reverse the direction of
3809 the operation for fsub{r} and fdiv{r} when the
3810 destination register is not st(0). The Intel assembler
3811 doesn't have this brain damage. Read !SYSV386_COMPAT to
3812 figure out what the hardware really does. */
3813 if (STACK_TOP_P (operands[0]))
3814 p = "{p\t%0, %2|rp\t%2, %0}";
3816 p = "{rp\t%2, %0|p\t%0, %2}";
3818 if (STACK_TOP_P (operands[0]))
3819 /* As above for fmul/fadd, we can't store to st(0). */
3820 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3822 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3827 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3830 if (STACK_TOP_P (operands[0]))
3831 p = "{rp\t%0, %1|p\t%1, %0}";
3833 p = "{p\t%1, %0|rp\t%0, %1}";
3835 if (STACK_TOP_P (operands[0]))
3836 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3838 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3843 if (STACK_TOP_P (operands[0]))
3845 if (STACK_TOP_P (operands[1]))
3846 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3848 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3851 else if (STACK_TOP_P (operands[1]))
3854 p = "{\t%1, %0|r\t%0, %1}";
3856 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3862 p = "{r\t%2, %0|\t%0, %2}";
3864 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3877 /* Output code for INSN to convert a float to a signed int. OPERANDS
3878 are the insn operands. The output may be [HSD]Imode and the input
3879 operand may be [SDX]Fmode. */
3882 output_fix_trunc (insn, operands)
3886 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3887 int dimode_p = GET_MODE (operands[0]) == DImode;
3890 /* Jump through a hoop or two for DImode, since the hardware has no
3891 non-popping instruction. We used to do this a different way, but
3892 that was somewhat fragile and broke with post-reload splitters. */
3893 if (dimode_p && !stack_top_dies)
3894 output_asm_insn ("fld\t%y1", operands);
3896 if (! STACK_TOP_P (operands[1]))
/* Force the rounding mode to truncation: save the FPU control word,
   set its rounding-control bits (byte at offset 1; 12 = RC bits),
   fldcw the modified copy, then restore afterwards.  */
3899 xops[0] = GEN_INT (12);
3900 xops[1] = adj_offsettable_operand (operands[2], 1);
3901 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3903 xops[2] = operands[0];
3904 if (GET_CODE (operands[0]) != MEM)
3905 xops[2] = operands[3];
3907 output_asm_insn ("fnstcw\t%2", operands);
3908 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3909 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3910 output_asm_insn ("fldcw\t%2", operands);
3911 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
/* fistp (popping) when the stack top dies or for DImode (which has
   no non-popping form); plain fist otherwise.  */
3913 if (stack_top_dies || dimode_p)
3914 output_asm_insn ("fistp%z2\t%2", xops);
3916 output_asm_insn ("fist%z2\t%2", xops);
3918 output_asm_insn ("fldcw\t%2", operands);
/* Register destination: the store above went to a scratch memory
   slot; copy the result word(s) into the destination register(s).  */
3920 if (GET_CODE (operands[0]) != MEM)
3924 split_di (operands+0, 1, xops+0, xops+1);
3925 split_di (operands+3, 1, xops+2, xops+3);
3926 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3927 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3929 else if (GET_MODE (operands[0]) == SImode)
3930 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3932 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3938 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3939 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3940 when fucom should be used. */
3943 output_fp_compare (insn, operands, eflags_p, unordered_p)
3946 int eflags_p, unordered_p;
3949 rtx cmp_op0 = operands[0];
3950 rtx cmp_op1 = operands[1];
3955 cmp_op1 = operands[2];
3958 if (! STACK_TOP_P (cmp_op0))
3961 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3963 if (STACK_REG_P (cmp_op1)
3965 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3966 && REGNO (cmp_op1) != FIRST_STACK_REG)
3968 /* If both the top of the 387 stack dies, and the other operand
3969 is also a stack register that dies, then this must be a
3970 `fcompp' float compare */
3974 /* There is no double popping fcomi variant. Fortunately,
3975 eflags is immune from the fstp's cc clobbering. */
3977 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3979 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3987 return "fucompp\n\tfnstsw\t%0";
3989 return "fcompp\n\tfnstsw\t%0";
4002 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4004 static const char * const alt[24] =
4016 "fcomi\t{%y1, %0|%0, %y1}",
4017 "fcomip\t{%y1, %0|%0, %y1}",
4018 "fucomi\t{%y1, %0|%0, %y1}",
4019 "fucomip\t{%y1, %0|%0, %y1}",
4026 "fcom%z2\t%y2\n\tfnstsw\t%0",
4027 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4028 "fucom%z2\t%y2\n\tfnstsw\t%0",
4029 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4031 "ficom%z2\t%y2\n\tfnstsw\t%0",
4032 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 5-bit index into ALT from the flag bits, matching the
   encoding described at line 4002 above.  */
4040 mask = eflags_p << 3;
4041 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4042 mask |= unordered_p << 1;
4043 mask |= stack_top_dies;
4055 /* Output assembler code to FILE to initialize basic-block profiling.
4057 If profile_block_flag == 2
4059 Output code to call the subroutine `__bb_init_trace_func'
4060 and pass two parameters to it. The first parameter is
4061 the address of a block allocated in the object module.
4062 The second parameter is the number of the first basic block
4065 The name of the block is a local symbol made with this statement:
4067 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4069 Of course, since you are writing the definition of
4070 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4071 can take a short cut in the definition of this macro and use the
4072 name that you know will result.
4074 The number of the first basic block of the function is
4075 passed to the macro in BLOCK_OR_LABEL.
4077 If described in a virtual assembler language the code to be
4081 parameter2 <- BLOCK_OR_LABEL
4082 call __bb_init_trace_func
4084 else if profile_block_flag != 0
4086 Output code to call the subroutine `__bb_init_func'
4087 and pass one single parameter to it, which is the same
4088 as the first parameter to `__bb_init_trace_func'.
4090 The first word of this parameter is a flag which will be nonzero if
4091 the object module has already been initialized. So test this word
4092 first, and do not call `__bb_init_func' if the flag is nonzero.
4093 Note: When profile_block_flag == 2 the test need not be done
4094 but `__bb_init_trace_func' *must* be called.
4096 BLOCK_OR_LABEL may be used to generate a label number as a
4097 branch destination in case `__bb_init_func' will not be called.
4099 If described in a virtual assembler language the code to be
4110 ix86_output_function_block_profiler (file, block_or_label)
4114 static int num_func = 0;
4116 char block_table[80], false_label[80];
4118 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4120 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4121 xops[5] = stack_pointer_rtx;
4122 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4124 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4126 switch (profile_block_flag)
/* flag == 2: push both arguments and call __bb_init_trace_func,
   then pop the 8 bytes of arguments off the stack.  */
4129 xops[2] = GEN_INT (block_or_label);
4130 xops[3] = gen_rtx_MEM (Pmode,
4131 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4132 xops[6] = GEN_INT (8);
4134 output_asm_insn ("push{l}\t%2", xops);
4136 output_asm_insn ("push{l}\t%1", xops);
/* PIC: the table address must be computed via lea, not pushed as an
   absolute immediate.  */
4139 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4140 output_asm_insn ("push{l}\t%7", xops);
4142 output_asm_insn ("call\t%P3", xops);
4143 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
/* Other nonzero flag: test the table's already-initialized word and
   skip the __bb_init_func call when it is set.  */
4147 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4149 xops[0] = const0_rtx;
4150 xops[2] = gen_rtx_MEM (Pmode,
4151 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4152 xops[3] = gen_rtx_MEM (Pmode,
4153 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4154 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4155 xops[6] = GEN_INT (4);
4157 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4159 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4160 output_asm_insn ("jne\t%2", xops);
4163 output_asm_insn ("push{l}\t%1", xops);
/* NOTE(review): the Intel-syntax half of this template says %a2
   where the AT&T half says %a1; the equivalent lea in the
   profile_block_flag == 2 case above uses %a1 on both sides.  This
   looks like a typo in the Intel operand -- confirm against the
   full source and fix to "lea{l}\t{%a1, %7|%7, %a1}".  */
4166 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4167 output_asm_insn ("push{l}\t%7", xops);
4169 output_asm_insn ("call\t%P3", xops);
4170 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4171 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4177 /* Output assembler code to FILE to increment a counter associated
4178 with basic block number BLOCKNO.
4180 If profile_block_flag == 2
4182 Output code to initialize the global structure `__bb' and
4183 call the function `__bb_trace_func' which will increment the
4186 `__bb' consists of two words. In the first word the number
4187 of the basic block has to be stored. In the second word
4188 the address of a block allocated in the object module
4191 The basic block number is given by BLOCKNO.
4193 The address of the block is given by the label created with
4195 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4197 by FUNCTION_BLOCK_PROFILER.
4199 Of course, since you are writing the definition of
4200 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4201 can take a short cut in the definition of this macro and use the
4202 name that you know will result.
4204 If described in a virtual assembler language the code to be
4207 move BLOCKNO -> (__bb)
4208 move LPBX0 -> (__bb+4)
4209 call __bb_trace_func
4211 Note that function `__bb_trace_func' must not change the
4212 machine state, especially the flag register. To grant
4213 this, you must output code to save and restore registers
4214 either in this macro or in the macros MACHINE_STATE_SAVE
4215 and MACHINE_STATE_RESTORE. The last two macros will be
4216 used in the function `__bb_trace_func', so you must make
4217 sure that the function prologue does not change any
4218 register prior to saving it with MACHINE_STATE_SAVE.
4220 else if profile_block_flag != 0
4222 Output code to increment the counter directly.
4223 Basic blocks are numbered separately from zero within each
4224 compiled object module. The count associated with block number
4225 BLOCKNO is at index BLOCKNO in an array of words; the name of
4226 this array is a local symbol made with this statement:
4228 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4230 Of course, since you are writing the definition of
4231 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4232 can take a short cut in the definition of this macro and use the
4233 name that you know will result.
4235 If described in a virtual assembler language the code to be
4238 inc (LPBX2+4*BLOCKNO)
/* Emit the per-basic-block counting code described in the comment above
   (BLOCK_PROFILER): for profile_block_flag == 2, fill in the global
   `__bb' struct (block number, table address) and call __bb_trace_func
   while preserving the flags; otherwise increment the counter word at
   LPBX2 + 4*BLOCKNO directly.
   NOTE(review): interior lines are missing from this extraction (embedded
   line numbers jump); braces and case labels are not all visible.  */
4242 ix86_output_block_profiler (file, blockno)
4243 FILE *file ATTRIBUTE_UNUSED;
4246 rtx xops[8], cnt_rtx;
/* `counts' is declared on a line not visible here -- presumably a local
   label buffer; verify in the full source.  */
4248 char *block_table = counts;
4250 switch (profile_block_flag)
/* profile_block_flag == 2: store BLOCKNO into __bb and the LPBX0 table
   address into __bb+4, then call __bb_trace_func.  pushf/popf bracket
   the sequence because the callee must not be allowed to change flags.  */
4253 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4255 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4256 xops[2] = GEN_INT (blockno);
4257 xops[3] = gen_rtx_MEM (Pmode,
4258 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4259 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4260 xops[5] = plus_constant (xops[4], 4);
4261 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4262 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4264 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4266 output_asm_insn ("pushf", xops);
4267 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
/* PIC path (presumably): go through %eax to store the table address,
   saving/restoring %eax around the lea/mov -- TODO confirm the missing
   enclosing conditional.  */
4270 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4271 output_asm_insn ("push{l}\t%7", xops);
4272 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4273 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4274 output_asm_insn ("pop{l}\t%7", xops);
/* Non-PIC path: store the symbol address directly.  */
4277 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4278 output_asm_insn ("call\t%P3", xops);
4279 output_asm_insn ("popf", xops);
/* Other nonzero profile_block_flag: one inc{l} on the counter array.  */
4284 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4285 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4286 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4289 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
/* Under PIC, address the counter relative to the PIC register.  */
4292 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4294 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4295 output_asm_insn ("inc{l}\t%0", xops);
/* Expand a move of operands[1] into operands[0] in MODE, massaging the
   operands into forms the ia32 move patterns accept: legitimize PIC
   symbolic addresses, forbid mem->mem moves, copy awkward push sources
   into registers, and force FP constants destined for registers into
   the constant pool.
   NOTE(review): interior lines are missing from this extraction; braces
   and some statements are not visible.  */
4302 ix86_expand_move (mode, operands)
4303 enum machine_mode mode;
/* During/after reload we may not create new pseudos.  */
4306 int strict = (reload_in_progress || reload_completed);
4309 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4311 /* Emit insns to move operands[1] into operands[0]. */
/* Storing a symbolic address to memory: go through a register first.  */
4313 if (GET_CODE (operands[0]) == MEM)
4314 operands[1] = force_reg (Pmode, operands[1]);
4317 rtx temp = operands[0];
4318 if (GET_CODE (temp) != REG)
4319 temp = gen_reg_rtx (Pmode);
4320 temp = legitimize_pic_address (operands[1], temp);
4321 if (temp == operands[0])
/* Don't allow memory-to-memory moves (QImode stores and non-push
   wider stores); force the source into a register.  */
4328 if (GET_CODE (operands[0]) == MEM
4329 && (GET_MODE (operands[0]) == QImode
4330 || !push_operand (operands[0], mode))
4331 && GET_CODE (operands[1]) == MEM)
4332 operands[1] = force_reg (mode, operands[1]);
/* Pushes of values that may involve eliminable registers must be
   copied to a plain register first.  */
4334 if (push_operand (operands[0], mode)
4335 && ! general_no_elim_operand (operands[1], mode))
4336 operands[1] = copy_to_mode_reg (mode, operands[1]);
4338 if (FLOAT_MODE_P (mode))
4340 /* If we are loading a floating point constant to a register,
4341 force the value to memory now, since we'll get better code
4342 out the back end. */
4346 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4347 && register_operand (operands[0], mode))
4348 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4352 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4357 /* Attempt to expand a binary operator. Make the expansion closer to the
4358 actual machine, then just general_operand, which will allow 3 separate
4359 memory references (one output, two input) in a single insn. */
/* Expand the binary operation CODE on MODE operands (dst = src1 CODE src2),
   forcing operands into the shapes the two-address ia32 patterns want:
   at most one memory operand, and that one matching the destination.
   Emits the insn (with a flags clobber, except during reload) and copies
   the result back to operands[0] if a temporary was used.
   NOTE(review): interior lines are missing from this extraction; the
   operand-unpacking statements and some braces are not visible.  */
4362 ix86_expand_binary_operator (code, mode, operands)
4364 enum machine_mode mode;
4367 int matching_memory;
4368 rtx src1, src2, dst, op, clob;
4374 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
/* Canonicalize so the operand matching dst (or the immediate) is src2.  */
4375 if (GET_RTX_CLASS (code) == 'c'
4376 && (rtx_equal_p (dst, src2)
4377 || immediate_operand (src1, mode)))
4384 /* If the destination is memory, and we do not have matching source
4385 operands, do things in registers. */
4386 matching_memory = 0;
4387 if (GET_CODE (dst) == MEM)
4389 if (rtx_equal_p (dst, src1))
4390 matching_memory = 1;
4391 else if (GET_RTX_CLASS (code) == 'c'
4392 && rtx_equal_p (dst, src2))
4393 matching_memory = 2;
/* Memory destination with no matching source: compute in a register.  */
4395 dst = gen_reg_rtx (mode);
4398 /* Both source operands cannot be in memory. */
4399 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4401 if (matching_memory != 2)
4402 src2 = force_reg (mode, src2);
4404 src1 = force_reg (mode, src1);
4407 /* If the operation is not commutable, source 1 cannot be a constant
4408 or non-matching memory. */
4409 if ((CONSTANT_P (src1)
4410 || (!matching_memory && GET_CODE (src1) == MEM))
4411 && GET_RTX_CLASS (code) != 'c')
4412 src1 = force_reg (mode, src1);
4414 /* If optimizing, copy to regs to improve CSE */
4415 if (optimize && ! no_new_pseudos)
4417 if (GET_CODE (dst) == MEM)
4418 dst = gen_reg_rtx (mode);
4419 if (GET_CODE (src1) == MEM)
4420 src1 = force_reg (mode, src1);
4421 if (GET_CODE (src2) == MEM)
4422 src2 = force_reg (mode, src2);
4425 /* Emit the instruction. */
4427 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4428 if (reload_in_progress)
4430 /* Reload doesn't know about the flags register, and doesn't know that
4431 it doesn't want to clobber it. We can only do this with PLUS. */
/* Normal case: emit a PARALLEL of the SET plus a flags clobber, matching
   the *_clobber patterns in the machine description.  */
4438 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4439 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4442 /* Fix up the destination if needed. */
4443 if (dst != operands[0])
4444 emit_move_insn (operands[0], dst);
4447 /* Return TRUE or FALSE depending on whether the binary operator meets the
4448 appropriate constraints. */
/* Predicate: do OPERANDS already satisfy the two-address constraints
   that ix86_expand_binary_operator would otherwise enforce?  Used by
   insn conditions.  NOTE(review): the return statements and braces are
   on lines missing from this extraction.  */
4451 ix86_binary_operator_ok (code, mode, operands)
4453 enum machine_mode mode ATTRIBUTE_UNUSED;
4456 /* Both source operands cannot be in memory. */
4457 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4459 /* If the operation is not commutable, source 1 cannot be a constant. */
4460 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4462 /* If the destination is memory, we must have a matching source operand. */
4463 if (GET_CODE (operands[0]) == MEM
4464 && ! (rtx_equal_p (operands[0], operands[1])
4465 || (GET_RTX_CLASS (code) == 'c'
4466 && rtx_equal_p (operands[0], operands[2]))))
4468 /* If the operation is not commutable and the source 1 is memory, we must
4469 have a matching destination. */
4470 if (GET_CODE (operands[1]) == MEM
4471 && GET_RTX_CLASS (code) != 'c'
4472 && ! rtx_equal_p (operands[0], operands[1]))
4477 /* Attempt to expand a unary operator. Make the expansion closer to the
4478 actual machine, then just general_operand, which will allow 2 separate
4479 memory references (one output, one input) in a single insn. */
/* Expand the unary operation CODE on MODE operands (dst = CODE src),
   the one-source analogue of ix86_expand_binary_operator: a memory
   source must match the destination, otherwise operands are moved to
   registers.  Emits the insn (with a flags clobber during reload or
   for NOT) and fixes up the destination.
   NOTE(review): interior lines are missing from this extraction; the
   operand-unpacking statements and some braces are not visible.  */
4482 ix86_expand_unary_operator (code, mode, operands)
4484 enum machine_mode mode;
4487 int matching_memory;
4488 rtx src, dst, op, clob;
4493 /* If the destination is memory, and we do not have matching source
4494 operands, do things in registers. */
4495 matching_memory = 0;
4496 if (GET_CODE (dst) == MEM)
4498 if (rtx_equal_p (dst, src))
4499 matching_memory = 1;
4501 dst = gen_reg_rtx (mode);
4504 /* When source operand is memory, destination must match. */
4505 if (!matching_memory && GET_CODE (src) == MEM)
4506 src = force_reg (mode, src);
4508 /* If optimizing, copy to regs to improve CSE */
4509 if (optimize && ! no_new_pseudos)
4511 if (GET_CODE (dst) == MEM)
4512 dst = gen_reg_rtx (mode);
4513 if (GET_CODE (src) == MEM)
4514 src = force_reg (mode, src);
4517 /* Emit the instruction. */
4519 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags on ia32, so it is emitted without the
   clobber (the reload_in_progress || code == NOT test selects the
   plain-SET form) -- note the condition reads "or", i.e. NOT also
   takes the no-clobber path.  */
4520 if (reload_in_progress || code == NOT)
4522 /* Reload doesn't know about the flags register, and doesn't know that
4523 it doesn't want to clobber it. */
4530 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4531 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4534 /* Fix up the destination if needed. */
4535 if (dst != operands[0])
4536 emit_move_insn (operands[0], dst);
4539 /* Return TRUE or FALSE depending on whether the unary operator meets the
4540 appropriate constraints. */
/* Predicate: do OPERANDS satisfy the unary two-address constraint
   (a memory operand, if any, must match the destination)?
   NOTE(review): the return statements are on lines missing from this
   extraction.  */
4543 ix86_unary_operator_ok (code, mode, operands)
4544 enum rtx_code code ATTRIBUTE_UNUSED;
4545 enum machine_mode mode ATTRIBUTE_UNUSED;
4546 rtx operands[2] ATTRIBUTE_UNUSED;
4548 /* If one of operands is memory, source and destination must match. */
4549 if ((GET_CODE (operands[0]) == MEM
4550 || GET_CODE (operands[1]) == MEM)
4551 && ! rtx_equal_p (operands[0], operands[1]))
4556 /* Return TRUE or FALSE depending on whether the first SET in INSN
4557 has source and destination with matching CC modes, and that the
4558 CC mode is at least as constrained as REQ_MODE. */
/* Return nonzero if the first SET in INSN is a COMPARE whose destination
   CC mode is consistent with (at least as constrained as) REQ_MODE.
   Used by insn conditions to accept compares in compatible CC modes.
   NOTE(review): the case labels / early returns between the mode tests
   are on lines missing from this extraction.  */
4561 ix86_match_ccmode (insn, req_mode)
4563 enum machine_mode req_mode;
4566 enum machine_mode set_mode;
4568 set = PATTERN (insn);
4569 if (GET_CODE (set) == PARALLEL)
4570 set = XVECEXP (set, 0, 0);
4571 if (GET_CODE (set) != SET)
4573 if (GET_CODE (SET_SRC (set)) != COMPARE)
4576 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode compatibility checks; the switch/case structure around these
   tests is not fully visible here.  */
4580 if (req_mode != CCNOmode
4581 && (req_mode != CCmode
4582 || XEXP (SET_SRC (set), 1) != const0_rtx))
4586 if (req_mode == CCGCmode)
4590 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4594 if (req_mode == CCZmode)
4604 return (GET_MODE (SET_SRC (set)) == set_mode);
4607 /* Produce an unsigned comparison for a given signed comparison. */
4609 static enum rtx_code
4610 unsigned_comparison (code)
4642 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit an integer compare of OP0 and OP1: set the flags register (in the
   CC mode selected for CODE) from a COMPARE, and return the CODE test
   rtx on the flags for the consumer (bcc/scc/cmov).  */
4645 ix86_expand_int_compare (code, op0, op1)
4649 enum machine_mode cmpmode;
4652 cmpmode = SELECT_CC_MODE (code, op0, op1);
4653 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4655 /* This is very simple, but making the interface the same as in the
4656 FP case makes the rest of the code easier. */
4657 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4658 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4660 /* Return the test that should be put into the flags user, i.e.
4661 the bcc, scc, or cmov instruction. */
4662 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4665 /* Figure out whether to use ordered or unordered fp comparisons.
4666 Return the appropriate mode to use. */
/* Select CCFPmode (ordered, faults on NaN) or CCFPUmode (unordered)
   for the FP comparison CODE.  NOTE(review): the switch statement and
   per-case assignments to `unordered' are partly on missing lines.  */
4669 ix86_fp_compare_mode (code)
4677 /* When not doing IEEE compliant compares, fault on NaNs. */
4678 unordered = (TARGET_IEEE_FP != 0);
/* Signed relationals: ordered compare unless IEEE mode demands otherwise.  */
4681 case LT: case LE: case GT: case GE:
/* Explicitly unordered codes always need the unordered compare.  */
4685 case UNORDERED: case ORDERED:
4686 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4694 /* ??? If we knew whether invalid-operand exceptions were masked,
4695 we could rely on fcom to raise an exception and take care of
4696 NaNs. But we don't. We could know this from c99 math pragmas. */
4700 return unordered ? CCFPUmode : CCFPmode;
/* Return the condition-code mode needed to represent comparison CODE of
   OP0 and OP1 (SELECT_CC_MODE): FP modes for FP operands, otherwise the
   weakest integer CC mode whose flags suffice for CODE.
   NOTE(review): the switch header, returned mode names, and braces are
   on lines missing from this extraction.  */
4704 ix86_cc_mode (code, op0, op1)
4708 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4709 return ix86_fp_compare_mode (code);
4712 /* Only zero flag is needed. */
4714 case NE: /* ZF!=0 */
4716 /* Codes needing carry flag. */
4717 case GEU: /* CF=0 */
4718 case GTU: /* CF=0 & ZF=0 */
4719 case LTU: /* CF=1 */
4720 case LEU: /* CF=1 | ZF=1 */
4722 /* Codes possibly doable only with sign flag when
4723 comparing against zero. */
4724 case GE: /* SF=OF or SF=0 */
4725 case LT: /* SF<>OF or SF=1 */
4726 if (op1 == const0_rtx)
4729 /* For other cases Carry flag is not required. */
4731 /* Codes doable only with sign flag when comparing
4732 against zero, but we miss jump instruction for it
4733 so we need to use relational tests against overflow
4734 that thus needs to be zero. */
4735 case GT: /* ZF=0 & SF=OF */
4736 case LE: /* ZF=1 | SF<>OF */
4737 if (op1 == const0_rtx)
4746 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* Return true if we should use an FCOMI instruction for this fp comparison:
   requires CMOVE support, and is worthwhile for ordered/unordered tests
   and for comparisons that the CCFPmode (fault-on-NaN) path handles.  */
4749 ix86_use_fcomi_compare (code)
4752 return (TARGET_CMOVE
4753 && (code == ORDERED || code == UNORDERED
4754 /* All other unordered compares require checking
4755 multiple sets of bits. */
4756 || ix86_fp_compare_mode (code) == CCFPmode));
4759 /* Swap, force into registers, or otherwise massage the two operands
4760 to a fp comparison. The operands are updated in place; the new
4761 comparsion code is returned. */
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison so they fit the 80387 compare patterns (op0 must
   be st(0); only op1 may be memory).  The operands are updated in
   place; the new comparison code is returned.
   NOTE(review): interior lines (braces, the early return after forcing
   both operands, the trailing return of CODE) are missing from this
   extraction.  */
4763 static enum rtx_code
4764 ix86_prepare_fp_compare_args (code, pop0, pop1)
4768 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4769 rtx op0 = *pop0, op1 = *pop1;
4770 enum machine_mode op_mode = GET_MODE (op0);
4772 /* All of the unordered compare instructions only work on registers.
4773 The same is true of the XFmode compare instructions. The same is
4774 true of the fcomi compare instructions. */
4776 if (fpcmp_mode == CCFPUmode
4777 || op_mode == XFmode
4778 || op_mode == TFmode
4779 || ix86_use_fcomi_compare (code))
4781 op0 = force_reg (op_mode, op0);
4782 op1 = force_reg (op_mode, op1);
4786 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4787 things around if they appear profitable, otherwise force op0
/* Swap when op0 is memory or a non-standard constant and op1 is not.  */
4790 if (standard_80387_constant_p (op0) == 0
4791 || (GET_CODE (op0) == MEM
4792 && ! (standard_80387_constant_p (op1) == 0
4793 || GET_CODE (op1) == MEM)))
4796 tmp = op0, op0 = op1, op1 = tmp;
4797 code = swap_condition (code);
4800 if (GET_CODE (op0) != REG)
4801 op0 = force_reg (op_mode, op0);
4803 if (CONSTANT_P (op1))
/* Standard 387 constants (0.0, 1.0) can be loaded directly into a reg;
   other constants go through the constant pool.  */
4805 if (standard_80387_constant_p (op1))
4806 op1 = force_reg (op_mode, op1);
4808 op1 = validize_mem (force_const_mem (op_mode, op1));
4817 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Generate insn patterns to do a floating point compare of OP0 and OP1,
   using SCRATCH (an HImode reg) for the fnstsw result when needed.
   Returns the test rtx on the flags register for the consumer.  Three
   strategies: fcomi (sets EFLAGS directly), fnstsw+sahf, or fnstsw plus
   bit tests on AH.
   NOTE(review): this extraction is missing many interior lines (case
   labels of the large switch over CODE, braces, default abort); the
   bit-twiddling sequences below cannot be matched to their comparison
   codes from this view alone.  */
4820 ix86_expand_fp_compare (code, op0, op1, scratch)
4822 rtx op0, op1, scratch;
4824 enum machine_mode fpcmp_mode, intcmp_mode;
4827 fpcmp_mode = ix86_fp_compare_mode (code);
4828 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4830 /* %%% fcomi is probably always faster, even when dealing with memory,
4831 since compare-and-branch would be three insns instead of four. */
4832 if (ix86_use_fcomi_compare (code))
4834 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4835 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4838 /* The FP codes work out to act like unsigned. */
4839 code = unsigned_comparison (code);
4840 intcmp_mode = CCmode;
4844 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
/* Store the FPU status word into SCRATCH via the fnstsw unspec (9).  */
4847 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4848 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4849 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
4851 if (fpcmp_mode == CCFPmode
4853 || code == UNORDERED)
4855 /* We have two options here -- use sahf, or testing bits of ah
4856 directly. On PPRO, they are equivalent, sahf being one byte
4857 smaller. On Pentium, sahf is non-pairable while test is UV
4860 if (TARGET_USE_SAHF || optimize_size)
4863 emit_insn (gen_x86_sahf_1 (scratch));
4865 /* The FP codes work out to act like unsigned. */
4866 code = unsigned_comparison (code);
4867 intcmp_mode = CCmode;
4872 * The numbers below correspond to the bits of the FPSW in AH.
4873 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
4895 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4896 faster in all cases to just fall back on sahf. */
/* AH bit tests, one arm per comparison code; the case labels are on
   missing lines, so which mask belongs to which code must be checked
   against the full source.  */
4923 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
4924 intcmp_mode = CCNOmode;
4929 /* In the unordered case, we have to check C2 for NaN's, which
4930 doesn't happen to work out to anything nice combination-wise.
4931 So do some bit twiddling on the value we've got in AH to come
4932 up with an appropriate set of condition codes. */
4934 intcmp_mode = CCNOmode;
4938 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4942 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4943 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
4944 intcmp_mode = CCmode;
4948 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
4952 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4953 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4954 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4955 intcmp_mode = CCmode;
4959 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4960 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4961 intcmp_mode = CCmode;
4965 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4966 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4971 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4975 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4979 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4983 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4984 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4988 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4989 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4990 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4991 intcmp_mode = CCmode;
4995 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4999 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5003 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5013 /* Return the test that should be put into the flags user, i.e.
5014 the bcc, scc, or cmov instruction. */
5015 return gen_rtx_fmt_ee (code, VOIDmode,
5016 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the pending comparison (ix86_compare_op0/op1 globals) for CODE,
   dispatching to the FP or integer expander, and return the flags test.  */
5021 ix86_expand_compare (code)
5025 op0 = ix86_compare_op0;
5026 op1 = ix86_compare_op1;
5028 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5029 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
5031 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL for comparison CODE of the pending
   operands (ix86_compare_op0/op1), switching on their mode: plain
   integer modes use one compare+branch; FP modes emit a combined
   compare/branch PARALLEL; DImode is split into multiple SImode
   compare+branch sequences.
   NOTE(review): the mode case labels and several braces are on lines
   missing from this extraction.  */
5037 ix86_expand_branch (code, label)
5043 switch (GET_MODE (ix86_compare_op0))
/* Integer modes (presumably QI/HI/SImode cases): expand the compare and
   branch on the resulting flags test.  */
5048 tmp = ix86_expand_compare (code);
5049 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5050 gen_rtx_LABEL_REF (VOIDmode, label),
5052 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* FP modes: build the compare-and-branch PARALLEL by hand.  */
5059 /* Don't expand the comparison early, so that we get better code
5060 when jump or whoever decides to reverse the comparison. */
5065 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5068 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5069 ix86_compare_op0, ix86_compare_op1);
5070 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5071 gen_rtx_LABEL_REF (VOIDmode, label),
5073 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5075 use_fcomi = ix86_use_fcomi_compare (code);
/* Clobber both FP status registers (17, 18); add an HImode scratch
   clobber for fnstsw when fcomi is not used.  */
5076 vec = rtvec_alloc (3 + !use_fcomi);
5077 RTVEC_ELT (vec, 0) = tmp;
5079 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5081 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5084 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5086 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5091 /* Expand DImode branch into multiple compare+branch. */
5093 rtx lo[2], hi[2], label2;
5094 enum rtx_code code1, code2, code3;
/* Canonicalize: constant on the op1 side.  */
5096 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5098 tmp = ix86_compare_op0;
5099 ix86_compare_op0 = ix86_compare_op1;
5100 ix86_compare_op1 = tmp;
5101 code = swap_condition (code);
5103 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5104 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5106 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5107 avoid two branches. This costs one extra insn, so disable when
5108 optimizing for size. */
5110 if ((code == EQ || code == NE)
5112 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5117 if (hi[1] != const0_rtx)
5118 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5119 NULL_RTX, 0, OPTAB_WIDEN)
5122 if (lo[1] != const0_rtx)
5123 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5124 NULL_RTX, 0, OPTAB_WIDEN);
5126 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5127 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the EQ/NE test reduces to comparing the OR against zero.  */
5129 ix86_compare_op0 = tmp;
5130 ix86_compare_op1 = const0_rtx;
5131 ix86_expand_branch (code, label);
5135 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5136 op1 is a constant and the low word is zero, then we can just
5137 examine the high word. */
5139 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5142 case LT: case LTU: case GE: case GEU:
5143 ix86_compare_op0 = hi[0];
5144 ix86_compare_op1 = hi[1];
5145 ix86_expand_branch (code, label);
5151 /* Otherwise, we need two or three jumps. */
5153 label2 = gen_label_rtx ();
5156 code2 = swap_condition (code);
5157 code3 = unsigned_condition (code);
5161 case LT: case GT: case LTU: case GTU:
5164 case LE: code1 = LT; code2 = GT; break;
5165 case GE: code1 = GT; code2 = LT; break;
5166 case LEU: code1 = LTU; code2 = GTU; break;
5167 case GEU: code1 = GTU; code2 = LTU; break;
5169 case EQ: code1 = NIL; code2 = NE; break;
5170 case NE: code2 = NIL; break;
5178 * if (hi(a) < hi(b)) goto true;
5179 * if (hi(a) > hi(b)) goto false;
5180 * if (lo(a) < lo(b)) goto true;
5184 ix86_compare_op0 = hi[0];
5185 ix86_compare_op1 = hi[1];
5188 ix86_expand_branch (code1, label);
5190 ix86_expand_branch (code2, label2);
5192 ix86_compare_op0 = lo[0];
5193 ix86_compare_op1 = lo[1];
5194 ix86_expand_branch (code3, label);
5197 emit_label (label2);
/* Expand a setcc of comparison CODE into DEST.  Returns 1 on success,
   0 to make the caller FAIL (DImode compares are not handled).  Chooses
   among three strategies depending on whether DEST overlaps the compare
   operands and whether DEST is already QImode; see the comment below.
   NOTE(review): the mode-selection assignments and some braces are on
   lines missing from this extraction.  */
5207 ix86_expand_setcc (code, dest)
5214 if (GET_MODE (ix86_compare_op0) == DImode)
5215 return 0; /* FAIL */
5217 /* Three modes of generation:
5218 0 -- destination does not overlap compare sources:
5219 clear dest first, emit strict_low_part setcc.
5220 1 -- destination does overlap compare sources:
5221 emit subreg setcc, zero extend.
5222 2 -- destination is in QImode:
5228 if (GET_MODE (dest) == QImode)
5230 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5231 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
/* Mode 0: clear DEST before the compare so the mov does not clobber
   the flags the setcc will read.  */
5235 emit_move_insn (dest, const0_rtx);
5237 ret = ix86_expand_compare (code);
5238 PUT_MODE (ret, QImode);
/* Mode 0: store through a strict_low_part of the low byte.  */
5243 tmp = gen_lowpart (QImode, dest);
5244 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
/* Mode 1: setcc into a QImode temp (or subreg), then zero-extend.  */
5248 if (!cse_not_expected)
5249 tmp = gen_reg_rtx (QImode);
5251 tmp = gen_lowpart (QImode, dest);
5254 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Zero-extend clobbers flags, so wrap it with the clobber.  */
5260 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5261 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5262 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5263 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5267 return 1; /* DONE */
/* Expand an integer conditional move: operands[0] = operands[1](cond)
   ? operands[2] : operands[3].  Returns 1 on success, 0 to FAIL.
   Tries, in order: branch-free sbb/setcc arithmetic for constant arms,
   masking tricks when one arm is 0 or -1, and finally a real cmov.
   NOTE(review): many interior lines (braces, else-arms, the `diff'
   computation, labels) are missing from this extraction; the sequences
   below are fragments of several alternative strategies.  */
5271 ix86_expand_int_movcc (operands)
5274 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5275 rtx compare_seq, compare_op;
5277 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5278 In case comparison is done with immediate, we can convert it to LTU or
5279 GEU by altering the integer. */
5281 if ((code == LEU || code == GTU)
5282 && GET_CODE (ix86_compare_op1) == CONST_INT
5283 && GET_MODE (operands[0]) != HImode
/* x <= 0xffffffff is always true; incrementing would overflow.  */
5284 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5285 && GET_CODE (operands[2]) == CONST_INT
5286 && GET_CODE (operands[3]) == CONST_INT)
/* LEU n == LTU n+1; GTU n == GEU n+1.  */
5292 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
/* Expand the compare into a pending sequence so we can decide how to
   use it before emitting.  */
5296 compare_op = ix86_expand_compare (code);
5297 compare_seq = gen_sequence ();
5300 compare_code = GET_CODE (compare_op);
5302 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5303 HImode insns, we'd be swallowed in word prefix ops. */
5305 if (GET_MODE (operands[0]) != HImode
5306 && GET_CODE (operands[2]) == CONST_INT
5307 && GET_CODE (operands[3]) == CONST_INT)
5309 rtx out = operands[0];
5310 HOST_WIDE_INT ct = INTVAL (operands[2]);
5311 HOST_WIDE_INT cf = INTVAL (operands[3]);
/* sbb-based branch-free sequence, only valid for carry-flag codes.  */
5314 if (compare_code == LTU || compare_code == GEU)
5317 /* Detect overlap between destination and compare sources. */
5320 /* To simplify rest of code, restrict to the GEU case. */
5321 if (compare_code == LTU)
5326 compare_code = reverse_condition (compare_code);
5327 code = reverse_condition (code);
5331 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5332 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5333 tmp = gen_reg_rtx (SImode);
/* sbbl: tmp = -1 if carry set, 0 otherwise.  */
5335 emit_insn (compare_seq);
5336 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5348 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5359 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5361 else if (diff == -1 && ct)
5371 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5373 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
/* General constant-arms case:
 * sbbl dest,dest / andl (cf-ct),dest / addl ct,dest.  */
5380 * andl cf - ct, dest
5385 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5387 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5391 emit_move_insn (out, tmp);
5393 return 1; /* DONE */
/* setcc-based sequences; normalize so diff = ct - cf is positive.  */
5400 tmp = ct, ct = cf, cf = tmp;
5402 compare_code = reverse_condition (compare_code);
5403 code = reverse_condition (code);
5405 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5406 || diff == 3 || diff == 5 || diff == 9)
5412 * lea cf(dest*(ct-cf)),dest
5416 * This also catches the degenerate setcc-only case.
5422 out = emit_store_flag (out, code, ix86_compare_op0,
5423 ix86_compare_op1, VOIDmode, 0, 1);
/* Build the lea address expression dest*diff + cf (scale/index parts
   depend on missing conditionals on diff).  */
5430 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5434 tmp = gen_rtx_PLUS (SImode, tmp, out);
5440 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5446 emit_move_insn (out, tmp);
5451 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5452 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5454 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5455 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5459 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5461 if (out != operands[0])
5462 emit_move_insn (operands[0], out);
5464 return 1; /* DONE */
5468 * General case: Jumpful:
5469 * xorl dest,dest cmpl op1, op2
5470 * cmpl op1, op2 movl ct, dest
5472 * decl dest movl cf, dest
5473 * andl (cf-ct),dest 1:
5478 * This is reasonably steep, but branch mispredict costs are
5479 * high on modern cpus, so consider failing only if optimizing
5482 * %%% Parameterize branch_cost on the tuning architecture, then
5483 * use that. The 80386 couldn't care less about mispredicts.
5486 if (!optimize_size && !TARGET_CMOVE)
5492 compare_code = reverse_condition (compare_code);
5493 code = reverse_condition (code);
/* setcc / decl / andl / addl branch-free general sequence.  */
5496 out = emit_store_flag (out, code, ix86_compare_op0,
5497 ix86_compare_op1, VOIDmode, 0, 1);
5499 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5500 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5502 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5503 if (out != operands[0])
5504 emit_move_insn (operands[0], out);
5506 return 1; /* DONE */
5512 /* Try a few things more with specific constants and a variable. */
5515 rtx var, orig_out, out, tmp;
/* Without cmov and with a variable arm we must FAIL (condition on a
   missing line -- presumably !TARGET_CMOVE; verify).  */
5518 return 0; /* FAIL */
5520 /* If one of the two operands is an interesting constant, load a
5521 constant with the above and mask it in with a logical operation. */
5523 if (GET_CODE (operands[2]) == CONST_INT)
5526 if (INTVAL (operands[2]) == 0)
5527 operands[3] = constm1_rtx, op = and_optab;
5528 else if (INTVAL (operands[2]) == -1)
5529 operands[3] = const0_rtx, op = ior_optab;
5531 return 0; /* FAIL */
5533 else if (GET_CODE (operands[3]) == CONST_INT)
5536 if (INTVAL (operands[3]) == 0)
5537 operands[2] = constm1_rtx, op = and_optab;
5538 else if (INTVAL (operands[3]) == -1)
5539 operands[2] = const0_rtx, op = ior_optab;
5541 return 0; /* FAIL */
5544 return 0; /* FAIL */
5546 orig_out = operands[0];
5547 tmp = gen_reg_rtx (GET_MODE (orig_out));
5550 /* Recurse to get the constant loaded. */
5551 if (ix86_expand_int_movcc (operands) == 0)
5552 return 0; /* FAIL */
5554 /* Mask in the interesting variable. */
5555 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5557 if (out != orig_out)
5558 emit_move_insn (orig_out, out);
5560 return 1; /* DONE */
5564 * For comparison with above,
/* Fall-through: emit a real cmov; both arms must be reg/mem.  */
5574 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5575 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5576 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5577 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5579 emit_insn (compare_seq);
5580 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5581 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5582 compare_op, operands[2],
5585 return 1; /* DONE */
/* Expand a floating point conditional move.  Signed-integer comparison
   conditions are not directly supported by fcmov, so those are first
   reduced to a setcc into a QImode temp compared against zero.
   NOTE(review): the condition-code filtering around the setcc fallback
   is on lines missing from this extraction.  */
5589 ix86_expand_fp_movcc (operands)
5593 enum machine_mode mode;
5596 /* The floating point conditional move instructions don't directly
5597 support conditions resulting from a signed integer comparison. */
5599 code = GET_CODE (operands[1]);
/* Fallback: materialize the condition with setcc, then cmov on
   (tmp != 0), which is an unsigned test fcmov can handle.  */
5612 tmp = gen_reg_rtx (QImode);
5613 ix86_expand_setcc (code, tmp);
5615 ix86_compare_op0 = tmp;
5616 ix86_compare_op1 = const0_rtx;
/* Set the flags, then emit the IF_THEN_ELSE on them.  */
5623 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5624 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5625 gen_rtx_COMPARE (mode,
5627 ix86_compare_op1)));
5628 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5629 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5630 gen_rtx_fmt_ee (code, VOIDmode,
5631 gen_rtx_REG (mode, FLAGS_REG),
5639 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5640 works for floating pointer parameters and nonoffsetable memories.
5641 For pushes, it returns just stack offsets; the values will be saved
5642 in the right order. Maximally three parts are generated. */
/* Split OPERAND into 2 or 3 SImode PARTS (DI/DF -> 2, XF/TF -> 3), as
   described in the comment above: handles registers, offsettable
   memory, pushes (stack offsets only), and CONST_DOUBLEs (converted to
   integer words).  Returns the number of parts (via the missing return
   -- verify).
   NOTE(review): interior lines (aborts, else-arms, some assignments)
   are missing from this extraction.  */
5645 ix86_split_to_parts (operand, parts, mode)
5648 enum machine_mode mode;
5650 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* MMX registers cannot be split into SImode pieces.  */
5652 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5654 if (size < 2 || size > 3)
5657 /* Optimize constant pool reference to immediates. This is used by fp moves,
5658 that force all constants to memory to allow combining. */
5660 if (GET_CODE (operand) == MEM
5661 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5662 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5663 operand = get_pool_constant (XEXP (operand, 0));
5665 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5667 /* The only non-offsetable memories we handle are pushes. */
5668 if (! push_operand (operand, VOIDmode))
/* For pushes, all parts alias the same SImode push destination; the
   caller emits them in the right order.  */
5671 PUT_MODE (operand, SImode);
5672 parts[0] = parts[1] = parts[2] = operand;
/* DImode (presumably -- enclosing conditional is missing): reuse
   split_di.  */
5677 split_di (&operand, 1, &parts[0], &parts[1]);
5680 if (REG_P (operand))
/* Pseudo registers can be addressed as consecutive hard-reg words
   only before reload has assigned hard registers.  */
5682 if (!reload_completed)
5684 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5685 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5687 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5689 else if (offsettable_memref_p (operand))
5691 PUT_MODE (operand, SImode);
5693 parts[1] = adj_offsettable_operand (operand, 4);
5695 parts[2] = adj_offsettable_operand (operand, 8);
5697 else if (GET_CODE (operand) == CONST_DOUBLE)
5702 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* XF/TF: three target words.  */
5707 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5708 parts[2] = GEN_INT (l[2]);
/* DF: two target words.  */
5711 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5716 parts[1] = GEN_INT (l[1]);
5717 parts[0] = GEN_INT (l[0]);
5727 /* Emit insns to perform a move or push of DI, DF, and XF values.
5728    Return false when normal moves are needed; true when all required
5729    insns have been emitted.  Operands 2-4 contain the input values
5730    int the correct order; operands 5-7 contain the output values.  */
/* NOTE(review): extraction gaps -- declarations of `operands', `part',
   `push', `collisions' and several braces are missing from this view.  */
5733 ix86_split_long_move (operands1)
5742   /* Make our own copy to avoid clobbering the operands. */
5743   operands[0] = copy_rtx (operands1[0]);
5744   operands[1] = copy_rtx (operands1[1]);
5746   /* The only non-offsettable memory we handle is push. */
5747   if (push_operand (operands[0], VOIDmode))
5749   else if (GET_CODE (operands[0]) == MEM
5750 	   && ! offsettable_memref_p (operands[0]))
5753   size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5754   ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5756   /* When emitting push, take care for source operands on the stack. */
5757   if (push && GET_CODE (operands[1]) == MEM
5758       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Pushing shifts esp, so re-read higher parts from the shifted slots.  */
5761 	part[1][1] = part[1][2];
5762       part[1][0] = part[1][1];
5765   /* We need to do copy in the right order in case an address register
5766      of the source overlaps the destination.  */
5767   if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5769       if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5771       if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5774 	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5777       /* Collision in the middle part can be handled by reordering.  */
5778       if (collisions == 1 && size == 3
5779 	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5782 	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5783 	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5786       /* If there are more collisions, we can't handle it by reordering.
5787 	 Do an lea to the last part and use only one colliding move.  */
5788       else if (collisions > 1)
5791 	  emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5792 				  XEXP (part[1][0], 0)));
5793 	  part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5794 	  part[1][1] = adj_offsettable_operand (part[1][0], 4);
5796 	    part[1][2] = adj_offsettable_operand (part[1][0], 8);
5804       /* We use only first 12 bytes of TFmode value, but for pushing we
5805 	 are required to adjust stack as if we were pushing real 16byte
5807       if (GET_MODE (operands1[0]) == TFmode)
5808 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
/* Push high part first so the words land in memory in ascending order.  */
5810       emit_insn (gen_push (part[1][2]));
5812       emit_insn (gen_push (part[1][1]));
5813       emit_insn (gen_push (part[1][0]));
5817   /* Choose correct order to not overwrite the source before it is copied.  */
5818   if ((REG_P (part[0][0])
5819        && REG_P (part[1][1])
5820        && (REGNO (part[0][0]) == REGNO (part[1][1])
5822 	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
5824 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy high-to-low to avoid clobbering the still-needed source words.  */
5828 	  operands1[2] = part[0][2];
5829 	  operands1[3] = part[0][1];
5830 	  operands1[4] = part[0][0];
5831 	  operands1[5] = part[1][2];
5832 	  operands1[6] = part[1][1];
5833 	  operands1[7] = part[1][0];
5837 	  operands1[2] = part[0][1];
5838 	  operands1[3] = part[0][0];
5839 	  operands1[5] = part[1][1];
5840 	  operands1[6] = part[1][0];
/* No overlap: the straightforward low-to-high order is safe.  */
5847 	  operands1[2] = part[0][0];
5848 	  operands1[3] = part[0][1];
5849 	  operands1[4] = part[0][2];
5850 	  operands1[5] = part[1][0];
5851 	  operands1[6] = part[1][1];
5852 	  operands1[7] = part[1][2];
5856 	  operands1[2] = part[0][0];
5857 	  operands1[3] = part[0][1];
5858 	  operands1[5] = part[1][0];
5859 	  operands1[6] = part[1][1];
/* Split a DImode left shift into SImode operations.  For a constant
   count the high/low words are shifted directly; for a variable count
   an shld/shl pair is emitted plus a fixup for counts >= 32 (cmov when
   available, otherwise a conditional-jump pattern).
   NOTE(review): extraction gaps -- lines are non-contiguous.  */
5867 ix86_split_ashldi (operands, scratch)
5868      rtx *operands, scratch;
5870   rtx low[2], high[2];
5873   if (GET_CODE (operands[2]) == CONST_INT)
5875       split_di (operands, 2, low, high);
/* Shift counts are taken modulo 64 for DImode.  */
5876       count = INTVAL (operands[2]) & 63;
/* Count >= 32: low word becomes zero, old low word moves to high.  */
5880 	  emit_move_insn (high[0], low[1]);
5881 	  emit_move_insn (low[0], const0_rtx);
5884 	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5888 	  if (!rtx_equal_p (operands[0], operands[1]))
5889 	    emit_move_insn (operands[0], operands[1]);
5890 	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5891 	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5896       if (!rtx_equal_p (operands[0], operands[1]))
5897 	emit_move_insn (operands[0], operands[1]);
5899       split_di (operands, 1, low, high);
5901       emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5902       emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]))
5904       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5906 	  if (! no_new_pseudos)
5907 	    scratch = force_reg (SImode, const0_rtx);
5909 	    emit_move_insn (scratch, const0_rtx);
5911 	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5915 	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations.  The
   high word's fixup value is its own sign extension (shift by 31),
   unlike the zero used by the logical variant below.
   NOTE(review): extraction gaps -- lines are non-contiguous.  */
5920 ix86_split_ashrdi (operands, scratch)
5921      rtx *operands, scratch;
5923   rtx low[2], high[2];
5926   if (GET_CODE (operands[2]) == CONST_INT)
5928       split_di (operands, 2, low, high);
5929       count = INTVAL (operands[2]) & 63;
/* Count >= 32: low gets old high; high becomes the sign word.  */
5933 	  emit_move_insn (low[0], high[1]);
5935 	  if (! reload_completed)
5936 	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5939 	      emit_move_insn (high[0], low[0]);
5940 	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5944 	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5948 	  if (!rtx_equal_p (operands[0], operands[1]))
5949 	    emit_move_insn (operands[0], operands[1]);
5950 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5951 	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5956       if (!rtx_equal_p (operands[0], operands[1]))
5957 	emit_move_insn (operands[0], operands[1]);
5959       split_di (operands, 1, low, high);
5961       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5962       emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5964       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5966 	  if (! no_new_pseudos)
5967 	    scratch = gen_reg_rtx (SImode);
/* Materialize the sign word into scratch for the >=32 fixup.  */
5968 	  emit_move_insn (scratch, high[0]);
5969 	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5970 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5974 	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations.  Mirror
   image of ix86_split_ashldi; the >=32 fixup value is zero.
   NOTE(review): extraction gaps -- lines are non-contiguous.  */
5979 ix86_split_lshrdi (operands, scratch)
5980      rtx *operands, scratch;
5982   rtx low[2], high[2];
5985   if (GET_CODE (operands[2]) == CONST_INT)
5987       split_di (operands, 2, low, high);
5988       count = INTVAL (operands[2]) & 63;
/* Count >= 32: low gets old high; high becomes zero.  */
5992 	  emit_move_insn (low[0], high[1]);
5993 	  emit_move_insn (high[0], const0_rtx);
5996 	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6000 	  if (!rtx_equal_p (operands[0], operands[1]))
6001 	    emit_move_insn (operands[0], operands[1]);
6002 	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6003 	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
6008       if (!rtx_equal_p (operands[0], operands[1]))
6009 	emit_move_insn (operands[0], operands[1]);
6011       split_di (operands, 1, low, high);
6013       emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6014       emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6016       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
6017       if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6019 	  if (! no_new_pseudos)
6020 	    scratch = force_reg (SImode, const0_rtx);
6022 	    emit_move_insn (scratch, const0_rtx);
6024 	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6028 	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6032 /* Expand the appropriate insns for doing strlen if not just doing
6035    out = result, initialized with the start address
6036    align_rtx = alignment of the address.
6037    scratch = scratch register, initialized with the startaddress when
6038 	not aligned, otherwise undefined
6040    This is just the body. It needs the initialisations mentioned above and
6041    some address computing at the end.  These things are done in i386.md.  */
/* NOTE(review): extraction gaps -- several lines (declarations, braces,
   else-arms) are missing from this view; comments only are added.  */
6044 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6045      rtx out, align_rtx, scratch;
6049   rtx align_2_label = NULL_RTX;
6050   rtx align_3_label = NULL_RTX;
6051   rtx align_4_label = gen_label_rtx ();
6052   rtx end_0_label = gen_label_rtx ();
6054   rtx tmpreg = gen_reg_rtx (SImode);
6057   if (GET_CODE (align_rtx) == CONST_INT)
6058     align = INTVAL (align_rtx);
6060   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
6062   /* Is there a known alignment and is it less than 4?  */
6065       /* Is there a known alignment and is it not 2? */
6068 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6069 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6071 	  /* Leave just the 3 lower bits.  */
6072 	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6073 				    NULL_RTX, 0, OPTAB_WIDEN);
6075 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6076 				   SImode, 1, 0, align_4_label);
6077 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6078 				   SImode, 1, 0, align_2_label);
6079 	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6080 				   SImode, 1, 0, align_3_label);
6084 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
6085 	     check if is aligned to 4 - byte.  */
6087 	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6088 				    NULL_RTX, 0, OPTAB_WIDEN);
6090 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6091 				   SImode, 1, 0, align_4_label);
6094       mem = gen_rtx_MEM (QImode, out);
6096       /* Now compare the bytes.  */
6098       /* Compare the first n unaligned byte on a byte per byte basis. */
6099       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6100 			       QImode, 1, 0, end_0_label);
6102       /* Increment the address. */
6103       emit_insn (gen_addsi3 (out, out, const1_rtx));
6105       /* Not needed with an alignment of 2 */
6108 	  emit_label (align_2_label);
6110 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6111 				   QImode, 1, 0, end_0_label);
6113 	  emit_insn (gen_addsi3 (out, out, const1_rtx));
6115 	  emit_label (align_3_label);
6118       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6119 			       QImode, 1, 0, end_0_label);
6121       emit_insn (gen_addsi3 (out, out, const1_rtx));
6124   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
6125      align this loop.  It gives only huge programs, but does not help to
6127   emit_label (align_4_label);
6129   mem = gen_rtx_MEM (SImode, out);
6130   emit_move_insn (scratch, mem);
6131   emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6133   /* This formula yields a nonzero result iff one of the bytes is zero.
6134      This saves three branches inside loop and many cycles.  */
6136   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6137   emit_insn (gen_one_cmplsi2 (scratch, scratch));
6138   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6139   emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6140   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6141 			   SImode, 1, 0, align_4_label);
/* TARGET_CMOVE path: locate the zero byte branch-free with two cmovs.  */
6145       rtx reg = gen_reg_rtx (SImode);
6146       emit_move_insn (reg, tmpreg);
6147       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6149       /* If zero is not in the first two bytes, move two bytes forward. */
6150       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6151       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6152       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6153       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6154 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
6157       /* Emit lea manually to avoid clobbering of flags.  */
/* NOTE(review): this SET is created with SImode while every other SET
   in the file uses VOIDmode -- looks suspicious; confirm against the
   RTL SET documentation before changing.  */
6158       emit_insn (gen_rtx_SET (SImode, reg,
6159 			      gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6161       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6162       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6163       emit_insn (gen_rtx_SET (VOIDmode, out,
6164 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* Non-cmov path: do the same adjustment with an explicit jump.  */
6171       rtx end_2_label = gen_label_rtx ();
6172       /* Is zero in the first two bytes? */
6174       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6175       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6176       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6177       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6178 				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6180       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6181       JUMP_LABEL (tmp) = end_2_label;
6183       /* Not in the first two.  Move two bytes forward. */
6184       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6185       emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6187       emit_label (end_2_label);
6191   /* Avoid branch in fixing the byte. */
6192   tmpreg = gen_lowpart (QImode, tmpreg);
6193   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6194   emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6196   emit_label (end_0_label);
6199 /* Clear stack slot assignments remembered from previous functions.
6200    This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates a fresh machine_function for P and clears every stack-local
   slot for every mode.  NOTE(review): the slots are written through the
   ix86_stack_locals macro here but read through p->machine->stack_locals
   in ix86_mark_machine_status below -- presumably the macro expands to
   the same storage; verify in i386.h.  */
6204 ix86_init_machine_status (p)
6207   enum machine_mode mode;
6210     = (struct machine_function *) xmalloc (sizeof (struct machine_function));
6212   for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6213        mode = (enum machine_mode) ((int) mode + 1))
6214     for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6215       ix86_stack_locals[(int) mode][n] = NULL_RTX;
6218 /* Mark machine specific bits of P for GC. */
/* Walks every per-mode stack-local slot and marks the RTX (if any) live
   for the garbage collector.  ggc_mark_rtx is assumed to tolerate
   NULL_RTX entries -- TODO confirm.  */
6220 ix86_mark_machine_status (p)
6223   enum machine_mode mode;
6226   for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6227        mode = (enum machine_mode) ((int) mode + 1))
6228     for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6229       ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6232 /* Return a MEM corresponding to a stack slot with mode MODE.
6233    Allocate a new slot if necessary.
6235    The RTL for a function can have several slots available: N is
6236    which slot to use.  */
/* Slots are cached per (mode, n); the same MEM is returned on repeat
   calls within a function.  */
6239 assign_386_stack_local (mode, n)
6240      enum machine_mode mode;
/* Out-of-range slot index is a compiler bug, not user input.  */
6243   if (n < 0 || n >= MAX_386_STACK_LOCALS)
6246   if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6247     ix86_stack_locals[(int) mode][n]
6248       = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6250   return ix86_stack_locals[(int) mode][n];
6253 /* Calculate the length of the memory address in the instruction
6254    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
/* NOTE(review): extraction gaps -- the returns and len accounting lines
   are partially missing from this view.  */
6257 memory_address_length (addr)
6260   struct ix86_address parts;
6261   rtx base, index, disp;
/* Push/pop addressing needs no extra address bytes.  */
6264   if (GET_CODE (addr) == PRE_DEC
6265       || GET_CODE (addr) == POST_INC)
6268   if (! ix86_decompose_address (addr, &parts))
6272   index = parts.index;
6276   /* Register Indirect.  */
6277   if (base && !index && !disp)
6279       /* Special cases: ebp and esp need the two-byte modrm form.  */
6280       if (addr == stack_pointer_rtx
6281 	  || addr == arg_pointer_rtx
6282 	  || addr == frame_pointer_rtx
6283 	  || addr == hard_frame_pointer_rtx)
6287   /* Direct Addressing.  */
6288   else if (disp && !base && !index)
6293       /* Find the length of the displacement constant.  */
/* 'K' constraint means the displacement fits in a signed byte.  */
6296 	  if (GET_CODE (disp) == CONST_INT
6297 	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6303       /* An index requires the two-byte modrm form.  */
6311 /* Compute default value for "length_immediate" attribute.  When SHORTFORM is set
6312    expect that insn have 8bit immediate alternative.  */
/* Scans the recognized insn's operands for a constant; the immediate's
   encoded size then depends on the insn mode (and on the 8-bit short
   form when SHORTFORM and the constant satisfies 'K').  */
6314 ix86_attr_length_immediate_default (insn, shortform)
6320   extract_insn_cached (insn);
6321   for (i = recog_data.n_operands - 1; i >= 0; --i)
6322     if (CONSTANT_P (recog_data.operand[i]))
6327 	    && GET_CODE (recog_data.operand[i]) == CONST_INT
6328 	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6332 	  switch (get_attr_mode (insn))
/* Unknown mode on an insn with an immediate is an internal error.  */
6344 	      fatal_insn ("Unknown insn mode", insn);
6350 /* Compute default value for "length_address" attribute.  */
/* Returns the encoded length of the first MEM operand's address, if
   any; the fall-through return (0, presumably) is outside this view.  */
6352 ix86_attr_length_address_default (insn)
6356   extract_insn_cached (insn);
6357   for (i = recog_data.n_operands - 1; i >= 0; --i)
6358     if (GET_CODE (recog_data.operand[i]) == MEM)
6360 	return memory_address_length (XEXP (recog_data.operand[i], 0));
6366 /* Return the maximum number of instructions a cpu can issue.  */
/* NOTE(review): the function header and most case bodies are missing
   from this extraction; only two switch labels survive.  */
6373     case PROCESSOR_PENTIUM:
6377     case PROCESSOR_PENTIUMPRO:
6385 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6386 by DEP_INSN and nothing set by DEP_INSN. */
6389 ix86_flags_dependant (insn, dep_insn, insn_type)
6391 enum attr_type insn_type;
6395 /* Simplify the test for uninteresting insns. */
6396 if (insn_type != TYPE_SETCC
6397 && insn_type != TYPE_ICMOV
6398 && insn_type != TYPE_FCMOV
6399 && insn_type != TYPE_IBR)
6402 if ((set = single_set (dep_insn)) != 0)
6404 set = SET_DEST (set);
6407 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6408 && XVECLEN (PATTERN (dep_insn), 0) == 2
6409 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6410 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6412 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6413 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6418 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6421 /* This test is true if the dependant insn reads the flags but
6422 not any other potentially set register. */
6423 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6426 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6432 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6433    address with operands set by DEP_INSN.  */
/* For an lea the "address" is the SET_SRC itself; otherwise the first
   MEM operand's address is examined.  NOTE(review): extraction gaps --
   some returns/braces are missing from this view.  */
6436 ix86_agi_dependant (insn, dep_insn, insn_type)
6438      enum attr_type insn_type;
6442   if (insn_type == TYPE_LEA)
6444       addr = PATTERN (insn);
6445       if (GET_CODE (addr) == SET)
6447       else if (GET_CODE (addr) == PARALLEL
6448 	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6449 	addr = XVECEXP (addr, 0, 0);
6452       addr = SET_SRC (addr);
6457       extract_insn_cached (insn);
6458       for (i = recog_data.n_operands - 1; i >= 0; --i)
6459 	if (GET_CODE (recog_data.operand[i]) == MEM)
6461 	    addr = XEXP (recog_data.operand[i], 0);
/* True iff DEP_INSN writes something the address reads.  */
6468   return modified_in_p (addr, dep_insn);
6472 ix86_adjust_cost (insn, link, dep_insn, cost)
6473 rtx insn, link, dep_insn;
6476 enum attr_type insn_type, dep_insn_type;
6477 enum attr_memory memory;
6479 int dep_insn_code_number;
6481 /* Anti and output depenancies have zero cost on all CPUs. */
6482 if (REG_NOTE_KIND (link) != 0)
6485 dep_insn_code_number = recog_memoized (dep_insn);
6487 /* If we can't recognize the insns, we can't really do anything. */
6488 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6491 insn_type = get_attr_type (insn);
6492 dep_insn_type = get_attr_type (dep_insn);
6494 /* Prologue and epilogue allocators can have a false dependency on ebp.
6495 This results in one cycle extra stall on Pentium prologue scheduling,
6496 so handle this important case manually. */
6497 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6498 && dep_insn_type == TYPE_ALU
6499 && !reg_mentioned_p (stack_pointer_rtx, insn))
6504 case PROCESSOR_PENTIUM:
6505 /* Address Generation Interlock adds a cycle of latency. */
6506 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6509 /* ??? Compares pair with jump/setcc. */
6510 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6513 /* Floating point stores require value to be ready one cycle ealier. */
6514 if (insn_type == TYPE_FMOV
6515 && get_attr_memory (insn) == MEMORY_STORE
6516 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6520 case PROCESSOR_PENTIUMPRO:
6521 /* Since we can't represent delayed latencies of load+operation,
6522 increase the cost here for non-imov insns. */
6523 if (dep_insn_type != TYPE_IMOV
6524 && dep_insn_type != TYPE_FMOV
6525 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6526 || memory == MEMORY_BOTH))
6529 /* INT->FP conversion is expensive. */
6530 if (get_attr_fp_int_src (dep_insn))
6533 /* There is one cycle extra latency between an FP op and a store. */
6534 if (insn_type == TYPE_FMOV
6535 && (set = single_set (dep_insn)) != NULL_RTX
6536 && (set2 = single_set (insn)) != NULL_RTX
6537 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6538 && GET_CODE (SET_DEST (set2)) == MEM)
6543 /* The esp dependency is resolved before the instruction is really
6545 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6546 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6549 /* Since we can't represent delayed latencies of load+operation,
6550 increase the cost here for non-imov insns. */
6551 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6552 || memory == MEMORY_BOTH)
6553 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6555 /* INT->FP conversion is expensive. */
6556 if (get_attr_fp_int_src (dep_insn))
6560 case PROCESSOR_ATHLON:
6561 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6562 || memory == MEMORY_BOTH)
6564 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
/* Per-cycle scheduling state for the PPro decode model.
   NOTE(review): the decode[] member and closing brace are missing from
   this extraction; usage below suggests a 3-entry rtx decode array.  */
6579 struct ppro_sched_data
6582   int issued_this_cycle;
/* Return the "length" attribute of INSN, or a conservative fallback
   (outside this view) when the insn is not recognizable.  */
6587 ix86_safe_length (insn)
6590   if (recog_memoized (insn) >= 0)
6591     return get_attr_length(insn);
/* Return the prefix-length of INSN when recognizable.
   NOTE(review): the body returns get_attr_length, identical to
   ix86_safe_length above -- for a *prefix* length this looks wrong,
   but we cannot confirm a get_attr_length_prefix accessor exists from
   this view; verify against insn-attr.h before changing.  */
6597 ix86_safe_length_prefix (insn)
6600   if (recog_memoized (insn) >= 0)
6601     return get_attr_length(insn);
/* Return the "memory" attribute of INSN, or MEMORY_UNKNOWN when the
   insn cannot be recognized.  */
6606 static enum attr_memory
6607 ix86_safe_memory (insn)
6610   if (recog_memoized (insn) >= 0)
6611     return get_attr_memory(insn);
6613   return MEMORY_UNKNOWN;
/* Return the Pentium pairing class of INSN, or PENT_PAIR_NP
   (non-pairable) when the insn cannot be recognized.  */
6616 static enum attr_pent_pair
6617 ix86_safe_pent_pair (insn)
6620   if (recog_memoized (insn) >= 0)
6621     return get_attr_pent_pair(insn);
6623   return PENT_PAIR_NP;
/* Return the PPro uop class of INSN, or the pessimistic PPRO_UOPS_MANY
   when the insn cannot be recognized.  */
6626 static enum attr_ppro_uops
6627 ix86_safe_ppro_uops (insn)
6630   if (recog_memoized (insn) >= 0)
6631     return get_attr_ppro_uops (insn);
6633   return PPRO_UOPS_MANY;
/* Print the UIDs of the insns currently occupying the three PPro
   decoder slots to the scheduler DUMP file (slot 0 first; slots 1 and 2
   only if occupied).  No output when slot 0 is empty.  */
6637 ix86_dump_ppro_packet (dump)
6640   if (ix86_sched_data.ppro.decode[0])
6642       fprintf (dump, "PPRO packet: %d",
6643 	       INSN_UID (ix86_sched_data.ppro.decode[0]));
6644       if (ix86_sched_data.ppro.decode[1])
6645 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6646       if (ix86_sched_data.ppro.decode[2])
6647 	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6652 /* We're beginning a new block.  Initialize data structures as necessary.  */
/* Zeroes the whole per-block scheduling state; DUMP and SCHED_VERBOSE
   are unused on this target.  */
6655 ix86_sched_init (dump, sched_verbose)
6656      FILE *dump ATTRIBUTE_UNUSED;
6657      int sched_verbose ATTRIBUTE_UNUSED;
6659   memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6662 /* Shift INSN to SLOT, and shift everything else down.  */
/* Rotates *INSNP up to SLOT in the ready array, sliding the insns in
   between down one position.  NOTE(review): the insn save/restore lines
   around this loop are missing from this extraction.  */
6665 ix86_reorder_insn (insnp, slot)
6672 	insnp[0] = insnp[1];
6673       while (++insnp != slot);
6678 /* Find an instruction with given pairability and minimal amount of cycles
6679    lost by the fact that the CPU waits for both pipelines to finish before
6680    reading next instructions.  Also take care that both instructions together
6681    can not exceed 7 bytes.  */
/* Scans READY..E_READY for the best partner of FIRST with pairing class
   TYPE; returns a pointer into the ready array (bestinsnp) or NULL.  */
6684 ix86_pent_find_pair (e_ready, ready, type, first)
6687      enum attr_pent_pair type;
6690   int mincycles, cycles;
6691   enum attr_pent_pair tmp;
6692   enum attr_memory memory;
6693   rtx *insnp, *bestinsnp = NULL;
/* FIRST itself must fit the 7-byte pairing window.  */
6695   if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6698   memory = ix86_safe_memory (first);
6699   cycles = result_ready_cost (first);
6700   mincycles = INT_MAX;
6702   for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6703     if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6704 	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6706 	enum attr_memory second_memory;
6707 	int secondcycles, currentcycles;
6709 	second_memory = ix86_safe_memory (*insnp);
6710 	secondcycles = result_ready_cost (*insnp);
/* Cost of pairing = latency mismatch between the two pipes.  */
6711 	currentcycles = abs (cycles - secondcycles);
6713 	if (secondcycles >= 1 && cycles >= 1)
6715 	    /* Two read/modify/write instructions together takes two
6717 	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6720 	    /* Read modify/write instruction followed by read/modify
6721 	       takes one cycle longer.  */
6722 	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6723 	        && tmp != PENT_PAIR_UV
6724 	        && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6727 	if (currentcycles < mincycles)
6728 	  bestinsnp = insnp, mincycles = currentcycles;
6734 /* Subroutines of ix86_sched_reorder.  */
/* Pentium pairing: given the ready list, find a partner for the head
   insn (*E_READY) so the U and V pipes both issue this cycle, and move
   the chosen pair to the tail of the ready list in the right order.
   NOTE(review): extraction gaps -- some early returns/braces missing.  */
6737 ix86_sched_reorder_pentium (ready, e_ready)
6741   enum attr_pent_pair pair1, pair2;
6744   /* This wouldn't be necessary if Haifa knew that static insn ordering
6745      is important to which pipe an insn is issued to.  So we have to make
6746      some minor rearrangements.  */
6748   pair1 = ix86_safe_pent_pair (*e_ready);
6750   /* If the first insn is non-pairable, let it be. */
6751   if (pair1 == PENT_PAIR_NP)
6754   pair2 = PENT_PAIR_NP;
6757   /* If the first insn is UV or PV pairable, search for a PU
6759   if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6761       insnp = ix86_pent_find_pair (e_ready-1, ready,
6762 				   PENT_PAIR_PU, *e_ready);
6764 	pair2 = PENT_PAIR_PU;
6767   /* If the first insn is PU or UV pairable, search for a PV
6769   if (pair2 == PENT_PAIR_NP
6770       && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6772       insnp = ix86_pent_find_pair (e_ready-1, ready,
6773 				   PENT_PAIR_PV, *e_ready);
6775 	pair2 = PENT_PAIR_PV;
6778   /* If the first insn is pairable, search for a UV
6780   if (pair2 == PENT_PAIR_NP)
6782       insnp = ix86_pent_find_pair (e_ready-1, ready,
6783 				   PENT_PAIR_UV, *e_ready);
6785 	pair2 = PENT_PAIR_UV;
6788   if (pair2 == PENT_PAIR_NP)
6791   /* Found something!  Decide if we need to swap the order.  */
6792   if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6793       || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6794 	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6795 	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6796     ix86_reorder_insn (insnp, e_ready);
6798     ix86_reorder_insn (insnp, e_ready - 1);
/* PPro decoder modelling: arrange the tail of the ready list so the
   4-1-1 decoder template is filled -- one complex (2-4 uop) insn for
   decoder 0 and up to two single-uop insns for decoders 1 and 2.
   Records the number of insns issued this cycle in ix86_sched_data.
   NOTE(review): extraction gaps -- several braces/breaks missing.  */
6802 ix86_sched_reorder_ppro (ready, e_ready)
6807   enum attr_ppro_uops cur_uops;
6808   int issued_this_cycle;
6812   /* At this point .ppro.decode contains the state of the three
6813      decoders from last "cycle".  That is, those insns that were
6814      actually independent.  But here we're scheduling for the
6815      decoder, and we may find things that are decodable in the
6818   memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
6819   issued_this_cycle = 0;
6822   cur_uops = ix86_safe_ppro_uops (*insnp);
6824   /* If the decoders are empty, and we've a complex insn at the
6825      head of the priority queue, let it issue without complaint.  */
6826   if (decode[0] == NULL)
6828       if (cur_uops == PPRO_UOPS_MANY)
6834       /* Otherwise, search for a 2-4 uop unsn to issue.  */
6835       while (cur_uops != PPRO_UOPS_FEW)
6839 	  cur_uops = ix86_safe_ppro_uops (*--insnp);
6842       /* If so, move it to the head of the line.  */
6843       if (cur_uops == PPRO_UOPS_FEW)
6844 	ix86_reorder_insn (insnp, e_ready);
6846       /* Issue the head of the queue.  */
6847       issued_this_cycle = 1;
6848       decode[0] = *e_ready--;
6851   /* Look for simple insns to fill in the other two slots.  */
6852   for (i = 1; i < 3; ++i)
6853     if (decode[i] == NULL)
6855 	if (ready >= e_ready)
6859 	cur_uops = ix86_safe_ppro_uops (*insnp);
6860 	while (cur_uops != PPRO_UOPS_ONE)
6864 	    cur_uops = ix86_safe_ppro_uops (*--insnp);
6867 	/* Found one.  Move it to the head of the queue and issue it.  */
6868 	if (cur_uops == PPRO_UOPS_ONE)
6870 	    ix86_reorder_insn (insnp, e_ready);
6871 	    decode[i] = *e_ready--;
6872 	    issued_this_cycle++;
6876 	/* ??? Didn't find one.  Ideally, here we would do a lazy split
6877 	   of 2-uop insns, issue one and queue the other.  */
/* Guarantee forward progress even when nothing was classified.  */
6881   if (issued_this_cycle == 0)
6882     issued_this_cycle = 1;
6883   ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6886 /* We are about to being issuing insns for this clock cycle.
6887    Override the default sort algorithm to better slot instructions.  */
/* SCHED_REORDER hook: dispatches to the per-CPU reorder routine and
   returns how many insns may issue this cycle.  */
6889 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6890      FILE *dump ATTRIBUTE_UNUSED;
6891      int sched_verbose ATTRIBUTE_UNUSED;
6894      int clock_var ATTRIBUTE_UNUSED;
/* e_ready points at the LAST (highest priority) ready insn.  */
6896   rtx *e_ready = ready + n_ready - 1;
6906     case PROCESSOR_PENTIUM:
6907       ix86_sched_reorder_pentium (ready, e_ready);
6910     case PROCESSOR_PENTIUMPRO:
6911       ix86_sched_reorder_ppro (ready, e_ready);
6916   return ix86_issue_rate ();
6919 /* We are about to issue INSN.  Return the number of insns left on the
6920    ready queue that can be issued this cycle.  */
/* On PPro this also advances the modelled 3-slot decoder state and
   dumps finished packets; other CPUs just decrement CAN_ISSUE_MORE.
   NOTE(review): extraction gaps -- switch header/braces missing.  */
6923 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6933     return can_issue_more - 1;
6935     case PROCESSOR_PENTIUMPRO:
6937       enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn consumes the whole decode group by itself.  */
6939       if (uops == PPRO_UOPS_MANY)
6942 	  ix86_dump_ppro_packet (dump);
6943 	  ix86_sched_data.ppro.decode[0] = insn;
6944 	  ix86_sched_data.ppro.decode[1] = NULL;
6945 	  ix86_sched_data.ppro.decode[2] = NULL;
6947 	  ix86_dump_ppro_packet (dump);
6948 	  ix86_sched_data.ppro.decode[0] = NULL;
6950       else if (uops == PPRO_UOPS_FEW)
6953 	  ix86_dump_ppro_packet (dump);
6954 	  ix86_sched_data.ppro.decode[0] = insn;
6955 	  ix86_sched_data.ppro.decode[1] = NULL;
6956 	  ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: drop it into the first free decoder slot.  */
6960 	  for (i = 0; i < 3; ++i)
6961 	    if (ix86_sched_data.ppro.decode[i] == NULL)
6963 		ix86_sched_data.ppro.decode[i] = insn;
6971 	      ix86_dump_ppro_packet (dump);
6972 	      ix86_sched_data.ppro.decode[0] = NULL;
6973 	      ix86_sched_data.ppro.decode[1] = NULL;
6974 	      ix86_sched_data.ppro.decode[2] = NULL;
6978       return --ix86_sched_data.ppro.issued_this_cycle;
6982 /* Compute the alignment given to a constant that is being placed in memory.
6983    EXP is the constant and ALIGN is the alignment that the object would
6985    The value of this function is used instead of that alignment to align
/* Bumps FP constants to 64/128-bit alignment and long strings to a
   wider alignment (the returned values are outside this view).  */
6989 ix86_constant_alignment (exp, align)
6993   if (TREE_CODE (exp) == REAL_CST)
6995       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
6997       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7000   else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7007 /* Compute the alignment for a static variable.
7008    TYPE is the data type, and ALIGN is the alignment that
7009    the object would ordinarily have.  The value of this function is used
7010    instead of that alignment to align the object.  */
/* Large aggregates (>= 256 bits) get 256-bit alignment; DFmode-ish data
   gets 64, 128-bit modes get 128.  Parallel logic in ix86_local_alignment
   below -- keep the two in sync.  */
7013 ix86_data_alignment (type, align)
7017   if (AGGREGATE_TYPE_P (type)
7019        && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7020        && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7021 	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7024   if (TREE_CODE (type) == ARRAY_TYPE)
7026       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7028       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7031   else if (TREE_CODE (type) == COMPLEX_TYPE)
7034       if (TYPE_MODE (type) == DCmode && align < 64)
7036       if (TYPE_MODE (type) == XCmode && align < 128)
7039   else if ((TREE_CODE (type) == RECORD_TYPE
7040 	    || TREE_CODE (type) == UNION_TYPE
7041 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
7042 	   && TYPE_FIELDS (type))
/* Only the FIRST field's mode is checked -- presumably a heuristic for
   structs whose leading member dominates alignment; TODO confirm.  */
7044       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7046       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7049   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7050 	   || TREE_CODE (type) == INTEGER_TYPE)
7052       if (TYPE_MODE (type) == DFmode && align < 64)
7054       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7061 /* Compute the alignment for a local variable.
7062    TYPE is the data type, and ALIGN is the alignment that
7063    the object would ordinarily have.  The value of this macro is used
7064    instead of that alignment to align the object.  */
/* Same per-type rules as ix86_data_alignment, minus the large-aggregate
   bump -- keep the two in sync.  */
7067 ix86_local_alignment (type, align)
7071   if (TREE_CODE (type) == ARRAY_TYPE)
7073       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7075       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7078   else if (TREE_CODE (type) == COMPLEX_TYPE)
7080       if (TYPE_MODE (type) == DCmode && align < 64)
7082       if (TYPE_MODE (type) == XCmode && align < 128)
7085   else if ((TREE_CODE (type) == RECORD_TYPE
7086 	    || TREE_CODE (type) == UNION_TYPE
7087 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
7088 	   && TYPE_FIELDS (type))
7090       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7092       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7095   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7096 	   || TREE_CODE (type) == INTEGER_TYPE)
7099       if (TYPE_MODE (type) == DFmode && align < 64)
7101       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Convenience wrapper: register a target-specific (BUILT_IN_MD) builtin
   named NAME with function type TYPE and function code CODE.  */
7107 #define def_builtin(NAME, TYPE, CODE) \
7108 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
/* Descriptor for one table-driven builtin: the insn pattern to emit, the
   builtin's function code, and (for compares) the rtx comparison code.
   NOTE(review): this listing elides at least one field between `icode' and
   `code' — the initializers below (e.g. in bdesc_comi) supply a name string
   and a trailing flag, so the full struct presumably also has a `name'
   member and an integer flag member.  */
7109 struct builtin_description
7111 enum insn_code icode;
7113 enum ix86_builtins code;
7114 enum rtx_code comparison;
/* SSE scalar compare builtins mapped onto the comi/ucomi patterns.
   Initializer order: { icode, name, code, comparison, flag }.  GT/GE are
   expressed as LT/LE with the trailing flag set to 1 — presumably an
   operand-swap marker (NOTE(review): the flag's consumer is not visible in
   this listing; see ix86_expand_sse_comi).  */
7118 static struct builtin_description bdesc_comi[] =
7120 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7121 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7122 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7123 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7124 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7125 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7126 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7127 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7128 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7129 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7130 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7131 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
/* Two-operand SSE and MMX builtins, registered in bulk by the bdesc_2arg
   loop in ix86_init_builtins.  Initializer order: { icode, name, code,
   comparison, flag }.  Entries with a 0 name are registered separately by
   hand (with more precise types) later in ix86_init_builtins.  */
7134 static struct builtin_description bdesc_2arg[] =
/* SSE packed/scalar float arithmetic.  */
7137 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7138 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7139 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7140 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7141 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7142 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7143 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7144 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
/* SSE compares; GT/GE entries reuse LT/LE with the trailing flag = 1.  */
7146 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7147 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7148 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7149 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7150 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7151 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7152 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7153 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7154 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7155 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7156 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7157 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7158 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7159 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7160 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7161 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7162 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7163 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7164 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7165 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7166 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7167 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7168 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7169 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
/* SSE min/max.  */
7171 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7172 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7173 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7174 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
/* SSE logical ops (done in TImode).  */
7176 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7177 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7178 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7179 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
/* SSE data movement / unpack.  */
7181 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7182 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7183 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7184 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7185 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
/* MMX integer arithmetic.  */
7188 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7189 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7190 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7191 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7192 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7193 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
/* MMX saturating arithmetic (signed and unsigned).  */
7195 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7196 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7197 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7198 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7199 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7200 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7201 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7202 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
/* MMX multiplies.  */
7204 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7205 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7206 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
/* MMX logical ops (DImode).  */
7208 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7209 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7210 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7211 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
/* MMX unsigned averages.  */
7213 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7214 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
/* MMX element-wise compares.  */
7216 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7217 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7218 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7219 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7220 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7221 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
/* MMX min/max.  */
7223 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7224 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7225 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7226 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
/* MMX unpack/interleave.  */
7228 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7229 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7230 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7231 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7232 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7233 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
/* Nameless entries: registered by hand with special types later.  */
7236 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7237 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7238 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7240 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7241 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
/* Shifts: each pattern serves both the register-count and immediate-count
   builtin codes.  */
7243 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7244 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7245 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7246 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7247 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7248 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7250 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7251 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7252 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7253 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7254 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7255 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7257 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7258 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7259 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7260 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7262 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7263 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
/* One-operand builtins (mask extraction, sqrt/rsqrt/rcp, conversions).
   All entries have a 0 name: they are registered by hand with precise
   types later in ix86_init_builtins.  */
7267 static struct builtin_description bdesc_1arg[] =
7269 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7270 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7272 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7273 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7274 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7276 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7277 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7278 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7279 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7283 /* Expand all the target specific builtins. This is not called if TARGET_MMX
7284 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
/* ix86_init_builtins: construct the tree function types for every MMX/SSE
   builtin and register them via def_builtin.  Phase 1 builds the type
   nodes; phase 2 walks bdesc_2arg; phase 3 hand-registers the rest.
   NOTE(review): many lines are elided in this listing (variable
   declarations such as `i' and `type', several `tree' declarator lines,
   switch labels, and the trailing `endlink' arguments of most tree_cons
   chains), so some statements below appear syntactically incomplete.  */
7287 ix86_init_builtins ()
7289 struct builtin_description * d;
/* endlink terminates every fixed-arity argument list.  */
7291 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
/* Pointer types used by load/store/prefetch builtins.  */
7293 tree pchar_type_node = build_pointer_type (char_type_node);
7294 tree pfloat_type_node = build_pointer_type (float_type_node);
7295 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7296 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Comparison result types.  */
7299 tree int_ftype_v4sf_v4sf
7300 = build_function_type (integer_type_node,
7301 tree_cons (NULL_TREE, V4SF_type_node,
7302 tree_cons (NULL_TREE,
7305 tree v4si_ftype_v4sf_v4sf
7306 = build_function_type (V4SI_type_node,
7307 tree_cons (NULL_TREE, V4SF_type_node,
7308 tree_cons (NULL_TREE,
7311 /* MMX/SSE/integer conversions. */
7312 tree int_ftype_v4sf_int
7313 = build_function_type (integer_type_node,
7314 tree_cons (NULL_TREE, V4SF_type_node,
7315 tree_cons (NULL_TREE,
/* NOTE(review): the declarator lines for the next four types (7318, 7322,
   7326, 7330) are elided; only the initializers survive.  */
7319 = build_function_type (integer_type_node,
7320 tree_cons (NULL_TREE, V4SF_type_node,
7323 = build_function_type (integer_type_node,
7324 tree_cons (NULL_TREE, V8QI_type_node,
7327 = build_function_type (integer_type_node,
7328 tree_cons (NULL_TREE, V2SI_type_node,
7331 = build_function_type (V2SI_type_node,
7332 tree_cons (NULL_TREE, integer_type_node,
/* NOTE(review): name says v4sf_ftype_v4sf_int but the visible return type
   is integer_type_node — possibly a pre-existing bug or an elided-line
   artifact; confirm against the full source.  */
7334 tree v4sf_ftype_v4sf_int
7335 = build_function_type (integer_type_node,
7336 tree_cons (NULL_TREE, V4SF_type_node,
7337 tree_cons (NULL_TREE, integer_type_node,
7339 tree v4sf_ftype_v4sf_v2si
7340 = build_function_type (V4SF_type_node,
7341 tree_cons (NULL_TREE, V4SF_type_node,
7342 tree_cons (NULL_TREE, V2SI_type_node,
7344 tree int_ftype_v4hi_int
7345 = build_function_type (integer_type_node,
7346 tree_cons (NULL_TREE, V4HI_type_node,
7347 tree_cons (NULL_TREE, integer_type_node,
7349 tree v4hi_ftype_v4hi_int_int
7350 = build_function_type (V4HI_type_node,
7351 tree_cons (NULL_TREE, V4HI_type_node,
7352 tree_cons (NULL_TREE, integer_type_node,
7353 tree_cons (NULL_TREE,
7356 /* Miscellaneous. */
7357 tree v8qi_ftype_v4hi_v4hi
7358 = build_function_type (V8QI_type_node,
7359 tree_cons (NULL_TREE, V4HI_type_node,
7360 tree_cons (NULL_TREE, V4HI_type_node,
7362 tree v4hi_ftype_v2si_v2si
7363 = build_function_type (V4HI_type_node,
7364 tree_cons (NULL_TREE, V2SI_type_node,
7365 tree_cons (NULL_TREE, V2SI_type_node,
7367 tree v4sf_ftype_v4sf_v4sf_int
7368 = build_function_type (V4SF_type_node,
7369 tree_cons (NULL_TREE, V4SF_type_node,
7370 tree_cons (NULL_TREE, V4SF_type_node,
7371 tree_cons (NULL_TREE,
7374 tree v4hi_ftype_v8qi_v8qi
7375 = build_function_type (V4HI_type_node,
7376 tree_cons (NULL_TREE, V8QI_type_node,
7377 tree_cons (NULL_TREE, V8QI_type_node,
7379 tree v2si_ftype_v4hi_v4hi
7380 = build_function_type (V2SI_type_node,
7381 tree_cons (NULL_TREE, V4HI_type_node,
7382 tree_cons (NULL_TREE, V4HI_type_node,
7384 tree v4hi_ftype_v4hi_int
7385 = build_function_type (V4HI_type_node,
7386 tree_cons (NULL_TREE, V4HI_type_node,
7387 tree_cons (NULL_TREE, integer_type_node,
7389 tree di_ftype_di_int
7390 = build_function_type (long_long_unsigned_type_node,
7391 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7392 tree_cons (NULL_TREE, integer_type_node,
/* Shift-by-DI variants.  */
7394 tree v8qi_ftype_v8qi_di
7395 = build_function_type (V8QI_type_node,
7396 tree_cons (NULL_TREE, V8QI_type_node,
7397 tree_cons (NULL_TREE,
7398 long_long_integer_type_node,
7400 tree v4hi_ftype_v4hi_di
7401 = build_function_type (V4HI_type_node,
7402 tree_cons (NULL_TREE, V4HI_type_node,
7403 tree_cons (NULL_TREE,
7404 long_long_integer_type_node,
7406 tree v2si_ftype_v2si_di
7407 = build_function_type (V2SI_type_node,
7408 tree_cons (NULL_TREE, V2SI_type_node,
7409 tree_cons (NULL_TREE,
7410 long_long_integer_type_node,
7412 tree void_ftype_void
7413 = build_function_type (void_type_node, endlink);
7414 tree void_ftype_pchar_int
7415 = build_function_type (void_type_node,
7416 tree_cons (NULL_TREE, pchar_type_node,
7417 tree_cons (NULL_TREE, integer_type_node,
7419 tree void_ftype_unsigned
7420 = build_function_type (void_type_node,
7421 tree_cons (NULL_TREE, unsigned_type_node,
7423 tree unsigned_ftype_void
7424 = build_function_type (unsigned_type_node, endlink);
/* NOTE(review): declarators for di_ftype_void (7425) and ti_ftype_void
   (7427) are elided; only the initializers remain.  */
7426 = build_function_type (long_long_unsigned_type_node, endlink);
7428 = build_function_type (intTI_type_node, endlink);
7429 tree v2si_ftype_v4sf
7430 = build_function_type (V2SI_type_node,
7431 tree_cons (NULL_TREE, V4SF_type_node,
/* maskmovq takes two V8QI vectors and a char * destination.  */
7434 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7435 tree_cons (NULL_TREE, V8QI_type_node,
7436 tree_cons (NULL_TREE,
7439 tree void_ftype_v8qi_v8qi_pchar
7440 = build_function_type (void_type_node, maskmovq_args);
7441 tree v4sf_ftype_pfloat
7442 = build_function_type (V4SF_type_node,
7443 tree_cons (NULL_TREE, pfloat_type_node,
7445 tree v4sf_ftype_float
7446 = build_function_type (V4SF_type_node,
7447 tree_cons (NULL_TREE, float_type_node,
7449 tree v4sf_ftype_float_float_float_float
7450 = build_function_type (V4SF_type_node,
7451 tree_cons (NULL_TREE, float_type_node,
7452 tree_cons (NULL_TREE, float_type_node,
7453 tree_cons (NULL_TREE,
7455 tree_cons (NULL_TREE,
7458 /* @@@ the type is bogus */
7459 tree v4sf_ftype_v4sf_pv2si
7460 = build_function_type (V4SF_type_node,
7461 tree_cons (NULL_TREE, V4SF_type_node,
7462 tree_cons (NULL_TREE, pv2si_type_node,
7464 tree v4sf_ftype_pv2si_v4sf
7465 = build_function_type (V4SF_type_node,
7466 tree_cons (NULL_TREE, V4SF_type_node,
7467 tree_cons (NULL_TREE, pv2si_type_node,
7469 tree void_ftype_pfloat_v4sf
7470 = build_function_type (void_type_node,
7471 tree_cons (NULL_TREE, pfloat_type_node,
7472 tree_cons (NULL_TREE, V4SF_type_node,
7474 tree void_ftype_pdi_di
7475 = build_function_type (void_type_node,
7476 tree_cons (NULL_TREE, pdi_type_node,
7477 tree_cons (NULL_TREE,
7478 long_long_unsigned_type_node,
7480 /* Normal vector unops. */
7481 tree v4sf_ftype_v4sf
7482 = build_function_type (V4SF_type_node,
7483 tree_cons (NULL_TREE, V4SF_type_node,
7486 /* Normal vector binops. */
7487 tree v4sf_ftype_v4sf_v4sf
7488 = build_function_type (V4SF_type_node,
7489 tree_cons (NULL_TREE, V4SF_type_node,
7490 tree_cons (NULL_TREE, V4SF_type_node,
7492 tree v8qi_ftype_v8qi_v8qi
7493 = build_function_type (V8QI_type_node,
7494 tree_cons (NULL_TREE, V8QI_type_node,
7495 tree_cons (NULL_TREE, V8QI_type_node,
7497 tree v4hi_ftype_v4hi_v4hi
7498 = build_function_type (V4HI_type_node,
7499 tree_cons (NULL_TREE, V4HI_type_node,
7500 tree_cons (NULL_TREE, V4HI_type_node,
7502 tree v2si_ftype_v2si_v2si
7503 = build_function_type (V2SI_type_node,
7504 tree_cons (NULL_TREE, V2SI_type_node,
7505 tree_cons (NULL_TREE, V2SI_type_node,
/* NOTE(review): declarators for ti_ftype_ti_ti (7507) and di_ftype_di_di
   (7512) are elided.  */
7508 = build_function_type (intTI_type_node,
7509 tree_cons (NULL_TREE, intTI_type_node,
7510 tree_cons (NULL_TREE, intTI_type_node,
7513 = build_function_type (long_long_unsigned_type_node,
7514 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7515 tree_cons (NULL_TREE,
7516 long_long_unsigned_type_node,
/* Phase 2: register the bdesc_2arg table, picking a function type from
   the insn's operand-1 machine mode (switch labels elided here).  */
7519 /* Add all builtins that are more or less simple operations on two
7521 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7523 /* Use one of the operands; the target can have a different mode for
7524 mask-generating compares. */
7525 enum machine_mode mode;
7530 mode = insn_data[d->icode].operand[1].mode;
/* Skip SSE-mode entries when only MMX is enabled.  */
7532 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7538 type = v4sf_ftype_v4sf_v4sf;
7541 type = v8qi_ftype_v8qi_v8qi;
7544 type = v4hi_ftype_v4hi_v4hi;
7547 type = v2si_ftype_v2si_v2si;
7550 type = ti_ftype_ti_ti;
7553 type = di_ftype_di_di;
7560 /* Override for comparisons. */
7561 if (d->icode == CODE_FOR_maskcmpv4sf3
7562 || d->icode == CODE_FOR_maskncmpv4sf3
7563 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7564 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7565 type = v4si_ftype_v4sf_v4sf;
7567 def_builtin (d->name, type, d->code);
/* Phase 3: hand-registered builtins whose types the table loop cannot
   express.  */
7570 /* Add the remaining MMX insns with somewhat more complicated types. */
7571 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
7572 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
7573 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
7574 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
7575 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
7576 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
7577 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
7578 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
7579 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
7581 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
7582 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
7583 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
7585 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
7586 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
7588 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
7589 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
/* NOTE(review): the `if (! TARGET_SSE) return;' style guard implied by
   this comment is elided from the listing.  */
7591 /* Everything beyond this point is SSE only. */
7595 /* comi/ucomi insns. */
7596 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
7597 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
7599 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
7600 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
7601 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
7603 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
7604 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
7605 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
7606 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
7607 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
7608 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
7610 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
7611 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
7613 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
7615 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
7616 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
7617 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
7618 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
7619 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
7620 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
7622 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
7623 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
7624 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
7625 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
7627 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
7628 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
7629 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
7630 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
7632 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
7633 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
7635 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
7637 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
7638 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
7639 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
7640 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
7641 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
7642 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
7644 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
7646 /* Composite intrinsics. */
7647 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
7648 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
7649 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
7650 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
7651 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
7652 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
7653 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
7656 /* Errors in the source file can cause expand_expr to return const0_rtx
7657 where we expect a vector. To avoid crashing, use one of the vector
7658 clear instructions. */
/* safe_vector_operand: if X is const0_rtx, replace it with a fresh
   register of MODE cleared via mmx_clrdi (MMX modes) or sse_clrti (SSE
   modes), wrapping in a DImode/TImode SUBREG when MODE differs.
   NOTE(review): the return type, the early `return x' for the non-zero
   case and the final `return x' are elided from this listing.  */
7660 safe_vector_operand (x, mode)
7662 enum machine_mode mode;
/* Non-error operands pass through unchanged.  */
7664 if (x != const0_rtx)
7666 x = gen_reg_rtx (mode);
/* Clear via the matching register file: MMX (64-bit) or SSE (128-bit).  */
7668 if (VALID_MMX_REG_MODE (mode))
7669 emit_insn (gen_mmx_clrdi (mode == DImode ? x
7670 : gen_rtx_SUBREG (DImode, x, 0)));
7672 emit_insn (gen_sse_clrti (mode == TImode ? x
7673 : gen_rtx_SUBREG (TImode, x, 0)));
7677 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Emits ICODE on the two arguments in ARGLIST, producing the result in
   TARGET (re-allocated if it has the wrong mode or fails the operand-0
   predicate).  NOTE(review): the return type, the `rtx pat' declaration,
   the abort() body at 7707 and the final emit/return lines are elided
   from this listing.  */
7680 ix86_expand_binop_builtin (icode, arglist, target)
7681 enum insn_code icode;
/* Pull the two argument trees and expand them to rtx.  */
7686 tree arg0 = TREE_VALUE (arglist);
7687 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7688 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7689 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Modes the insn pattern expects for result and inputs.  */
7690 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7691 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7692 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx from erroneous source (see safe_vector_operand).  */
7694 if (VECTOR_MODE_P (mode0))
7695 op0 = safe_vector_operand (op0, mode0);
7696 if (VECTOR_MODE_P (mode1))
7697 op1 = safe_vector_operand (op1, mode1);
7700 || GET_MODE (target) != tmode
7701 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7702 target = gen_reg_rtx (tmode);
7704 /* In case the insn wants input operands in modes different from
7705 the result, abort. */
7706 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force operands into registers when they fail the insn predicates.  */
7709 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7710 op0 = copy_to_mode_reg (mode0, op0);
7711 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7712 op1 = copy_to_mode_reg (mode1, op1);
7714 pat = GEN_FCN (icode) (target, op0, op1);
7721 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Emits ICODE storing arg1 through the pointer arg0.  SHUFFLE >= 0 means
   first permute the value with sse_shufps using SHUFFLE as the immediate
   (used by storeps1/storerps).  NOTE(review): the return type, `rtx pat'
   declaration, the `if (shuffle >= 0)' guard line before 7744 and the
   trailing emit/return lines are elided from this listing.  */
7724 ix86_expand_store_builtin (icode, arglist, shuffle)
7725 enum insn_code icode;
7730 tree arg0 = TREE_VALUE (arglist);
7731 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7732 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7733 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7734 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
7735 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
7737 if (VECTOR_MODE_P (mode1))
7738 op1 = safe_vector_operand (op1, mode1);
/* Destination: a MEM at the (register-copied) pointer value.  */
7740 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Shuffled stores need the value in a register regardless of predicate.  */
7741 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
7742 op1 = copy_to_mode_reg (mode1, op1);
7744 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
7745 pat = GEN_FCN (icode) (op0, op1);
7751 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Emits ICODE on the single argument in ARGLIST into TARGET.  DO_LOAD
   non-zero treats the argument as a pointer and wraps it in a MEM (load
   builtins); otherwise the value is used directly.  NOTE(review): the
   return type, `rtx pat' declaration, the `if (do_load)'/`else' lines
   around 7771, and the trailing emit/return are elided.  */
7754 ix86_expand_unop_builtin (icode, arglist, target, do_load)
7755 enum insn_code icode;
7761 tree arg0 = TREE_VALUE (arglist);
7762 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7763 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7764 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* (Re)allocate TARGET when it is unusable for the result.  */
7767 || GET_MODE (target) != tmode
7768 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7769 target = gen_reg_rtx (tmode);
/* do_load path: dereference the pointer argument.  */
7771 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7774 if (VECTOR_MODE_P (mode0))
7775 op0 = safe_vector_operand (op0, mode0);
7777 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7778 op0 = copy_to_mode_reg (mode0, op0);
7781 pat = GEN_FCN (icode) (target, op0);
7788 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
7789 sqrtss, rsqrtss, rcpss. */
/* Like ix86_expand_unop_builtin but the pattern takes the source twice
   (GEN_FCN is called with op0 duplicated), matching the scalar-with-
   passthrough SSE forms.  NOTE(review): return type, `rtx pat'
   declaration and trailing emit/return lines are elided.  */
7792 ix86_expand_unop1_builtin (icode, arglist, target)
7793 enum insn_code icode;
7798 tree arg0 = TREE_VALUE (arglist);
7799 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7800 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7801 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7804 || GET_MODE (target) != tmode
7805 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7806 target = gen_reg_rtx (tmode);
7808 if (VECTOR_MODE_P (mode0))
7809 op0 = safe_vector_operand (op0, mode0);
7811 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7812 op0 = copy_to_mode_reg (mode0, op0);
/* Source operand supplied twice — the insn's second input is the
   pass-through upper elements.  */
7814 pat = GEN_FCN (icode) (target, op0, op0);
7821 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands one bdesc_2arg comparison entry D: builds the comparison rtx
   from D->comparison and emits D->icode with (target, op0, op1, cmp).
   When the hardware lacks the direct comparison (the GT/GE table entries),
   the operands are swapped and the condition reversed via
   swap_condition.  NOTE(review): the return type, `rtx pat/op2'
   declarations, the `if (d->flag)'-style guard around the swap at
   7846-7853, and the trailing emit/return lines are elided.  */
7824 ix86_expand_sse_compare (d, arglist, target)
7825 struct builtin_description *d;
7830 tree arg0 = TREE_VALUE (arglist);
7831 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7832 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7833 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7835 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
7836 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
7837 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
7838 enum rtx_code comparison = d->comparison;
7840 if (VECTOR_MODE_P (mode0))
7841 op0 = safe_vector_operand (op0, mode0);
7842 if (VECTOR_MODE_P (mode1))
7843 op1 = safe_vector_operand (op1, mode1);
7845 /* Swap operands if we have a comparison that isn't available in
/* Swap path: op1 is moved into a fresh target register first.  */
7849 target = gen_reg_rtx (tmode);
7850 emit_move_insn (target, op1);
7853 comparison = swap_condition (comparison);
7856 || GET_MODE (target) != tmode
7857 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
7858 target = gen_reg_rtx (tmode);
7860 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
7861 op0 = copy_to_mode_reg (mode0, op0);
7862 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
7863 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx and hand it to the insn as operand 3.  */
7865 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7866 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
7873 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands one bdesc_comi entry D: emits the comi/ucomi pattern setting
   the flags, then materializes the boolean result with setcc_2 into a
   QImode SUBREG of a zeroed SImode TARGET.  As in ix86_expand_sse_compare,
   unsupported comparisons are handled by swapping operands and reversing
   the condition.  NOTE(review): return type, `rtx pat/op2' declarations,
   the swap guard and operand-swap lines around 7896-7903, and the final
   emit/return are elided from this listing.  */
7876 ix86_expand_sse_comi (d, arglist, target)
7877 struct builtin_description *d;
7882 tree arg0 = TREE_VALUE (arglist);
7883 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7884 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7885 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7887 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
7888 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
7889 enum rtx_code comparison = d->comparison;
7891 if (VECTOR_MODE_P (mode0))
7892 op0 = safe_vector_operand (op0, mode0);
7893 if (VECTOR_MODE_P (mode1))
7894 op1 = safe_vector_operand (op1, mode1);
7896 /* Swap operands if we have a comparison that isn't available in
7903 comparison = swap_condition (comparison);
/* Result register: zero the full SImode word, then write the low byte.  */
7906 target = gen_reg_rtx (SImode);
7907 emit_move_insn (target, const0_rtx);
7908 target = gen_rtx_SUBREG (QImode, target, 0);
7910 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
7911 op0 = copy_to_mode_reg (mode0, op0);
7912 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
7913 op1 = copy_to_mode_reg (mode1, op1);
/* The comi pattern consumes the comparison rtx as its third operand and
   only sets the flags; setcc_2 then extracts the predicate bit.  */
7915 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7916 pat = GEN_FCN (d->icode) (op0, op1, op2);
7920 emit_insn (gen_setcc_2 (target, op2));
7925 /* Expand an expression EXP that calls a built-in function,
7926 with result going to TARGET if that's convenient
7927 (and in mode MODE if that's convenient).
7928 SUBTARGET may be used as the target for computing one of EXP's operands.
7929 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this listing is an elided extract.  The return type,
   the opening `switch (fcode)', most case braces and `break'/`return'
   lines, and the closing brace are not shown here.  The comments below
   describe only what the visible lines themselves do.  */
7932 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
7935 rtx subtarget ATTRIBUTE_UNUSED;
7936 enum machine_mode mode ATTRIBUTE_UNUSED;
7937 int ignore ATTRIBUTE_UNUSED;
/* Scratch state shared by all of the builtin cases below.  FNDECL and
   FCODE identify which IX86_BUILTIN_* is being expanded; ARGLIST is
   the tree list of call arguments.  */
7939 struct builtin_description *d;
7941 enum insn_code icode;
7942 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7943 tree arglist = TREE_OPERAND (exp, 1);
7944 tree arg0, arg1, arg2, arg3;
7945 rtx op0, op1, op2, pat;
7946 enum machine_mode tmode, mode0, mode1, mode2;
7947 int fcode = DECL_FUNCTION_CODE (fndecl);
/* Zero-argument builtins: emit the corresponding insn directly.  */
7951 case IX86_BUILTIN_EMMS:
7952 emit_insn (gen_emms ());
7955 case IX86_BUILTIN_SFENCE:
7956 emit_insn (gen_sfence ());
/* Move a SImode value into the low SUBREG of a fresh DImode register;
   presumably the high word is left unspecified -- confirm against the
   elided lines.  */
7959 case IX86_BUILTIN_M_FROM_INT:
7960 target = gen_reg_rtx (DImode);
7961 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7962 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
/* Inverse of the above: read the low SImode SUBREG of a DImode value.  */
7965 case IX86_BUILTIN_M_TO_INT:
7966 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7967 op0 = copy_to_mode_reg (DImode, op0);
7968 target = gen_reg_rtx (SImode);
7969 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
/* pextrw: operand 1 is the vector; operand 2 (the selector) must
   satisfy the insn's predicate, i.e. be an immediate, or we error.  */
7972 case IX86_BUILTIN_PEXTRW:
7973 icode = CODE_FOR_mmx_pextrw;
7974 arg0 = TREE_VALUE (arglist);
7975 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7976 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7977 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7978 tmode = insn_data[icode].operand[0].mode;
7979 mode0 = insn_data[icode].operand[1].mode;
7980 mode1 = insn_data[icode].operand[2].mode;
7982 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7983 op0 = copy_to_mode_reg (mode0, op0);
7984 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7986 /* @@@ better error message */
7987 error ("selector must be an immediate");
/* Reuse TARGET only when it already has the right mode and satisfies
   the output predicate (the leading condition on line 7990 is elided).  */
7991 || GET_MODE (target) != tmode
7992 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7993 target = gen_reg_rtx (tmode);
7994 pat = GEN_FCN (icode) (target, op0, op1);
/* pinsrw: three inputs; the selector (insn operand 3) must be an
   immediate.  */
8000 case IX86_BUILTIN_PINSRW:
8001 icode = CODE_FOR_mmx_pinsrw;
8002 arg0 = TREE_VALUE (arglist);
8003 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8004 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8005 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8006 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8007 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8008 tmode = insn_data[icode].operand[0].mode;
8009 mode0 = insn_data[icode].operand[1].mode;
8010 mode1 = insn_data[icode].operand[2].mode;
8011 mode2 = insn_data[icode].operand[3].mode;
8013 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8014 op0 = copy_to_mode_reg (mode0, op0);
8015 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8016 op1 = copy_to_mode_reg (mode1, op1);
8017 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8019 /* @@@ better error message */
8020 error ("selector must be an immediate");
8024 || GET_MODE (target) != tmode
8025 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8026 target = gen_reg_rtx (tmode);
8027 pat = GEN_FCN (icode) (target, op0, op1, op2);
8033 case IX86_BUILTIN_MASKMOVQ:
8034 icode = CODE_FOR_mmx_maskmovq;
8035 /* Note the arg order is different from the operand order.  arg0 (the
   insn's operand 0) is the THIRD call argument.  */
8036 arg1 = TREE_VALUE (arglist);
8037 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8038 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8039 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8040 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8041 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8042 mode0 = insn_data[icode].operand[0].mode;
8043 mode1 = insn_data[icode].operand[1].mode;
8044 mode2 = insn_data[icode].operand[2].mode;
/* NOTE(review): both checks below use operand[1].predicate; the first
   one looks like it should be operand[0] with mode0 -- verify against
   the upstream source before changing anything.  */
8046 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8047 op0 = copy_to_mode_reg (mode0, op0);
8048 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8049 op1 = copy_to_mode_reg (mode1, op1);
8050 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8051 op2 = copy_to_mode_reg (mode2, op2);
8052 pat = GEN_FCN (icode) (op0, op1, op2);
/* Simple one-operand SSE builtins delegate to the generic expanders.  */
8058 case IX86_BUILTIN_SQRTSS:
8059 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8060 case IX86_BUILTIN_RSQRTSS:
8061 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8062 case IX86_BUILTIN_RCPSS:
8063 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8065 case IX86_BUILTIN_LOADAPS:
8066 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8068 case IX86_BUILTIN_LOADUPS:
8069 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8071 case IX86_BUILTIN_STOREAPS:
8072 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8073 case IX86_BUILTIN_STOREUPS:
8074 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8076 case IX86_BUILTIN_LOADSS:
8077 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8079 case IX86_BUILTIN_STORESS:
8080 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
/* movhps/movlps loads: arg1 is a pointer value, so it is wrapped in a
   MEM of the mode the insn expects for its second input.  */
8082 case IX86_BUILTIN_LOADHPS:
8083 case IX86_BUILTIN_LOADLPS:
8084 icode = (fcode == IX86_BUILTIN_LOADHPS
8085 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8086 arg0 = TREE_VALUE (arglist);
8087 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8088 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8089 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8090 tmode = insn_data[icode].operand[0].mode;
8091 mode0 = insn_data[icode].operand[1].mode;
8092 mode1 = insn_data[icode].operand[2].mode;
8094 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8095 op0 = copy_to_mode_reg (mode0, op0);
8096 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8098 || GET_MODE (target) != tmode
8099 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8100 target = gen_reg_rtx (tmode);
8101 pat = GEN_FCN (icode) (target, op0, op1);
/* movhps/movlps stores: arg0 is the destination pointer.  The MEM is
   passed as both operand 0 and operand 1 of the pattern.  */
8107 case IX86_BUILTIN_STOREHPS:
8108 case IX86_BUILTIN_STORELPS:
8109 icode = (fcode == IX86_BUILTIN_STOREHPS
8110 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8111 arg0 = TREE_VALUE (arglist);
8112 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8113 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8114 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8115 mode0 = insn_data[icode].operand[1].mode;
8116 mode1 = insn_data[icode].operand[2].mode;
8118 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8119 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8120 op1 = copy_to_mode_reg (mode1, op1);
8122 pat = GEN_FCN (icode) (op0, op0, op1);
8128 case IX86_BUILTIN_MOVNTPS:
8129 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8130 case IX86_BUILTIN_MOVNTQ:
8131 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
/* ldmxcsr/stmxcsr operate through a SImode stack slot, since the insns
   take a memory operand.  */
8133 case IX86_BUILTIN_LDMXCSR:
8134 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8135 target = assign_386_stack_local (SImode, 0);
8136 emit_move_insn (target, op0);
8137 emit_insn (gen_ldmxcsr (target));
8140 case IX86_BUILTIN_STMXCSR:
8141 target = assign_386_stack_local (SImode, 0);
8142 emit_insn (gen_stmxcsr (target));
8143 return copy_to_mode_reg (SImode, target);
/* prefetch: operand 1 (the hint selector) must be an immediate; the
   address is forced into a Pmode register.  */
8145 case IX86_BUILTIN_PREFETCH:
8146 icode = CODE_FOR_prefetch;
8147 arg0 = TREE_VALUE (arglist);
8148 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8149 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8150 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8151 mode0 = insn_data[icode].operand[0].mode;
8152 mode1 = insn_data[icode].operand[1].mode;
8154 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8156 /* @@@ better error message */
8157 error ("selector must be an immediate");
8161 op0 = copy_to_mode_reg (Pmode, op0);
8162 pat = GEN_FCN (icode) (op0, op1);
/* shufps: two vectors plus an immediate shuffle mask (insn operand 3).  */
8168 case IX86_BUILTIN_SHUFPS:
8169 icode = CODE_FOR_sse_shufps;
8170 arg0 = TREE_VALUE (arglist);
8171 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8172 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8173 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8174 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8175 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8176 tmode = insn_data[icode].operand[0].mode;
8177 mode0 = insn_data[icode].operand[1].mode;
8178 mode1 = insn_data[icode].operand[2].mode;
8179 mode2 = insn_data[icode].operand[3].mode;
8181 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8182 op0 = copy_to_mode_reg (mode0, op0);
8183 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8184 op1 = copy_to_mode_reg (mode1, op1);
8185 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8187 /* @@@ better error message */
8188 error ("mask must be an immediate");
8192 || GET_MODE (target) != tmode
8193 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8194 target = gen_reg_rtx (tmode);
8195 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* pshufw: one vector plus an immediate mask.  Note the modes are read
   from insn operands 2 and 3 while TARGET is also passed as operand 1
   of the pattern below.  */
8201 case IX86_BUILTIN_PSHUFW:
8202 icode = CODE_FOR_mmx_pshufw;
8203 arg0 = TREE_VALUE (arglist);
8204 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8205 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8206 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8207 tmode = insn_data[icode].operand[0].mode;
8208 mode0 = insn_data[icode].operand[2].mode;
8209 mode1 = insn_data[icode].operand[3].mode;
8211 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8212 op0 = copy_to_mode_reg (mode0, op0);
8213 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8215 /* @@@ better error message */
8216 error ("mask must be an immediate");
8220 || GET_MODE (target) != tmode
8221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8222 target = gen_reg_rtx (tmode);
8223 pat = GEN_FCN (icode) (target, target, op0, op1);
8229 /* Composite intrinsics: built from several machine insns.  */
/* _mm_set_ps1: spill the scalar to a stack slot, loadss it, then
   shufps with mask 0 to replicate element 0 into all four lanes.  */
8230 case IX86_BUILTIN_SETPS1:
8231 target = assign_386_stack_local (SFmode, 0);
8232 arg0 = TREE_VALUE (arglist);
8233 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8234 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8235 op0 = gen_reg_rtx (V4SFmode);
8236 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8237 XEXP (target, 0))));
8238 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
/* _mm_set_ps: store the four scalars at byte offsets 0/4/8/12 of a
   V4SFmode stack slot, then load the whole vector with movaps.  */
8241 case IX86_BUILTIN_SETPS:
8242 target = assign_386_stack_local (V4SFmode, 0);
8243 op0 = change_address (target, SFmode, XEXP (target, 0));
8244 arg0 = TREE_VALUE (arglist);
8245 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8246 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8247 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8248 emit_move_insn (op0,
8249 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8250 emit_move_insn (adj_offsettable_operand (op0, 4),
8251 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8252 emit_move_insn (adj_offsettable_operand (op0, 8),
8253 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8254 emit_move_insn (adj_offsettable_operand (op0, 12),
8255 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8256 op0 = gen_reg_rtx (V4SFmode);
8257 emit_insn (gen_sse_movaps (op0, target));
/* Zeroing idioms.  */
8260 case IX86_BUILTIN_CLRPS:
8261 target = gen_reg_rtx (TImode);
8262 emit_insn (gen_sse_clrti (target));
/* _mm_loadr_ps: plain movaps load followed by shufps 0x1b, which
   reverses the four elements.  */
8265 case IX86_BUILTIN_LOADRPS:
8266 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8267 gen_reg_rtx (V4SFmode), 1);
8268 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
/* _mm_load_ps1: loadss then replicate lane 0 via shufps mask 0.  */
8271 case IX86_BUILTIN_LOADPS1:
8272 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8273 gen_reg_rtx (V4SFmode), 1);
8274 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8277 case IX86_BUILTIN_STOREPS1:
8278 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8279 case IX86_BUILTIN_STORERPS:
8280 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8282 case IX86_BUILTIN_MMX_ZERO:
8283 target = gen_reg_rtx (DImode);
8284 emit_insn (gen_mmx_clrdi (target));
/* Not a special case: look the builtin up in the generic two-operand,
   one-operand and comparison tables.  NOTE(review): the loop index `i'
   is declared in an elided line.  */
8291 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8292 if (d->code == fcode)
8294 /* Compares are treated specially: they need the swap/setcc logic in
   ix86_expand_sse_compare.  */
8295 if (d->icode == CODE_FOR_maskcmpv4sf3
8296 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8297 || d->icode == CODE_FOR_maskncmpv4sf3
8298 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8299 return ix86_expand_sse_compare (d, arglist, target);
8301 return ix86_expand_binop_builtin (d->icode, arglist, target);
8304 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8305 if (d->code == fcode)
8306 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8308 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8309 if (d->code == fcode)
8310 return ix86_expand_sse_comi (d, arglist, target);
8312 /* @@@ Should really do something sensible here.  */
8316 /* Store OPERAND to the memory after reload is completed. This means
8317 that we can't easily use assign_stack_local.  Pushes OPERAND onto the
   stack (pre-decrementing the stack pointer) and returns a MEM of MODE
   at the resulting stack address.  */
/* NOTE(review): elided extract -- the `rtx' return type, the
   `rtx operand;' parameter declaration, the `operands' local, the
   switch over MODE and the emit_insn wrappers around the SETs below
   are not shown here.  */
8319 ix86_force_to_memory (mode, operand)
8320 enum machine_mode mode;
/* Only meaningful after reload: the stack layout must be final.  */
8323 if (!reload_completed)
/* DImode: split into two SImode halves and push each one with a
   PRE_DEC of the stack pointer (high half first, per the two SETs).  */
8330 split_di (&operand, 1, operands, operands+1);
8332 gen_rtx_SET (VOIDmode,
8333 gen_rtx_MEM (SImode,
8334 gen_rtx_PRE_DEC (Pmode,
8335 stack_pointer_rtx)),
8338 gen_rtx_SET (VOIDmode,
8339 gen_rtx_MEM (SImode,
8340 gen_rtx_PRE_DEC (Pmode,
8341 stack_pointer_rtx)),
8346 /* It is better to store HImodes as SImodes.  */
8347 if (!TARGET_PARTIAL_REG_STALL)
8348 operand = gen_lowpart (SImode, operand);
/* Push OPERAND in its (possibly widened) mode.  */
8352 gen_rtx_SET (VOIDmode,
8353 gen_rtx_MEM (GET_MODE (operand),
8354 gen_rtx_PRE_DEC (SImode,
8355 stack_pointer_rtx)),
/* The pushed data now lives at the top of stack; hand back a MEM of
   the requested MODE at that address.  Caller releases it with
   ix86_free_from_memory.  */
8361 return gen_rtx_MEM (mode, stack_pointer_rtx);
8364 /* Free operand from the memory. */
8366 ix86_free_from_memory (mode)
8367 enum machine_mode mode;
8369 /* Use LEA to deallocate stack space. In peephole2 it will be converted
8370 to pop or add instruction if registers are available. */
8371 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8372 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8373 GEN_INT (mode == DImode
8375 : mode == HImode && TARGET_PARTIAL_REG_STALL