1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
45 #ifndef CHECK_STACK_LIMIT
46 #define CHECK_STACK_LIMIT -1
49 /* Processor costs (relative to an add) */
/* NOTE(review): this chunk is missing interior lines (embedded numbering skips
   58->60, 68->71); the initializer's closing "};" is not visible here.  Code
   kept byte-identical; comments only.  */
50 struct processor_costs i386_cost = { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8} /* cost of storing fp registers -- NOTE(review): original comment said "loading integer registers", an apparent copy-paste slip; verify field order against struct processor_costs */
/* NOTE(review): interior lines missing from this chunk (numbering skips); the
   closing "};" is not visible.  Code kept byte-identical; comments only.  */
71 struct processor_costs i486_cost = { /* 486 specific costs */
72 1, /* cost of an add instruction */
73 1, /* cost of a lea instruction */
74 3, /* variable shift costs */
75 2, /* constant shift costs */
76 12, /* cost of starting a multiply */
77 1, /* cost of multiply per each bit set */
78 40, /* cost of a divide/mod */
79 15, /* "large" insn */
81 4, /* cost for loading QImode using movzbl */
82 {2, 4, 2}, /* cost of loading integer registers
83 in QImode, HImode and SImode.
84 Relative to reg-reg move (2). */
85 {2, 4, 2}, /* cost of storing integer registers */
86 2, /* cost of reg,reg fld/fst */
87 {8, 8, 8}, /* cost of loading fp registers
88 in SFmode, DFmode and XFmode */
89 {8, 8, 8} /* cost of storing fp registers -- NOTE(review): original comment said "loading integer registers", an apparent copy-paste slip; verify field order against struct processor_costs */
/* Pentium (P5) tuning table.  NOTE(review): interior lines missing from this
   chunk (numbering skips); closing "};" not visible.  Comments only.  */
92 struct processor_costs pentium_cost = {
93 1, /* cost of an add instruction */
94 1, /* cost of a lea instruction */
95 4, /* variable shift costs */
96 1, /* constant shift costs */
97 11, /* cost of starting a multiply */
98 0, /* cost of multiply per each bit set */
99 25, /* cost of a divide/mod */
100 8, /* "large" insn */
102 6, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {2, 2, 6}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {4, 4, 6} /* cost of storing fp registers -- NOTE(review): original comment said "loading integer registers", an apparent copy-paste slip; verify field order against struct processor_costs */
/* Pentium Pro / PII / PIII tuning table.  NOTE(review): interior lines missing
   from this chunk (numbering skips); closing "};" not visible.  Comments only.  */
113 struct processor_costs pentiumpro_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 1, /* variable shift costs */
117 1, /* constant shift costs */
118 4, /* cost of starting a multiply */
119 0, /* cost of multiply per each bit set */
120 17, /* cost of a divide/mod */
121 8, /* "large" insn */
123 2, /* cost for loading QImode using movzbl */
124 {4, 4, 4}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 2, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6} /* cost of storing fp registers -- NOTE(review): original comment said "loading integer registers", an apparent copy-paste slip; verify field order against struct processor_costs */
/* AMD K6 tuning table.  NOTE(review): interior lines missing from this chunk
   (numbering skips); closing "};" not visible.  Comments only.  */
134 struct processor_costs k6_cost = {
135 1, /* cost of an add instruction */
136 2, /* cost of a lea instruction */
137 1, /* variable shift costs */
138 1, /* constant shift costs */
139 3, /* cost of starting a multiply */
140 0, /* cost of multiply per each bit set */
141 18, /* cost of a divide/mod */
142 8, /* "large" insn */
144 3, /* cost for loading QImode using movzbl */
145 {4, 5, 4}, /* cost of loading integer registers
146 in QImode, HImode and SImode.
147 Relative to reg-reg move (2). */
148 {2, 3, 2}, /* cost of storing integer registers */
149 4, /* cost of reg,reg fld/fst */
150 {6, 6, 6}, /* cost of loading fp registers
151 in SFmode, DFmode and XFmode */
152 {4, 4, 4} /* cost of storing fp registers -- NOTE(review): original comment said "loading integer registers", an apparent copy-paste slip; verify field order against struct processor_costs */
/* AMD Athlon tuning table.  NOTE(review): interior lines missing from this
   chunk (numbering skips); closing "};" not visible.  Comments only.  */
155 struct processor_costs athlon_cost = {
156 1, /* cost of an add instruction */
157 2, /* cost of a lea instruction */
158 1, /* variable shift costs */
159 1, /* constant shift costs */
160 5, /* cost of starting a multiply */
161 0, /* cost of multiply per each bit set */
162 42, /* cost of a divide/mod */
163 8, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {4, 5, 4}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 3, 2}, /* cost of storing integer registers */
170 4, /* cost of reg,reg fld/fst */
171 {6, 6, 20}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {4, 4, 16} /* cost of storing fp registers -- NOTE(review): original comment said "loading integer registers", an apparent copy-paste slip; verify field order against struct processor_costs */
/* Active cost table; override_options (below) repoints this at the table
   selected by -mcpu=.  Default is Pentium tuning.  */
176 struct processor_costs *ix86_cost = &pentium_cost;
178 /* Processor feature/optimization bitmasks. */
179 #define m_386 (1<<PROCESSOR_I386)
180 #define m_486 (1<<PROCESSOR_I486)
181 #define m_PENT (1<<PROCESSOR_PENTIUM)
182 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
183 #define m_K6 (1<<PROCESSOR_K6)
184 #define m_ATHLON (1<<PROCESSOR_ATHLON)
/* Each constant below is a bitmask over the m_* processor bits: the tuning
   heuristic named by the variable is enabled for exactly the processors whose
   bits are set.  A ~ mask means "all processors except these".  */
186 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
187 const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
188 const int x86_zero_extend_with_and = m_486 | m_PENT;
189 const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
190 const int x86_double_with_add = ~m_386;
191 const int x86_use_bit_test = m_386;
192 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
193 const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
194 const int x86_use_any_reg = m_486;
195 const int x86_cmove = m_PPRO | m_ATHLON;
196 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
197 const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
198 const int x86_partial_reg_stall = m_PPRO;
199 const int x86_use_loop = m_K6;
200 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
201 const int x86_use_mov0 = m_K6;
202 const int x86_use_cltd = ~(m_PENT | m_K6);
203 const int x86_read_modify_write = ~m_PENT;
204 const int x86_read_modify = ~(m_PENT | m_PPRO);
205 const int x86_split_long_moves = m_PPRO;
206 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
207 const int x86_single_stringop = m_386;
208 const int x86_qimode_math = ~(0);
209 const int x86_promote_qi_regs = 0;
210 const int x86_himode_math = ~(m_PPRO);
211 const int x86_promote_hi_regs = m_PPRO;
212 const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
213 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
214 const int x86_add_esp_4 = m_ATHLON | m_K6;
215 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
216 const int x86_integer_DFmode_moves = ~m_ATHLON;
217 const int x86_partial_reg_dependency = m_ATHLON;
218 const int x86_memory_mismatch_stall = m_ATHLON;
220 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
/* Register-name tables; the *_REGISTER_NAMES initializer macros come from
   i386.h (not visible in this chunk).  */
222 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
223 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
224 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
226 /* Array of the smallest class containing reg number REGNO, indexed by
227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): several initializer rows and the surrounding braces are
   missing from this chunk (embedded numbering skips 229->232, 237->240,
   242->244); kept byte-identical.  */
229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
232 AREG, DREG, CREG, BREG,
234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
240 /* flags, fpsr, dirflag, frame */
241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
242 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
244 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
248 /* The "default" register map. */
/* Maps gcc hard register numbers to debugger (DBX/DWARF) register numbers.
   NOTE(review): opening "{" and closing "};" lines are missing from this
   chunk (numbering skips 250->252); kept byte-identical.  */
250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
254 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
259 /* Define the register numbers to be used in Dwarf debugging information.
260 The SVR4 reference port C compiler uses the following register numbers
261 in its Dwarf output code:
262 0 for %eax (gcc regno = 0)
263 1 for %ecx (gcc regno = 2)
264 2 for %edx (gcc regno = 1)
265 3 for %ebx (gcc regno = 3)
266 4 for %esp (gcc regno = 7)
267 5 for %ebp (gcc regno = 6)
268 6 for %esi (gcc regno = 4)
269 7 for %edi (gcc regno = 5)
270 The following three DWARF register numbers are never generated by
271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
272 believes these numbers have these meanings.
273 8 for %eip (no gcc equivalent)
274 9 for %eflags (gcc regno = 17)
275 10 for %trapno (no gcc equivalent)
276 It is not at all clear how we should number the FP stack registers
277 for the x86 architecture. If the version of SDB on x86/svr4 were
278 a bit less brain dead with respect to floating-point then we would
279 have a precedent to follow with respect to DWARF register numbers
280 for x86 FP registers, but the SDB on x86/svr4 is so completely
281 broken with respect to FP registers that it is hardly worth thinking
282 of it as something to strive for compatibility with.
283 The version of x86/svr4 SDB I have at the moment does (partially)
284 seem to believe that DWARF register number 11 is associated with
285 the x86 register %st(0), but that's about all. Higher DWARF
286 register numbers don't seem to be associated with anything in
287 particular, and even for DWARF regno 11, SDB only seems to under-
288 stand that it should say that a variable lives in %st(0) (when
289 asked via an `=' command) if we said it was in DWARF regno 11,
290 but SDB still prints garbage when asked for the value of the
291 variable in question (via a `/' command).
292 (Also note that the labels SDB prints for various FP stack regs
293 when doing an `x' command are all wrong.)
294 Note that these problems generally don't affect the native SVR4
295 C compiler because it doesn't allow the use of -O with -g and
296 because when it is *not* optimizing, it allocates a memory
297 location for each floating-point variable, and the memory
298 location is what gets described in the DWARF AT_location
299 attribute for the variable in question.
300 Regardless of the severe mental illness of the x86/svr4 SDB, we
301 do something sensible here and we use the following DWARF
302 register numbers. Note that these are all stack-top-relative
304 11 for %st(0) (gcc regno = 8)
305 12 for %st(1) (gcc regno = 9)
306 13 for %st(2) (gcc regno = 10)
307 14 for %st(3) (gcc regno = 11)
308 15 for %st(4) (gcc regno = 12)
309 16 for %st(5) (gcc regno = 13)
310 17 for %st(6) (gcc regno = 14)
311 18 for %st(7) (gcc regno = 15)
/* SVR4 DWARF numbering (see the long comment above): note esi/edi (6,7 here,
   reversed vs. dbx_register_map) and eflags = 9.  NOTE(review): opening "{"
   and closing "};" lines are missing from this chunk; kept byte-identical.  */
313 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
317 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
322 /* Test and compare insns in i386.md store the information needed to
323 generate branch and scc insns here. */
/* Operands of the pending compare, stashed by compare patterns in i386.md and
   consumed when the branch/scc insn is expanded.  */
325 struct rtx_def *ix86_compare_op0 = NULL_RTX;
326 struct rtx_def *ix86_compare_op1 = NULL_RTX;
328 #define MAX_386_STACK_LOCALS 2
330 /* Define the structure for the machine field in struct function. */
/* NOTE(review): the braces of this struct definition are missing from this
   chunk (numbering skips 331->333); kept byte-identical.  */
331 struct machine_function
333 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
336 #define ix86_stack_locals (cfun->machine->stack_locals)
/* Command-line tuning state.  The *_string variables hold raw -m... option
   text; override_options (below) parses them into the corresponding ints.
   NOTE(review): some declarations in this run are missing from the chunk
   (numbering skips, e.g. 341->344, 353->355); kept byte-identical.  */
338 /* which cpu are we scheduling for */
339 enum processor_type ix86_cpu;
341 /* which instruction set architecture to use. */
344 /* Strings to hold which cpu and instruction set architecture to use. */
345 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
346 const char *ix86_arch_string; /* for -march=<xxx> */
348 /* Register allocation order */
349 const char *ix86_reg_alloc_order;
350 static char regs_allocated[FIRST_PSEUDO_REGISTER];
352 /* # of registers to use to pass arguments. */
353 const char *ix86_regparm_string;
355 /* ix86_regparm_string as a number */
358 /* Alignment to use for loops and jumps: */
360 /* Power of two alignment for loops. */
361 const char *ix86_align_loops_string;
363 /* Power of two alignment for non-loop jumps. */
364 const char *ix86_align_jumps_string;
366 /* Power of two alignment for stack boundary in bytes. */
367 const char *ix86_preferred_stack_boundary_string;
369 /* Preferred alignment for stack boundary in bits. */
370 int ix86_preferred_stack_boundary;
372 /* Values 1-5: see jump.c */
373 int ix86_branch_cost;
374 const char *ix86_branch_cost_string;
376 /* Power of two alignment for functions. */
377 int ix86_align_funcs;
378 const char *ix86_align_funcs_string;
380 /* Power of two alignment for loops. */
381 int ix86_align_loops;
383 /* Power of two alignment for non-loop jumps. */
384 int ix86_align_jumps;
386 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
387 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
389 static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
390 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
391 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
393 static rtx gen_push PARAMS ((rtx));
394 static int memory_address_length PARAMS ((rtx addr));
395 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
396 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
397 static int ix86_safe_length PARAMS ((rtx));
398 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
399 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
400 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
401 static void ix86_dump_ppro_packet PARAMS ((FILE *));
402 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
403 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
405 static void ix86_init_machine_status PARAMS ((struct function *));
406 static void ix86_mark_machine_status PARAMS ((struct function *));
407 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
408 static int ix86_safe_length_prefix PARAMS ((rtx));
409 static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
410 int *, int *, int *));
411 static int ix86_nsaved_regs PARAMS((void));
412 static void ix86_emit_save_regs PARAMS((void));
413 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
414 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
415 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
416 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
417 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
421 rtx base, index, disp;
425 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
427 struct builtin_description;
428 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
430 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
432 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
433 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
434 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
435 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
436 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
438 /* Sometimes certain combinations of command options do not make
439 sense on a particular target machine. You can define a macro
440 `OVERRIDE_OPTIONS' to take account of this. This macro, if
441 defined, is executed once just after all the command options have
444 Don't use this macro to turn on various extra optimizations for
445 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
450 /* Comes from final.c -- no real reason to change it. */
451 #define MAX_CODE_ALIGN 16
/* Per-processor tuning table, indexed by enum processor_type.  NOTE(review):
   the struct's opening lines and several field declarations are missing from
   this chunk -- each row has 7 initializers but only 4 fields are visible
   (cost, target_enable, target_disable, align_loop); the remaining fields
   (presumably align_jump, align_func, branch_cost, judging by the accesses in
   override_options below) are among the missing lines.  Kept byte-identical.  */
455 struct processor_costs *cost; /* Processor costs */
456 int target_enable; /* Target flags to enable. */
457 int target_disable; /* Target flags to disable. */
458 int align_loop; /* Default alignments. */
463 const processor_target_table[PROCESSOR_max] =
465 {&i386_cost, 0, 0, 2, 2, 2, 1},
466 {&i486_cost, 0, 0, 4, 4, 4, 1},
467 {&pentium_cost, 0, 0, -4, -4, -4, 1},
468 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
469 {&k6_cost, 0, 0, -5, -5, 4, 1},
470 {&athlon_cost, 0, 0, 4, -4, 4, 1}
/* Maps -march=/-mcpu= option spellings to processor_type values; searched
   linearly by override_options.  NOTE(review): the struct header and braces
   are missing from this chunk; kept byte-identical.  */
475 const char *name; /* processor name or nickname. */
476 enum processor_type processor;
478 const processor_alias_table[] =
480 {"i386", PROCESSOR_I386},
481 {"i486", PROCESSOR_I486},
482 {"i586", PROCESSOR_PENTIUM},
483 {"pentium", PROCESSOR_PENTIUM},
484 {"i686", PROCESSOR_PENTIUMPRO},
485 {"pentiumpro", PROCESSOR_PENTIUMPRO},
486 {"k6", PROCESSOR_K6},
487 {"athlon", PROCESSOR_ATHLON},
490 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
/* Body of override_options: validates/applies all -m... option strings after
   command-line parsing (the OVERRIDE_OPTIONS hook).  NOTE(review): the
   function's signature line and many interior lines (braces, local
   declarations, some conditions) are missing from this chunk -- the embedded
   numbering skips throughout.  Code kept byte-identical; comments only.  */
492 #ifdef SUBTARGET_OVERRIDE_OPTIONS
493 SUBTARGET_OVERRIDE_OPTIONS;
496 ix86_arch = PROCESSOR_I386;
497 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
/* Resolve -march= first; it also sets the default for -mcpu=.  */
499 if (ix86_arch_string != 0)
502 for (i = 0; i < pta_size; i++)
503 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
505 ix86_arch = processor_alias_table[i].processor;
506 /* Default cpu tuning to the architecture. */
507 ix86_cpu = ix86_arch;
511 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* -mcpu= overrides the tuning choice without changing the ISA.  */
514 if (ix86_cpu_string != 0)
517 for (i = 0; i < pta_size; i++)
518 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
520 ix86_cpu = processor_alias_table[i].processor;
524 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
/* Apply the per-processor defaults from processor_target_table.  */
527 ix86_cost = processor_target_table[ix86_cpu].cost;
528 target_flags |= processor_target_table[ix86_cpu].target_enable;
529 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
531 /* Arrange to set up i386_stack_locals for all functions. */
532 init_machine_status = ix86_init_machine_status;
533 mark_machine_status = ix86_mark_machine_status;
535 /* Validate registers in register allocation order. */
536 if (ix86_reg_alloc_order)
539 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
545 case 'a': regno = 0; break;
546 case 'd': regno = 1; break;
547 case 'c': regno = 2; break;
548 case 'b': regno = 3; break;
549 case 'S': regno = 4; break;
550 case 'D': regno = 5; break;
551 case 'B': regno = 6; break;
553 default: fatal ("Register '%c' is unknown", ch);
556 if (regs_allocated[regno])
557 fatal ("Register '%c' already specified in allocation order", ch);
559 regs_allocated[regno] = 1;
563 /* Validate -mregparm= value. */
564 if (ix86_regparm_string)
566 ix86_regparm = atoi (ix86_regparm_string);
567 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
568 fatal ("-mregparm=%d is not between 0 and %d",
569 ix86_regparm, REGPARM_MAX);
572 /* Validate -malign-loops= value, or provide default. */
573 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
574 if (ix86_align_loops_string)
576 ix86_align_loops = atoi (ix86_align_loops_string);
577 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
578 fatal ("-malign-loops=%d is not between 0 and %d",
579 ix86_align_loops, MAX_CODE_ALIGN);
582 /* Validate -malign-jumps= value, or provide default. */
583 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
584 if (ix86_align_jumps_string)
586 ix86_align_jumps = atoi (ix86_align_jumps_string);
587 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
588 fatal ("-malign-jumps=%d is not between 0 and %d",
589 ix86_align_jumps, MAX_CODE_ALIGN);
592 /* Validate -malign-functions= value, or provide default. */
593 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
594 if (ix86_align_funcs_string)
596 ix86_align_funcs = atoi (ix86_align_funcs_string);
597 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
598 fatal ("-malign-functions=%d is not between 0 and %d",
599 ix86_align_funcs, MAX_CODE_ALIGN);
602 /* Validate -mpreferred-stack-boundary= value, or provide default.
603 The default of 128 bits is for Pentium III's SSE __m128. */
604 ix86_preferred_stack_boundary = 128;
605 if (ix86_preferred_stack_boundary_string)
607 int i = atoi (ix86_preferred_stack_boundary_string);
/* NOTE(review): the range check guarding this fatal() is among the missing
   lines (numbering skips 607->609).  */
609 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
610 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
613 /* Validate -mbranch-cost= value, or provide default. */
614 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
615 if (ix86_branch_cost_string)
617 ix86_branch_cost = atoi (ix86_branch_cost_string);
618 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
619 fatal ("-mbranch-cost=%d is not between 0 and 5",
623 /* Keep nonleaf frame pointers. */
624 if (TARGET_OMIT_LEAF_FRAME_POINTER)
625 flag_omit_frame_pointer = 1;
627 /* If we're doing fast math, we don't care about comparison order
628 wrt NaNs. This lets us use a shorter comparison sequence. */
630 target_flags &= ~MASK_IEEE_FP;
632 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
635 target_flags |= MASK_MMX;
638 /* A C statement (sans semicolon) to choose the order in which to
639 allocate hard registers for pseudo-registers local to a basic
642 Store the desired register order in the array `reg_alloc_order'.
643 Element 0 should be the register to allocate first; element 1, the
644 next register; and so on.
646 The macro body should not assume anything about the contents of
647 `reg_alloc_order' before execution of the macro.
649 On most machines, it is not necessary to define this macro. */
/* ORDER_REGS_FOR_LOCAL_ALLOC hook body: fills reg_alloc_order[], honoring a
   user-supplied -mreg-alloc= string when present, else natural order.
   NOTE(review): the return type, braces, local declarations, and parts of the
   switch are missing from this chunk (numbering skips); kept byte-identical.  */
652 order_regs_for_local_alloc ()
656 /* User specified the register allocation order. */
658 if (ix86_reg_alloc_order)
660 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
666 case 'a': regno = 0; break;
667 case 'd': regno = 1; break;
668 case 'c': regno = 2; break;
669 case 'b': regno = 3; break;
670 case 'S': regno = 4; break;
671 case 'D': regno = 5; break;
672 case 'B': regno = 6; break;
675 reg_alloc_order[order++] = regno;
/* Remaining (unrequested) registers follow in natural order; regs_allocated[]
   was populated during option validation in override_options.  */
678 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
680 if (! regs_allocated[i])
681 reg_alloc_order[order++] = i;
685 /* If user did not specify a register allocation order, use natural order. */
688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
689 reg_alloc_order[i] = i;
/* OPTIMIZATION_OPTIONS hook body.  NOTE(review): return type, the `level`
   parameter declaration, braces, and the -O2 condition guarding the
   assignment are missing from this chunk; kept byte-identical.  */
694 optimization_options (level, size)
696 int size ATTRIBUTE_UNUSED;
698 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
699 make the problem with not enough registers even worse. */
700 #ifdef INSN_SCHEDULING
702 flag_schedule_insns = 0;
706 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
707 attribute for DECL. The attributes in ATTRIBUTES have previously been
/* NOTE(review): the return type, braces, and return statement are missing
   from this chunk (numbering skips 715->720); all parameters are marked
   unused, so the body presumably just returns a constant -- verify.  */
711 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
712 tree decl ATTRIBUTE_UNUSED;
713 tree attributes ATTRIBUTE_UNUSED;
714 tree identifier ATTRIBUTE_UNUSED;
715 tree args ATTRIBUTE_UNUSED;
720 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
721 attribute for TYPE. The attributes in ATTRIBUTES have previously been
/* Recognizes the i386 type attributes: stdcall, cdecl (no args allowed), and
   regparm (one integer arg, 0..REGPARM_MAX).  NOTE(review): return type,
   braces, several return statements, and local declarations are missing from
   this chunk; kept byte-identical.  */
725 ix86_valid_type_attribute_p (type, attributes, identifier, args)
727 tree attributes ATTRIBUTE_UNUSED;
731 if (TREE_CODE (type) != FUNCTION_TYPE
732 && TREE_CODE (type) != METHOD_TYPE
733 && TREE_CODE (type) != FIELD_DECL
734 && TREE_CODE (type) != TYPE_DECL)
737 /* Stdcall attribute says callee is responsible for popping arguments
738 if they are not variable. */
739 if (is_attribute_p ("stdcall", identifier))
740 return (args == NULL_TREE);
742 /* Cdecl attribute says the callee is a normal C declaration. */
743 if (is_attribute_p ("cdecl", identifier))
744 return (args == NULL_TREE);
746 /* Regparm attribute specifies how many integer arguments are to be
747 passed in registers. */
748 if (is_attribute_p ("regparm", identifier))
/* regparm takes exactly one argument, which must be an INTEGER_CST not
   exceeding REGPARM_MAX.  */
752 if (! args || TREE_CODE (args) != TREE_LIST
753 || TREE_CHAIN (args) != NULL_TREE
754 || TREE_VALUE (args) == NULL_TREE)
757 cst = TREE_VALUE (args);
758 if (TREE_CODE (cst) != INTEGER_CST)
761 if (compare_tree_int (cst, REGPARM_MAX) > 0)
770 /* Return 0 if the attributes for two types are incompatible, 1 if they
771 are compatible, and 2 if they are nearly compatible (which causes a
772 warning to be generated). */
/* NOTE(review): return type, parameter declarations, braces, and the return
   statements are missing from this chunk; kept byte-identical.  */
775 ix86_comp_type_attributes (type1, type2)
779 /* Check for mismatch of non-default calling convention. */
780 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
782 if (TREE_CODE (type1) != FUNCTION_TYPE)
785 /* Check for mismatched return types (cdecl vs stdcall). */
786 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
787 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
792 /* Value is the number of bytes of arguments automatically
793 popped when returning from a subroutine call.
794 FUNDECL is the declaration node of the function (as a tree),
795 FUNTYPE is the data type of the function (as a tree),
796 or for a library call it is an identifier node for the subroutine name.
797 SIZE is the number of bytes of arguments passed on the stack.
799 On the 80386, the RTD insn may be used to pop them if the number
800 of args is fixed, but if the number is variable then the caller
801 must pop them all. RTD can't be used for library calls now
802 because the library is compiled with the Unix compiler.
803 Use of RTD is a selectable option, since it is incompatible with
804 standard Unix calling sequences. If the option is not selected,
805 the caller must always pop the args.
807 The attribute stdcall is equivalent to RTD on a per module basis. */
/* RETURN_POPS_ARGS hook body (see the long comment above): decides how many
   bytes the callee pops on return, honoring -mrtd and the stdcall/cdecl
   attributes.  NOTE(review): return type, parameter declarations, braces, and
   several statements (including the varargs check and final return) are
   missing from this chunk; kept byte-identical.  */
810 ix86_return_pops_args (fundecl, funtype, size)
815 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
817 /* Cdecl functions override -mrtd, and never pop the stack. */
818 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
820 /* Stdcall functions will pop the stack if not variable args. */
821 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
/* Pop only when the argument list is fixed (last arg type is void).  */
825 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
826 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
831 /* Lose any fake structure return argument. */
832 if (aggregate_value_p (TREE_TYPE (funtype)))
833 return GET_MODE_SIZE (Pmode);
838 /* Argument support functions. */
840 /* Initialize a variable CUM of type CUMULATIVE_ARGS
841 for a call to a function whose data type is FNTYPE.
842 For a library call, FNTYPE is 0. */
/* INIT_CUMULATIVE_ARGS helper: zeroes *CUM, applies any regparm attribute on
   FNTYPE, and disables register passing for varargs functions.  NOTE(review):
   return type, braces, and several statements (the *cum = zero_cum
   assignment, some conditions) are missing from this chunk; kept
   byte-identical.  */
845 init_cumulative_args (cum, fntype, libname)
846 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
847 tree fntype; /* tree ptr for function decl */
848 rtx libname; /* SYMBOL_REF of library name or 0 */
850 static CUMULATIVE_ARGS zero_cum;
851 tree param, next_param;
853 if (TARGET_DEBUG_ARG)
855 fprintf (stderr, "\ninit_cumulative_args (");
857 fprintf (stderr, "fntype code = %s, ret code = %s",
858 tree_code_name[(int) TREE_CODE (fntype)],
859 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
861 fprintf (stderr, "no fntype");
864 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
869 /* Set up the number of registers to use for passing arguments. */
870 cum->nregs = ix86_regparm;
873 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
876 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
879 /* Determine if this function has variable arguments. This is
880 indicated by the last argument being 'void_type_mode' if there
881 are no variable arguments. If there are variable arguments, then
882 we won't pass anything in registers */
886 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
887 param != 0; param = next_param)
889 next_param = TREE_CHAIN (param);
890 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
895 if (TARGET_DEBUG_ARG)
896 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
901 /* Update the data in CUM to advance over an argument
902 of mode MODE and data type TYPE.
903 (TYPE is null for libcalls where that information may not be available.) */
/* FUNCTION_ARG_ADVANCE hook body: steps *CUM past one argument of MODE/TYPE.
   NOTE(review): return type, braces, the `int bytes =` declarator, and the
   actual cum-updating statements are missing from this chunk (numbering jumps
   919->934); kept byte-identical.  */
906 function_arg_advance (cum, mode, type, named)
907 CUMULATIVE_ARGS *cum; /* current arg information */
908 enum machine_mode mode; /* current arg mode */
909 tree type; /* type of the argument or 0 if lib support */
910 int named; /* whether or not the argument was named */
913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
914 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
916 if (TARGET_DEBUG_ARG)
918 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
919 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
934 /* Define where to put the arguments to a function.
935 Value is zero to push the argument on the stack,
936 or a hard register in which to store the argument.
938 MODE is the argument's machine mode.
939 TYPE is the data type of the argument (as a tree).
940 This is null for libcalls where that information may
942 CUM is a variable of type CUMULATIVE_ARGS which gives info about
943 the preceding args and about the function being called.
944 NAMED is nonzero if this argument is a named parameter
945 (otherwise it is an extra parameter matching an ellipsis). */
/* FUNCTION_ARG hook body (see comment above): returns the register RTX for an
   argument passed in registers, or zero to pass on the stack.  NOTE(review):
   return type, braces, the `ret` declaration/initialization, and the mode
   switch around the register case are missing from this chunk; kept
   byte-identical.  */
948 function_arg (cum, mode, type, named)
949 CUMULATIVE_ARGS *cum; /* current arg information */
950 enum machine_mode mode; /* current arg mode */
951 tree type; /* type of the argument or 0 if lib support */
952 int named; /* != 0 for normal args, == 0 for ... args */
956 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
957 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
961 /* For now, pass fp/complex values on the stack. */
970 if (words <= cum->nregs)
971 ret = gen_rtx_REG (mode, cum->regno);
975 if (TARGET_DEBUG_ARG)
978 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
979 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
982 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
984 fprintf (stderr, ", stack");
986 fprintf (stderr, " )\n");
993 /* Return nonzero if OP is (const_int 1), else return zero. */
/* Predicate: true iff OP is (const_int 1).  NOTE(review): return type, the
   `rtx op;` declaration, and braces are missing from this chunk; kept
   byte-identical.  */
996 const_int_1_operand (op, mode)
998 enum machine_mode mode ATTRIBUTE_UNUSED;
1000 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1003 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1004 reference and a constant. */
/* Predicate: true for a symbol reference, label reference, certain UNSPECs
   (codes 6..7 -- presumably the GOT/GOTOFF wrappers; verify against i386.md),
   or a CONST wrapping symbol+constant.  NOTE(review): return type, braces,
   the switch's case labels, and several return statements are missing from
   this chunk; kept byte-identical.  */
1007 symbolic_operand (op, mode)
1009 enum machine_mode mode ATTRIBUTE_UNUSED;
1011 switch (GET_CODE (op))
1019 if (GET_CODE (op) == SYMBOL_REF
1020 || GET_CODE (op) == LABEL_REF
1021 || (GET_CODE (op) == UNSPEC
1022 && XINT (op, 1) >= 6
1023 && XINT (op, 1) <= 7))
1025 if (GET_CODE (op) != PLUS
1026 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1030 if (GET_CODE (op) == SYMBOL_REF
1031 || GET_CODE (op) == LABEL_REF)
1033 /* Only @GOTOFF gets offsets. */
1034 if (GET_CODE (op) != UNSPEC
1035 || XINT (op, 1) != 7)
1038 op = XVECEXP (op, 0, 0);
1039 if (GET_CODE (op) == SYMBOL_REF
1040 || GET_CODE (op) == LABEL_REF)
1049 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1049 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
/* NOTE(review): return type, braces, and the return statements (including the
   UNSPEC-code checks) are missing from this chunk; kept byte-identical.  */
1052 pic_symbolic_operand (op, mode)
1054 enum machine_mode mode ATTRIBUTE_UNUSED;
1056 if (GET_CODE (op) == CONST)
1059 if (GET_CODE (op) == UNSPEC)
1061 if (GET_CODE (op) != PLUS
1062 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1065 if (GET_CODE (op) == UNSPEC)
1071 /* Test for a valid operand for a call instruction.  Don't allow the
1072 arg pointer register or virtual regs since they may decay into
1073 reg + const, which the patterns can't handle. */
1076 call_insn_operand (op, mode)
1078 enum machine_mode mode ATTRIBUTE_UNUSED;
1080 /* Disallow indirect through a virtual register.  This leads to
1081 compiler aborts when trying to eliminate them. */
1082 if (GET_CODE (op) == REG
1083 && (op == arg_pointer_rtx
1084 || op == frame_pointer_rtx
1085 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1086 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1089 /* Disallow `call 1234'.  Due to varying assembler lameness this
1090 gets either rejected or translated to `call .+1234'. */
1091 if (GET_CODE (op) == CONST_INT)
1094 /* Explicitly allow SYMBOL_REF even if pic. */
1095 if (GET_CODE (op) == SYMBOL_REF)
1098 /* Half-pic doesn't allow anything but registers and constants.
1099 We've just taken care of the later. */
/* Half-pic branch: restrict to registers only (constants handled above).  */
1101 return register_operand (op, Pmode);
1103 /* Otherwise we can allow any general_operand in the address. */
1104 return general_operand (op, Pmode);
/* Accept a SYMBOL_REF, possibly offset by a constant inside a CONST,
   as a compile-time-constant call target.  */
1108 constant_call_address_operand (op, mode)
1110 enum machine_mode mode ATTRIBUTE_UNUSED;
1112 if (GET_CODE (op) == CONST
1113 && GET_CODE (XEXP (op, 0)) == PLUS
1114 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1115 op = XEXP (XEXP (op, 0), 0);
1116 return GET_CODE (op) == SYMBOL_REF;
1119 /* Match exactly zero and one. */
/* Note: MODE is live here -- CONST0_RTX depends on it.  */
1122 const0_operand (op, mode)
1124 enum machine_mode mode;
1126 return op == CONST0_RTX (mode);
/* Match exactly the integer constant one; MODE is unused since
   const1_rtx is shared.  */
1130 const1_operand (op, mode)
1132 enum machine_mode mode ATTRIBUTE_UNUSED;
1134 return op == const1_rtx;
1137 /* Match 2, 4, or 8.  Used for leal multiplicands. */
1140 const248_operand (op, mode)
1142 enum machine_mode mode ATTRIBUTE_UNUSED;
1144 return (GET_CODE (op) == CONST_INT
1145 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1148 /* True if this is a constant appropriate for an increment or decrement. */
/* Accepts +1/-1, and also the mode-width all-ones masks, which act as
   -1 in the given mode.  */
1151 incdec_operand (op, mode)
1153 enum machine_mode mode;
1155 if (op == const1_rtx || op == constm1_rtx)
1157 if (GET_CODE (op) != CONST_INT)
1159 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1161 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1163 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1168 /* Return false if this is the stack pointer, or any other fake
1169 register eliminable to the stack pointer.  Otherwise, this is
1172 This is used to prevent esp from being used as an index reg.
1173 Which would only happen in pathological cases. */
/* Looks through a SUBREG (variable `t', declared on an elided line)
   before comparing against the sp/arg/frame pointer rtxen.  */
1176 reg_no_sp_operand (op, mode)
1178 enum machine_mode mode;
1181 if (GET_CODE (t) == SUBREG)
1183 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1186 return register_operand (op, mode);
/* True iff OP is a hard MMX register (MMX_REG_P); MODE is unused.  */
1190 mmx_reg_operand (op, mode)
1192 enum machine_mode mode ATTRIBUTE_UNUSED;
1194 return MMX_REG_P (op);
1197 /* Return false if this is any eliminable register.  Otherwise
/* ... general_operand.  Rejects arg/frame pointers and the virtual
   registers that reload eliminates into real stack addresses.  */
1201 general_no_elim_operand (op, mode)
1203 enum machine_mode mode;
1206 if (GET_CODE (t) == SUBREG)
1208 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1209 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1210 || t == virtual_stack_dynamic_rtx)
1213 return general_operand (op, mode);
1216 /* Return false if this is any eliminable register.  Otherwise
1217 register_operand or const_int. */
/* Same eliminable-register screen as general_no_elim_operand, but the
   fallback accepts only registers and integer constants.  */
1220 nonmemory_no_elim_operand (op, mode)
1222 enum machine_mode mode;
1225 if (GET_CODE (t) == SUBREG)
1227 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1228 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1229 || t == virtual_stack_dynamic_rtx)
1232 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1235 /* Return true if op is a Q_REGS class register. */
/* Q_REGS are the byte-addressable registers (a/b/c/d).  */
1238 q_regs_operand (op, mode)
1240 enum machine_mode mode;
1242 if (mode != VOIDmode && GET_MODE (op) != mode)
1244 if (GET_CODE (op) == SUBREG)
1245 op = SUBREG_REG (op);
1246 return QI_REG_P (op);
1249 /* Return true if op is a NON_Q_REGS class register. */
1252 non_q_regs_operand (op, mode)
1254 enum machine_mode mode;
1256 if (mode != VOIDmode && GET_MODE (op) != mode)
1258 if (GET_CODE (op) == SUBREG)
1259 op = SUBREG_REG (op);
1260 return NON_QI_REG_P (op);
1263 /* Return 1 if OP is a comparison operator that can use the condition code
1264 generated by a logical operation, which characteristically does not set
1265 overflow or carry.  To be used with CCNOmode. */
/* The unsigned comparisons listed need carry, so they fall in the
   rejecting arm of the (elided) switch.  */
1268 no_comparison_operator (op, mode)
1270 enum machine_mode mode;
1272 if (mode != VOIDmode && GET_MODE (op) != mode)
1275 switch (GET_CODE (op))
1279 case LEU: case LTU: case GEU: case GTU:
1287 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
/* ... insns; only EQ/LT/LE/UNORDERED are directly encodable.  */
1290 sse_comparison_operator (op, mode)
1292 enum machine_mode mode ATTRIBUTE_UNUSED;
1294 enum rtx_code code = GET_CODE (op);
1295 return code == EQ || code == LT || code == LE || code == UNORDERED;
1297 /* Return 1 if OP is a valid comparison operator in valid mode. */
/* Which comparison codes are acceptable depends on the CC mode of the
   flags operand: unsigned/ordered forms demand full CCmode, while the
   rest also work in the reduced CCGC/CCGOC/CCNO modes.  */
1299 ix86_comparison_operator (op, mode)
1301 enum machine_mode mode;
1303 enum machine_mode inmode;
1304 if (mode != VOIDmode && GET_MODE (op) != mode)
1306 switch (GET_CODE (op))
1311 inmode = GET_MODE (XEXP (op, 0));
1312 if (inmode == CCmode || inmode == CCGCmode
1313 || inmode == CCGOCmode || inmode == CCNOmode)
1316 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1317 inmode = GET_MODE (XEXP (op, 0));
1318 if (inmode == CCmode)
1322 inmode = GET_MODE (XEXP (op, 0));
1323 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1331 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
/* NOTE(review): XEXP (op, 0) is dereferenced before OP's code is
   checked, so callers must pass an rtx that has operands.  */
1334 fcmov_comparison_operator (op, mode)
1336 enum machine_mode mode;
1338 enum machine_mode inmode = GET_MODE (XEXP (op, 0));
1339 if (mode != VOIDmode && GET_MODE (op) != mode)
1341 switch (GET_CODE (op))
1345 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1346 if (inmode == CCFPmode || inmode == CCFPUmode)
1354 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1357 promotable_binary_operator (op, mode)
1359 enum machine_mode mode ATTRIBUTE_UNUSED;
1361 switch (GET_CODE (op))
1364 /* Modern CPUs have same latency for HImode and SImode multiply,
1365 but 386 and 486 do HImode multiply faster. */
/* Multiply case: promote only on CPUs newer than the 486.  */
1366 return ix86_cpu > PROCESSOR_I486;
1378 /* Nearly general operand, but accept any const_double, since we wish
1379 to be able to drop them into memory rather than have them get pulled
/* ... into registers.  */
1383 cmp_fp_expander_operand (op, mode)
1385 enum machine_mode mode;
1387 if (mode != VOIDmode && mode != GET_MODE (op))
1389 if (GET_CODE (op) == CONST_DOUBLE)
1391 return general_operand (op, mode);
1394 /* Match an SI or HImode register for a zero_extract. */
1397 ext_register_operand (op, mode)
1399 enum machine_mode mode ATTRIBUTE_UNUSED;
1401 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1403 return register_operand (op, VOIDmode);
1406 /* Return 1 if this is a valid binary floating-point operation.
1407 OP is the expression matched, and MODE is its mode. */
1410 binary_fp_operator (op, mode)
1412 enum machine_mode mode;
1414 if (mode != VOIDmode && mode != GET_MODE (op))
1417 switch (GET_CODE (op))
/* For the arithmetic codes (elided case labels) the result mode must
   be a float mode.  */
1423 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* True iff OP is a MULT rtx; MODE is unused.  */
1431 mult_operator(op, mode)
1433 enum machine_mode mode ATTRIBUTE_UNUSED;
1435 return GET_CODE (op) == MULT;
/* True iff OP is a DIV rtx; MODE is unused.  */
1439 div_operator(op, mode)
1441 enum machine_mode mode ATTRIBUTE_UNUSED;
1443 return GET_CODE (op) == DIV;
/* True iff OP is a commutative ('c') or other two-operand ('2')
   arithmetic rtx in MODE (or any mode when MODE is VOIDmode).  */
1447 arith_or_logical_operator (op, mode)
1449 enum machine_mode mode;
1451 return ((mode == VOIDmode || GET_MODE (op) == mode)
1452 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1453 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1456 /* Returns 1 if OP is memory operand with a displacement. */
1459 memory_displacement_operand (op, mode)
1461 enum machine_mode mode;
1463 struct ix86_address parts;
1465 if (! memory_operand (op, mode))
1468 if (! ix86_decompose_address (XEXP (op, 0), &parts))
/* Decomposition failure is presumably an abort on the elided line --
   the address was already validated by memory_operand.  */
1471 return parts.disp != NULL_RTX;
1474 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1475 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1477 ??? It seems likely that this will only work because cmpsi is an
1478 expander, and no actual insns use this. */
1481 cmpsi_operand (op, mode)
1483 enum machine_mode mode;
1485 if (general_operand (op, mode))
/* Also accept the (and (zero_extract ... 8 8) (const_int)) form that
   testqi_ext_ccno_0 produces: an 8-bit field at bit 8 (i.e. %ah etc.).  */
1488 if (GET_CODE (op) == AND
1489 && GET_MODE (op) == SImode
1490 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1491 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1492 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1493 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1494 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1495 && GET_CODE (XEXP (op, 1)) == CONST_INT
1501 /* Returns 1 if OP is memory operand that can not be represented by the
/* ... shortest (zero-displacement) encoding.  */
1505 long_memory_operand (op, mode)
1507 enum machine_mode mode;
1509 if (! memory_operand (op, mode))
1512 return memory_address_length (op) != 0;
1515 /* Return nonzero if the rtx is known aligned. */
/* "Aligned" here means 32-bit (4-byte) alignment: registers and
   immediates trivially qualify; memory qualifies when every address
   component (base reg, index reg, displacement) is 4-aligned.  */
1518 aligned_operand (op, mode)
1520 enum machine_mode mode;
1522 struct ix86_address parts;
1524 if (!general_operand (op, mode))
1527 /* Registers and immediate operands are always "aligned". */
1528 if (GET_CODE (op) != MEM)
1531 /* Don't even try to do any aligned optimizations with volatiles. */
1532 if (MEM_VOLATILE_P (op))
1537 /* Pushes and pops are only valid on the stack pointer. */
1538 if (GET_CODE (op) == PRE_DEC
1539 || GET_CODE (op) == POST_INC)
1542 /* Decode the address. */
1543 if (! ix86_decompose_address (op, &parts))
1546 /* Look for some component that isn't known to be aligned. */
1550 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1555 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1560 if (GET_CODE (parts.disp) != CONST_INT
1561 || (INTVAL (parts.disp) & 3) != 0)
1565 /* Didn't find one -- this must be an aligned address. */
1569 /* Return true if the constant is something that can be loaded with
1570 a special instruction.  Only handle 0.0 and 1.0; others are less
/* ... worthwhile (fldz / fld1 exist for exactly these two values).  */
1574 standard_80387_constant_p (x)
1577 if (GET_CODE (x) != CONST_DOUBLE)
1580 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
/* The float handler guards the REAL_VALUE comparisons: if host FP
   traps while examining X, we land back here and bail out.  */
1586 if (setjmp (handler))
1589 set_float_handler (handler);
1590 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1591 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1592 is1 = REAL_VALUES_EQUAL (d, dconst1);
1593 set_float_handler (NULL_PTR);
1601 /* Note that on the 80387, other constants, such as pi,
1602 are much slower to load as standard constants
1603 than to load from doubles in memory! */
1604 /* ??? Not true on K6: all constants are equal cost. */
1611 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over OP's format string: 'E' entries (vectors) and
   'e' entries (sub-rtxen) are searched for SYMBOL_REF/LABEL_REF.  */
1614 symbolic_reference_mentioned_p (op)
1617 register const char *fmt;
1620 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1623 fmt = GET_RTX_FORMAT (GET_CODE (op));
1624 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1630 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1631 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1635 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1642 /* Return 1 if it is appropriate to emit `ret' instructions in the
1643 body of a function.  Do this only if the epilogue is simple, needing a
1644 couple of insns.  Prior to reloading, we can't tell how many registers
1645 must be saved, so return 0 then.  Return 0 if there is no frame
1646 marker to de-allocate.
1648 If NON_SAVING_SETJMP is defined and true, then it is not possible
1649 for the epilogue to be simple, so return 0.  This is a special case
1650 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1651 until final, but jump_optimize may need to know sooner if a
/* ... simple return is possible.  */
1655 ix86_can_use_return_insn_p ()
1657 HOST_WIDE_INT tsize;
1660 #ifdef NON_SAVING_SETJMP
1661 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1664 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1665 if (profile_block_flag == 2)
1669 if (! reload_completed || frame_pointer_needed)
1672 /* Don't allow more than 32 pop, since that's all we can do
1673 with one instruction. */
/* The check below is against 32768 bytes; presumably the comment
   means 32K -- TODO confirm against the `ret $imm16' encoding limit.  */
1674 if (current_function_pops_args
1675 && current_function_args_size >= 32768)
1678 tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
1679 return tsize == 0 && nregs == 0;
/* State shared between asm_output_function_prefix and
   load_pic_register: the name of the deep-branch-prediction PIC
   thunk label, and whether its body has already been emitted.  */
1682 static const char *pic_label_name;
1683 static int pic_label_output;
1685 /* This function generates code for -fpic that loads %ebx with
1686 the return address of the caller and then returns. */
/* Emitted at most once per translation unit (guarded by
   pic_label_output); the thunk is `movl (%esp), %ebx; ret'.  */
1689 asm_output_function_prefix (file, name)
1691 const char *name ATTRIBUTE_UNUSED;
1694 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1695 || current_function_uses_const_pool);
1696 xops[0] = pic_offset_table_rtx;
1697 xops[1] = stack_pointer_rtx;
1699 /* Deep branch prediction favors having a return for every call. */
1700 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
1702 if (!pic_label_output)
1704 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1705 internal (non-global) label that's being emitted, it didn't make
1706 sense to have .type information for local labels.  This caused
1707 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1708 me debug info for a label that you're declaring non-global?) this
1709 was changed to call ASM_OUTPUT_LABEL() instead. */
1711 ASM_OUTPUT_LABEL (file, pic_label_name);
1713 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1714 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1715 output_asm_insn ("ret", xops);
1717 pic_label_output = 1;
/* Emit prologue insns that load the PIC register with the address of
   _GLOBAL_OFFSET_TABLE_.  With deep branch prediction the current PC
   is fetched via the thunk emitted by asm_output_function_prefix;
   otherwise via a call/pop to a local label.  */
1723 load_pic_register ()
1727 gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1729 if (TARGET_DEEP_BRANCH_PREDICTION)
1731 if (pic_label_name == NULL)
1734 ASM_GENERATE_INTERNAL_LABEL (buf, "LPR", 0);
1735 pic_label_name = ggc_strdup (buf);
1737 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
1741 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
1744 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
1746 if (! TARGET_DEEP_BRANCH_PREDICTION)
1747 emit_insn (gen_popsi1 (pic_offset_table_rtx));
1749 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
1752 /* Generate an SImode "push" pattern for input ARG. */
/* Builds (set (mem:SI (pre_dec:SI sp)) ARG); the function header and
   the trailing ARG operand are elided from this extract.  */
1758 return gen_rtx_SET (VOIDmode,
1759 gen_rtx_MEM (SImode,
1760 gen_rtx_PRE_DEC (SImode,
1761 stack_pointer_rtx)),
1765 /* Return number of registers to be saved on the stack. */
/* (Header elided; called as ix86_nsaved_regs elsewhere in this file.)
   Counts hard regs below the frame/stack pointer that are live and not
   call-used, plus the PIC register when it is in use.  */
1771 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1772 || current_function_uses_const_pool);
1773 int limit = (frame_pointer_needed
1774 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1777 for (regno = limit - 1; regno >= 0; regno--)
1778 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1779 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1786 /* Return the offset between two registers, one to be eliminated, and the other
1787 its replacement, at the start of a routine. */
1790 ix86_initial_elimination_offset (from, to)
1797 /* Stack grows downward:
1803 saved frame pointer if frame_pointer_needed
1804 <- HARD_FRAME_POINTER
1814 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1815 /* Skip saved PC and previous frame pointer.
1816 Executed only when frame_pointer_needed. */
1818 else if (from == FRAME_POINTER_REGNUM
1819 && to == HARD_FRAME_POINTER_REGNUM)
/* FRAME -> HARD_FRAME: distance is saved regs plus pre-frame padding.  */
1821 ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
1822 padding1 += nregs * UNITS_PER_WORD;
1827 /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination. */
/* frame_size is the saved-PC word plus the saved %ebp word when a
   frame pointer is in use.  */
1828 int frame_size = frame_pointer_needed ? 8 : 4;
1829 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
1830 &nregs, &padding1, (int *) 0);
1832 if (to != STACK_POINTER_REGNUM)
1834 else if (from == ARG_POINTER_REGNUM)
1835 return tsize + nregs * UNITS_PER_WORD + frame_size;
1836 else if (from != FRAME_POINTER_REGNUM)
1839 return tsize - padding1;
1843 /* Compute the size of local storage taking into consideration the
1844 desired stack alignment which is to be maintained.  Also determine
1845 the number of registers saved below the local storage.
1847 PADDING1 returns padding before stack frame and PADDING2 returns
1848 padding after stack frame;
/* Returns the aligned local-frame size (size + padding1 + padding2).
   Any of the three out-parameters may be a null pointer.  */
1851 static HOST_WIDE_INT
1852 ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
1854 int *nregs_on_stack;
1861 HOST_WIDE_INT total_size;
1862 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1864 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1866 nregs = ix86_nsaved_regs ();
/* Offset of the register-save area: saved PC, plus saved %ebp when a
   frame pointer is used.  */
1869 offset = frame_pointer_needed ? 8 : 4;
1871 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1872 since i386 port is the only using those features that may break easily. */
/* The consequent of each check is elided -- presumably abort().  */
1874 if (size && !stack_alignment_needed)
1876 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
1878 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1880 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1882 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1885 if (stack_alignment_needed < 4)
1886 stack_alignment_needed = 4;
1888 offset += nregs * UNITS_PER_WORD;
1890 if (ACCUMULATE_OUTGOING_ARGS)
1891 total_size += current_function_outgoing_args_size;
1893 total_size += offset;
1895 /* Align start of frame for local function. */
1896 padding1 = ((offset + stack_alignment_needed - 1)
1897 & -stack_alignment_needed) - offset;
1898 total_size += padding1;
1900 /* Align stack boundary. */
1901 padding2 = ((total_size + preferred_alignment - 1)
1902 & -preferred_alignment) - total_size;
1904 if (ACCUMULATE_OUTGOING_ARGS)
1905 padding2 += current_function_outgoing_args_size;
1908 *nregs_on_stack = nregs;
1910 *rpadding1 = padding1;
1912 *rpadding2 = padding2;
1914 return size + padding1 + padding2;
1917 /* Emit code to save registers in the prologue. */
/* Pushes, from highest regno downward, every live non-call-used hard
   reg below the frame/stack pointer (and the PIC reg when used); each
   push is marked frame-related for unwind info.  */
1920 ix86_emit_save_regs ()
1925 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1926 || current_function_uses_const_pool);
1927 limit = (frame_pointer_needed
1928 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1930 for (regno = limit - 1; regno >= 0; regno--)
1931 if ((regs_ever_live[regno] && !call_used_regs[regno])
1932 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1934 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1935 RTX_FRAME_RELATED_P (insn) = 1;
1939 /* Expand the prologue into a bunch of separate insns. */
1942 ix86_expand_prologue ()
1944 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
1947 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1948 || current_function_uses_const_pool);
1950 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
1951 slower on all targets.  Also sdb doesn't like it. */
1953 if (frame_pointer_needed)
1955 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1956 RTX_FRAME_RELATED_P (insn) = 1;
1958 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1959 RTX_FRAME_RELATED_P (insn) = 1;
1962 ix86_emit_save_regs ();
/* Allocate the local frame: small/unprobed frames get a direct
   sp adjustment ... */
1966 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1968 if (frame_pointer_needed)
1969 insn = emit_insn (gen_pro_epilogue_adjust_stack
1970 (stack_pointer_rtx, stack_pointer_rtx,
1971 GEN_INT (-tsize), hard_frame_pointer_rtx));
1973 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1975 RTX_FRAME_RELATED_P (insn) = 1;
/* ... while large frames with stack probing call _alloca with the
   size in %eax.  */
1979 /* ??? Is this only valid for Win32? */
1983 arg0 = gen_rtx_REG (SImode, 0);
1984 emit_move_insn (arg0, GEN_INT (tsize));
1986 sym = gen_rtx_MEM (FUNCTION_MODE,
1987 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1988 insn = emit_call_insn (gen_call (sym, const0_rtx));
1990 CALL_INSN_FUNCTION_USAGE (insn)
1991 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1992 CALL_INSN_FUNCTION_USAGE (insn));
1995 #ifdef SUBTARGET_PROLOGUE
2000 load_pic_register ();
2002 /* If we are profiling, make sure no instructions are scheduled before
2003 the call to mcount.  However, if -fpic, the above call will have
/* ... done that already (the PIC setup acts as a scheduling barrier).  */
2005 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2006 emit_insn (gen_blockage ());
2009 /* Emit code to add TSIZE to esp value.  Use POP instruction when
/* ... profitable (condition elided in this extract).  */
2013 ix86_emit_epilogue_esp_adjustment (tsize)
2016 /* If a frame pointer is present, we must be sure to tie the sp
2017 to the fp so that we don't mis-schedule. */
2018 if (frame_pointer_needed)
2019 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2022 hard_frame_pointer_rtx));
2024 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2028 /* Emit code to restore saved registers using MOV insns.  First register
2029 is restored from POINTER + OFFSET. */
2031 ix86_emit_restore_regs_using_mov (pointer, offset)
2036 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2037 || current_function_uses_const_pool);
2038 int limit = (frame_pointer_needed
2039 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
/* Same register set as ix86_emit_save_regs, but walked upward so the
   loads mirror the pushes done in the prologue.  */
2041 for (regno = 0; regno < limit; regno++)
2042 if ((regs_ever_live[regno] && !call_used_regs[regno])
2043 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2045 emit_move_insn (gen_rtx_REG (SImode, regno),
2046 adj_offsettable_operand (gen_rtx_MEM (SImode,
2053 /* Restore function stack, frame, and registers. */
/* EMIT_RETURN is false for sibcall epilogues (no ret emitted).  */
2056 ix86_expand_epilogue (emit_return)
2062 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2063 || current_function_uses_const_pool);
2064 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2065 HOST_WIDE_INT offset;
2066 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2067 (int *) 0, (int *) 0);
2069 /* Calculate start of saved registers relative to ebp. */
2070 offset = -nregs * UNITS_PER_WORD;
2072 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2073 if (profile_block_flag == 2)
2075 FUNCTION_BLOCK_PROFILER_EXIT;
2079 /* If we're only restoring one register and sp is not valid then
2080 using a move instruction to restore the register since it's
2081 less work than reloading sp and popping the register.
2083 The default code result in stack adjustment using add/lea instruction,
2084 while this code results in LEAVE instruction (or discrete equivalent),
2085 so it is profitable in some other cases as well.  Especially when there
2086 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
2087 and there is exactly one register to pop.  This heuristic may need some
2088 tuning in future. */
2089 if ((!sp_valid && nregs <= 1)
2090 || (frame_pointer_needed && !nregs && tsize)
2091 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2094 /* Restore registers.  We can use ebp or esp to address the memory
2095 locations.  If both are available, default to ebp, since offsets
2096 are known to be small.  Only exception is esp pointing directly to the
2097 end of block of saved registers, where we may simplify addressing
2100 if (!frame_pointer_needed || (sp_valid && !tsize))
2101 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2103 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2105 if (!frame_pointer_needed)
2106 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2107 /* If not an i386, mov & pop is faster than "leave". */
2108 else if (TARGET_USE_LEAVE || optimize_size)
2109 emit_insn (gen_leave ());
/* Discrete equivalent of leave: point sp at the saved %ebp, pop it.  */
2112 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2113 hard_frame_pointer_rtx,
2115 hard_frame_pointer_rtx));
2116 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2121 /* First step is to deallocate the stack frame so that we can
2122 pop the registers. */
2125 if (!frame_pointer_needed)
2127 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2128 hard_frame_pointer_rtx,
2130 hard_frame_pointer_rtx));
2133 ix86_emit_epilogue_esp_adjustment (tsize);
2135 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2136 if ((regs_ever_live[regno] && !call_used_regs[regno])
2137 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2138 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2141 /* Sibcall epilogues don't want a return instruction. */
2145 if (current_function_pops_args && current_function_args_size)
2147 rtx popc = GEN_INT (current_function_pops_args);
2149 /* i386 can only pop 64K bytes.  If asked to pop more, pop
2150 return address, do explicit add, and jump indirectly to the
2153 if (current_function_pops_args >= 65536)
2155 rtx ecx = gen_rtx_REG (SImode, 2);
2157 emit_insn (gen_popsi1 (ecx));
2158 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2159 emit_jump_insn (gen_return_indirect_internal (ecx));
2162 emit_jump_insn (gen_return_pop_internal (popc));
2165 emit_jump_insn (gen_return_internal ());
2168 /* Extract the parts of an RTL expression that is a valid memory address
2169 for an instruction.  Return false if the structure of the address is
/* ... not recognizable; on success fills OUT with base, index, disp
   and scale.  */
2173 ix86_decompose_address (addr, out)
2175 struct ix86_address *out;
2177 rtx base = NULL_RTX;
2178 rtx index = NULL_RTX;
2179 rtx disp = NULL_RTX;
2180 HOST_WIDE_INT scale = 1;
2181 rtx scale_rtx = NULL_RTX;
2183 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2185 else if (GET_CODE (addr) == PLUS)
2187 rtx op0 = XEXP (addr, 0);
2188 rtx op1 = XEXP (addr, 1);
2189 enum rtx_code code0 = GET_CODE (op0);
2190 enum rtx_code code1 = GET_CODE (op1);
2192 if (code0 == REG || code0 == SUBREG)
2194 if (code1 == REG || code1 == SUBREG)
2195 index = op0, base = op1; /* index + base */
2197 base = op0, disp = op1; /* base + displacement */
2199 else if (code0 == MULT)
2201 index = XEXP (op0, 0);
2202 scale_rtx = XEXP (op0, 1);
2203 if (code1 == REG || code1 == SUBREG)
2204 base = op1; /* index*scale + base */
2206 disp = op1; /* index*scale + disp */
2208 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2210 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2211 scale_rtx = XEXP (XEXP (op0, 0), 1);
2212 base = XEXP (op0, 1);
2215 else if (code0 == PLUS)
2217 index = XEXP (op0, 0); /* index + base + disp */
2218 base = XEXP (op0, 1);
2224 else if (GET_CODE (addr) == MULT)
2226 index = XEXP (addr, 0); /* index*scale */
2227 scale_rtx = XEXP (addr, 1);
2229 else if (GET_CODE (addr) == ASHIFT)
2233 /* We're called for lea too, which implements ashift on occasion. */
2234 index = XEXP (addr, 0);
2235 tmp = XEXP (addr, 1);
2236 if (GET_CODE (tmp) != CONST_INT)
/* Shift count becomes the scale: 1<<n for n in 0..3, i.e. scale 1..8.  */
2238 scale = INTVAL (tmp);
2239 if ((unsigned HOST_WIDE_INT) scale > 3)
2244 disp = addr; /* displacement */
2246 /* Extract the integral value of scale. */
2249 if (GET_CODE (scale_rtx) != CONST_INT)
2251 scale = INTVAL (scale_rtx);
2254 /* Allow arg pointer and stack pointer as index if there is not scaling */
2255 if (base && index && scale == 1
2256 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2257 || index == stack_pointer_rtx))
2264 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2265 if ((base == hard_frame_pointer_rtx
2266 || base == frame_pointer_rtx
2267 || base == arg_pointer_rtx) && !disp)
2270 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2271 Avoid this by transforming to [%esi+0]. */
2272 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2273 && base && !index && !disp
2275 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2278 /* Special case: encode reg+reg instead of reg*2. */
2279 if (!base && index && scale && scale == 2)
2280 base = index, scale = 1;
2282 /* Special case: scaling cannot be encoded without base or displacement. */
2283 if (!base && !disp && index && scale != 1)
2294 /* Return cost of the memory address x.
2295 For i386, it is better to use a complex address than let gcc copy
2296 the address into a reg and make a new pseudo.  But not if the address
2297 requires two regs - that would mean more pseudos with longer
/* ... lifetimes.  */
2300 ix86_address_cost (x)
2303 struct ix86_address parts;
2306 if (!ix86_decompose_address (x, &parts))
2309 /* More complex memory references are better. */
2310 if (parts.disp && parts.disp != const0_rtx)
2313 /* Attempt to minimize number of registers in the address. */
/* Cheaper when at most one hard register is involved ...  */
2315 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2317 && (!REG_P (parts.index)
2318 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
/* ... and penalized when base and index are two distinct pseudos.  */
2322 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2324 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2325 && parts.base != parts.index)
2328 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2329 since its predecode logic can't detect the length of instructions
2330 and it degenerates to vector decoded.  Increase cost of such
2331 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
2332 to split such addresses or even refuse such addresses at all.
2334 Following addressing modes are affected:
2339 The first and last case may be avoidable by explicitly coding the zero in
2340 memory address, but I don't have AMD-K6 machine handy to check this
2344 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2345 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2346 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2352 /* If X is a machine specific address (i.e. a symbol or label being
2353 referenced as a displacement from the GOT implemented using an
2354 UNSPEC), then return the base term.  Otherwise return X. */
/* Only the @GOTOFF form (UNSPEC number 7) relative to the PIC
   register is unwrapped.  */
2357 ix86_find_base_term (x)
2362 if (GET_CODE (x) != PLUS
2363 || XEXP (x, 0) != pic_offset_table_rtx
2364 || GET_CODE (XEXP (x, 1)) != CONST)
2367 term = XEXP (XEXP (x, 1), 0);
2369 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2370 term = XEXP (term, 0);
2372 if (GET_CODE (term) != UNSPEC
2373 || XVECLEN (term, 0) != 1
2374 || XINT (term, 1) != 7)
2377 term = XVECEXP (term, 0, 0);
2379 if (GET_CODE (term) != SYMBOL_REF
2380 && GET_CODE (term) != LABEL_REF)
2386 /* Determine if a given CONST RTX is a valid memory displacement
/* ... in PIC mode: a CONST wrapping (plus)? an UNSPEC 6 (@GOT) or
   7 (@GOTOFF) around a symbol or label.  */
2390 legitimate_pic_address_disp_p (disp)
2393 if (GET_CODE (disp) != CONST)
2395 disp = XEXP (disp, 0);
2397 if (GET_CODE (disp) == PLUS)
2399 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2401 disp = XEXP (disp, 0);
2404 if (GET_CODE (disp) != UNSPEC
2405 || XVECLEN (disp, 0) != 1)
2408 /* Must be @GOT or @GOTOFF. */
2409 if (XINT (disp, 1) != 6
2410 && XINT (disp, 1) != 7)
2413 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2414 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2420 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2421 memory address for an instruction.  The MODE argument is the machine mode
2422 for the MEM expression that wants to use this address.
2424 It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
2425 convert common non-canonical forms to canonical form so that they will
/* ... be recognized.  On failure, `reason'/`reason_rtx' name the
   offending component for the -mdebug-addr dump.  */
2429 legitimate_address_p (mode, addr, strict)
2430 enum machine_mode mode;
2434 struct ix86_address parts;
2435 rtx base, index, disp;
2436 HOST_WIDE_INT scale;
2437 const char *reason = NULL;
2438 rtx reason_rtx = NULL_RTX;
2440 if (TARGET_DEBUG_ADDR)
2443 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2444 GET_MODE_NAME (mode), strict);
2448 if (! ix86_decompose_address (addr, &parts))
2450 reason = "decomposition failed";
2455 index = parts.index;
2457 scale = parts.scale;
2459 /* Validate base register.
2461 Don't allow SUBREG's here, it can lead to spill failures when the base
2462 is one word out of a two word structure, which is represented internally
/* ... as a DImode int.  */
2469 if (GET_CODE (base) != REG)
2471 reason = "base is not a register";
2475 if (GET_MODE (base) != Pmode)
2477 reason = "base is not in Pmode";
2481 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2482 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2484 reason = "base is not valid";
2489 /* Validate index register.
2491 Don't allow SUBREG's here, it can lead to spill failures when the index
2492 is one word out of a two word structure, which is represented internally
/* ... as a DImode int.  */
2499 if (GET_CODE (index) != REG)
2501 reason = "index is not a register";
2505 if (GET_MODE (index) != Pmode)
2507 reason = "index is not in Pmode";
2511 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2512 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2514 reason = "index is not valid";
2519 /* Validate scale factor. */
2522 reason_rtx = GEN_INT (scale);
2525 reason = "scale without index";
2529 if (scale != 2 && scale != 4 && scale != 8)
2531 reason = "scale is not a valid multiplier";
2536 /* Validate displacement. */
2541 if (!CONSTANT_ADDRESS_P (disp))
2543 reason = "displacement is not constant";
2547 if (GET_CODE (disp) == CONST_DOUBLE)
2549 reason = "displacement is a const_double";
2553 if (flag_pic && SYMBOLIC_CONST (disp))
2555 if (! legitimate_pic_address_disp_p (disp))
2557 reason = "displacement is an invalid pic construct";
2561 /* This code used to verify that a symbolic pic displacement
2562 includes the pic_offset_table_rtx register.
2564 While this is good idea, unfortunately these constructs may
2565 be created by "adds using lea" optimization for incorrect
2574 This code is nonsensical, but results in addressing
2575 GOT table with pic_offset_table_rtx base.  We can't
2576 just refuse it easily, since it gets matched by
2577 "addsi3" pattern, that later gets split to lea in the
2578 case output register differs from input.  While this
2579 can be handled by separate addsi pattern for this case
2580 that never results in lea, this seems to be easier and
2581 correct fix for crash to disable this test. */
2583 else if (HALF_PIC_P ())
2585 if (! HALF_PIC_ADDRESS_P (disp)
2586 || (base != NULL_RTX || index != NULL_RTX))
2588 reason = "displacement is an invalid half-pic reference";
2594 /* Everything looks valid. */
2595 if (TARGET_DEBUG_ADDR)
2596 fprintf (stderr, "Success.\n");
2600 if (TARGET_DEBUG_ADDR)
2602 fprintf (stderr, "Error: %s\n", reason);
2603 debug_rtx (reason_rtx);
2608 /* Return an unique alias set for the GOT. */
/* Return the (unique) alias set used for GOT references, allocating it
   on first use.  SET starts at -1 as the "not yet allocated" sentinel;
   the guard testing it is elided in this excerpt.  */
2610 static HOST_WIDE_INT
2611 ix86_GOT_alias_set ()
2613 static HOST_WIDE_INT set = -1;
2615 set = new_alias_set ();
2619 /* Return a legitimate reference for ORIG (an address) using the
2620 register REG. If REG is 0, a new pseudo is generated.
2622 There are two types of references that must be handled:
2624 1. Global data references must load the address from the GOT, via
2625 the PIC reg. An insn is emitted to do this load, and the reg is
2628 2. Static data references, constant pool addresses, and code labels
2629 compute the address as an offset from the GOT, whose base is in
2630 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2631 differentiate them from global data objects. The returned
2632 address is the PIC reg + an unspec constant.
2634 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2635 reg also appears in the address. */
/* Worker for the contract documented in the block comment above:
   return a PIC-legitimate form of address ORIG, using REG as scratch
   (a fresh pseudo is made when REG is 0).  */
2638 legitimize_pic_address (orig, reg)
/* Case 1: labels, constant-pool entries and known-static symbols can
   be addressed as pic_reg + @GOTOFF (UNSPEC 7).  */
2646 if (GET_CODE (addr) == LABEL_REF
2647 || (GET_CODE (addr) == SYMBOL_REF
2648 && (CONSTANT_POOL_ADDRESS_P (addr)
2649 || SYMBOL_REF_FLAG (addr))))
2651 /* This symbol may be referenced via a displacement from the PIC
2652 base address (@GOTOFF). */
2654 current_function_uses_pic_offset_table = 1;
2655 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2656 new = gen_rtx_CONST (Pmode, new);
2657 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2661 emit_move_insn (reg, new);
/* Case 2: global symbols go through a GOT slot: load from
   pic_reg + @GOT (UNSPEC 6).  The slot is marked unchanging and given
   the GOT alias set so its loads can be CSE'd/scheduled freely.  */
2665 else if (GET_CODE (addr) == SYMBOL_REF)
2667 /* This symbol must be referenced via a load from the
2668 Global Offset Table (@GOT). */
2670 current_function_uses_pic_offset_table = 1;
2671 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2672 new = gen_rtx_CONST (Pmode, new);
2673 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2674 new = gen_rtx_MEM (Pmode, new);
2675 RTX_UNCHANGING_P (new) = 1;
2676 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2679 reg = gen_reg_rtx (Pmode);
2680 emit_move_insn (reg, new);
/* Case 3: composite constants — strip a CONST wrapper and handle
   PLUS expressions piecewise.  */
2685 if (GET_CODE (addr) == CONST)
2687 addr = XEXP (addr, 0);
2688 if (GET_CODE (addr) == UNSPEC
2690 /* Check that the unspec is one of the ones we generate? */
2692 else if (GET_CODE (addr) != PLUS)
2695 if (GET_CODE (addr) == PLUS)
2697 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2699 /* Check first to see if this is a constant offset from a @GOTOFF
2700 symbol reference. */
2701 if ((GET_CODE (op0) == LABEL_REF
2702 || (GET_CODE (op0) == SYMBOL_REF
2703 && (CONSTANT_POOL_ADDRESS_P (op0)
2704 || SYMBOL_REF_FLAG (op0))))
2705 && GET_CODE (op1) == CONST_INT)
2707 current_function_uses_pic_offset_table = 1;
2708 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2709 new = gen_rtx_PLUS (Pmode, new, op1);
2710 new = gen_rtx_CONST (Pmode, new);
2711 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2715 emit_move_insn (reg, new);
/* General PLUS: legitimize each half recursively, then fold constant
   terms and re-associate so any remaining constant is outermost.  */
2721 base = legitimize_pic_address (XEXP (addr, 0), reg);
2722 new = legitimize_pic_address (XEXP (addr, 1),
2723 base == reg ? NULL_RTX : reg);
2725 if (GET_CODE (new) == CONST_INT)
2726 new = plus_constant (base, INTVAL (new));
2729 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2731 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2732 new = XEXP (new, 1);
2734 new = gen_rtx_PLUS (Pmode, base, new);
2742 /* Try machine-dependent ways of modifying an illegitimate address
2743 to be legitimate. If we find one, return the new, valid address.
2744 This macro is used in only one place: `memory_address' in explow.c.
2746 OLDX is the address as it was before break_out_memory_refs was called.
2747 In some cases it is useful to look at this to decide what needs to be done.
2749 MODE and WIN are passed so that this macro can use
2750 GO_IF_LEGITIMATE_ADDRESS.
2752 It is always safe for this macro to do nothing. It exists to recognize
2753 opportunities to optimize the output.
2755 For the 80386, we handle X+REG by loading X into a register R and
2756 using R+REG. R will go in a general reg and indexing will be used.
2757 However, if REG is a broken-out memory address or multiplication,
2758 nothing needs to be done because REG can certainly go in a general reg.
2760 When -fpic is used, special handling is needed for symbolic references.
2761 See comments by legitimize_pic_address in i386.c for details. */
/* Worker for the LEGITIMIZE_ADDRESS contract documented above: try to
   turn the illegitimate address X (for mode MODE) into a valid one.
   OLDX is unused here.  Transformations are applied in stages; after
   each canonicalization pass the result is re-checked with
   legitimate_address_p and returned as soon as it is valid.  */
2764 legitimize_address (x, oldx, mode)
2766 register rtx oldx ATTRIBUTE_UNUSED;
2767 enum machine_mode mode;
2772 if (TARGET_DEBUG_ADDR)
2774 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2775 GET_MODE_NAME (mode));
/* PIC symbolic addresses need the full @GOT/@GOTOFF treatment.  */
2779 if (flag_pic && SYMBOLIC_CONST (x))
2780 return legitimize_pic_address (x, 0);
2782 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2783 if (GET_CODE (x) == ASHIFT
2784 && GET_CODE (XEXP (x, 1)) == CONST_INT
2785 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2788 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2789 GEN_INT (1 << log));
2792 if (GET_CODE (x) == PLUS)
2794 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2796 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2797 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2798 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2801 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2802 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2803 GEN_INT (1 << log));
2806 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2807 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2808 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2811 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2812 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2813 GEN_INT (1 << log));
2816 /* Put multiply first if it isn't already. */
2817 if (GET_CODE (XEXP (x, 1)) == MULT)
2819 rtx tmp = XEXP (x, 0);
2820 XEXP (x, 0) = XEXP (x, 1);
2825 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2826 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2827 created by virtual register instantiation, register elimination, and
2828 similar optimizations. */
2829 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2832 x = gen_rtx_PLUS (Pmode,
2833 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2834 XEXP (XEXP (x, 1), 0)),
2835 XEXP (XEXP (x, 1), 1));
2839 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2840 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2841 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2842 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2843 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2844 && CONSTANT_P (XEXP (x, 1)))
2847 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT so the pair
   can be folded with plus_constant.  */
2849 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2851 constant = XEXP (x, 1);
2852 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2854 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2856 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2857 other = XEXP (x, 1);
2865 x = gen_rtx_PLUS (Pmode,
2866 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2867 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2868 plus_constant (other, INTVAL (constant)));
2872 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force stray MULT operands into registers; a bare multiply cannot
   appear as a second operand of PLUS in a valid address.  */
2875 if (GET_CODE (XEXP (x, 0)) == MULT)
2878 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2881 if (GET_CODE (XEXP (x, 1)) == MULT)
2884 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2888 && GET_CODE (XEXP (x, 1)) == REG
2889 && GET_CODE (XEXP (x, 0)) == REG)
2892 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2895 x = legitimize_pic_address (x, 0);
2898 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: move the non-register half into a fresh pseudo so the
   result is a simple reg+reg address.  */
2901 if (GET_CODE (XEXP (x, 0)) == REG)
2903 register rtx temp = gen_reg_rtx (Pmode);
2904 register rtx val = force_operand (XEXP (x, 1), temp);
2906 emit_move_insn (temp, val);
2912 else if (GET_CODE (XEXP (x, 1)) == REG)
2914 register rtx temp = gen_reg_rtx (Pmode);
2915 register rtx val = force_operand (XEXP (x, 0), temp);
2917 emit_move_insn (temp, val);
2927 /* Print an integer constant expression in assembler syntax. Addition
2928 and subtraction are the only arithmetic that may appear in these
2929 expressions. FILE is the stdio stream to write to, X is the rtx, and
2930 CODE is the operand print code from the output string. */
/* Emit the PIC constant expression X in assembler syntax to FILE, per
   the block comment above.  CODE is the %-letter operand code; 'P'
   requests @PLT decoration on non-static symbols.  Recurses over
   PLUS/MINUS/CONST subtrees; UNSPEC 6/7/8 map to @GOT/@GOTOFF/@PLT.  */
2933 output_pic_addr_const (file, x, code)
2940 switch (GET_CODE (x))
2950 assemble_name (file, XSTR (x, 0));
2951 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2952 fputs ("@PLT", file);
2959 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2960 assemble_name (asm_out_file, buf);
2964 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2968 /* This used to output parentheses around the expression,
2969 but that does not work on the 386 (either ATT or BSD assembler). */
2970 output_pic_addr_const (file, XEXP (x, 0), code);
2974 if (GET_MODE (x) == VOIDmode)
2976 /* We can use %d if the number is <32 bits and positive. */
2977 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2978 fprintf (file, "0x%lx%08lx",
2979 (unsigned long) CONST_DOUBLE_HIGH (x),
2980 (unsigned long) CONST_DOUBLE_LOW (x));
2982 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2985 /* We can't handle floating point constants;
2986 PRINT_OPERAND must handle them. */
2987 output_operand_lossage ("floating constant misused");
2991 /* Some assemblers need integer constants to appear first. */
2992 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2994 output_pic_addr_const (file, XEXP (x, 0), code);
2996 output_pic_addr_const (file, XEXP (x, 1), code);
2998 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3000 output_pic_addr_const (file, XEXP (x, 1), code);
3002 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket the subtraction; '(' / ')' in Intel dialect,
   '[' / ']' in AT&T per ASSEMBLER_DIALECT.  */
3009 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3010 output_pic_addr_const (file, XEXP (x, 0), code);
3012 output_pic_addr_const (file, XEXP (x, 1), code);
3013 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3017 if (XVECLEN (x, 0) != 1)
3019 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3020 switch (XINT (x, 1))
3023 fputs ("@GOT", file);
3026 fputs ("@GOTOFF", file);
3029 fputs ("@PLT", file);
3032 output_operand_lossage ("invalid UNSPEC as operand");
3038 output_operand_lossage ("invalid expression as operand");
3042 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3043 We need to handle our special PIC relocations. */
/* Emit X as a Dwarf address constant (INT_ASM_OP directive) to FILE,
   routing through output_pic_addr_const when our PIC relocations may
   be present (see comment above).  */
3046 i386_dwarf_output_addr_const (file, x)
3050 fprintf (file, "%s", INT_ASM_OP);
3052 output_pic_addr_const (file, x, '\0');
3054 output_addr_const (file, x);
3058 /* In the name of slightly smaller debug output, and to cater to
3059 general assembler lossage, recognize PIC+GOTOFF and turn it back
3060 into a direct symbol reference. */
/* Per the comment above: recognize pic_reg + const(@GOT/@GOTOFF [+ N])
   in ORIG_X and strip it back to the bare symbol (plus offset), for
   smaller debug output.  UNSPEC codes 6 and 7 are @GOT and @GOTOFF.  */
3063 i386_simplify_dwarf_addr (orig_x)
3068 if (GET_CODE (x) != PLUS
3069 || GET_CODE (XEXP (x, 0)) != REG
3070 || GET_CODE (XEXP (x, 1)) != CONST)
3073 x = XEXP (XEXP (x, 1), 0);
3074 if (GET_CODE (x) == UNSPEC
3075 && (XINT (x, 1) == 6
3076 || XINT (x, 1) == 7))
3077 return XVECEXP (x, 0, 0);
/* Same, with an extra constant offset inside the CONST.  */
3079 if (GET_CODE (x) == PLUS
3080 && GET_CODE (XEXP (x, 0)) == UNSPEC
3081 && GET_CODE (XEXP (x, 1)) == CONST_INT
3082 && (XINT (XEXP (x, 0), 1) == 6
3083 || XINT (XEXP (x, 0), 1) == 7))
3084 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
/* Write the condition-code suffix for comparison CODE (in CC mode
   MODE) to FILE.  REVERSE nonzero inverts the condition first; FP
   nonzero selects the spellings needed for fcmov-style instructions.  */
3090 put_condition_code (code, mode, reverse, fp, file)
3092 enum machine_mode mode;
3099 code = reverse_condition (code);
3110 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3115 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3116 Those same assemblers have the same but opposite lossage on cmov. */
3119 suffix = fp ? "nbe" : "a";
/* Suffix choice depends on which flag bits the CC mode guarantees.  */
3122 if (mode == CCNOmode || mode == CCGOCmode)
3124 else if (mode == CCmode || mode == CCGCmode)
3135 if (mode == CCNOmode || mode == CCGOCmode)
3137 else if (mode == CCmode || mode == CCGCmode)
3146 suffix = fp ? "nb" : "ae";
3149 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3167 fputs (suffix, file);
/* Print register X to FILE under operand code CODE.  CODE selects the
   register-name width: 'b' = QImode, 'w' = HImode, 'k' = SImode,
   'h' = high byte, 'y' = "st(0)" naming, 'm' = mmx naming; otherwise
   the width is taken from GET_MODE_SIZE of X.  */
3171 print_reg (x, code, file)
/* Internal-only registers must never reach assembler output.  */
3176 if (REGNO (x) == ARG_POINTER_REGNUM
3177 || REGNO (x) == FRAME_POINTER_REGNUM
3178 || REGNO (x) == FLAGS_REG
3179 || REGNO (x) == FPSR_REG)
3182 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3187 else if (code == 'b')
3189 else if (code == 'k')
3191 else if (code == 'y')
3193 else if (code == 'h')
3195 else if (code == 'm' || MMX_REG_P (x))
3198 code = GET_MODE_SIZE (GET_MODE (x));
3203 fputs (hi_reg_name[REGNO (x)], file);
3206 if (STACK_TOP_P (x))
3208 fputs ("st(0)", file);
3220 fputs (hi_reg_name[REGNO (x)], file);
3223 fputs (qi_reg_name[REGNO (x)], file);
3226 fputs (qi_high_reg_name[REGNO (x)], file);
3234 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3235 C -- print opcode suffix for set/cmov insn.
3236 c -- like C, but print reversed condition
3237 R -- print the prefix for register names.
3238 z -- print the opcode suffix for the size of the current operand.
3239 * -- print a star (in certain assembler syntax)
3240 A -- print an absolute memory reference.
3241 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3242 s -- print a shift double count, followed by the assembler's argument
3244 b -- print the QImode name of the register for the indicated operand.
3245 %b0 would print %al if operands[0] is reg 0.
3246 w -- likewise, print the HImode name of the register.
3247 k -- likewise, print the SImode name of the register.
3248 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3249 y -- print "st(0)" instead of "st" as a register.
3250 m -- print "st(n)" as an mmx register. */
/* Print operand X to FILE under the %-letter CODE documented in the
   comment block above.  Dispatches on CODE first, then on the rtx
   class of X (register, memory, float constant, other constant).  */
3253 print_operand (file, x, code)
3263 if (ASSEMBLER_DIALECT == 0)
3268 if (ASSEMBLER_DIALECT == 0)
3270 else if (ASSEMBLER_DIALECT == 1)
3272 /* Intel syntax. For absolute addresses, registers should not
3273 be surrounded by braces. */
3274 if (GET_CODE (x) != REG)
3277 PRINT_OPERAND (file, x, 0);
3283 PRINT_OPERAND (file, x, 0);
/* Size-suffix codes: each emits its letter only in AT&T dialect.  */
3288 if (ASSEMBLER_DIALECT == 0)
3293 if (ASSEMBLER_DIALECT == 0)
3298 if (ASSEMBLER_DIALECT == 0)
3303 if (ASSEMBLER_DIALECT == 0)
3308 if (ASSEMBLER_DIALECT == 0)
3313 if (ASSEMBLER_DIALECT == 0)
3318 /* 387 opcodes don't get size suffixes if the operands are
3321 if (STACK_REG_P (x))
3324 /* this is the size of op from size of operand */
3325 switch (GET_MODE_SIZE (GET_MODE (x)))
3328 #ifdef HAVE_GAS_FILDS_FISTS
3334 if (GET_MODE (x) == SFmode)
3349 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3351 #ifdef GAS_MNEMONICS
/* 's': shift-double count, omitted when the assembler implies it.  */
3377 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3379 PRINT_OPERAND (file, x, 0);
/* Condition-code suffixes; lowercase variants reverse the test.  */
3385 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3388 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3391 /* Like above, but reverse condition */
3393 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3396 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3402 sprintf (str, "invalid operand code `%c'", code);
3403 output_operand_lossage (str);
/* No (or handled) code letter: print X itself by rtx class.  */
3408 if (GET_CODE (x) == REG)
3410 PRINT_REG (x, code, file);
3413 else if (GET_CODE (x) == MEM)
3415 /* No `byte ptr' prefix for call instructions. */
3416 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3419 switch (GET_MODE_SIZE (GET_MODE (x)))
3421 case 1: size = "BYTE"; break;
3422 case 2: size = "WORD"; break;
3423 case 4: size = "DWORD"; break;
3424 case 8: size = "QWORD"; break;
3425 case 12: size = "XWORD"; break;
3426 case 16: size = "XMMWORD"; break;
3431 /* Check for explicit size override (codes 'b', 'w' and 'k') */
3434 else if (code == 'w')
3436 else if (code == 'k')
3440 fputs (" PTR ", file);
3444 if (flag_pic && CONSTANT_ADDRESS_P (x))
3445 output_pic_addr_const (file, x, code);
/* SFmode immediates print as their 32-bit IEEE bit pattern.  */
3450 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3455 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3456 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3458 if (ASSEMBLER_DIALECT == 0)
3460 fprintf (file, "0x%lx", l);
3463 /* These float cases don't actually occur as immediate operands. */
3464 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3469 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3470 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3471 fprintf (file, "%s", dstr);
3474 else if (GET_CODE (x) == CONST_DOUBLE
3475 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
3480 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3481 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3482 fprintf (file, "%s", dstr);
/* Remaining constants: '$' immediate marker in AT&T, OFFSET FLAT: in
   Intel, then the constant text itself (PIC-aware when needed).  */
3488 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3490 if (ASSEMBLER_DIALECT == 0)
3493 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3494 || GET_CODE (x) == LABEL_REF)
3496 if (ASSEMBLER_DIALECT == 0)
3499 fputs ("OFFSET FLAT:", file);
3502 if (GET_CODE (x) == CONST_INT)
3503 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3505 output_pic_addr_const (file, x, code);
3507 output_addr_const (file, x);
3511 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR to FILE, in the current assembler
   dialect, after decomposing it into base + index*scale + disp.  */
3514 print_operand_address (file, addr)
3518 struct ix86_address parts;
3519 rtx base, index, disp;
3522 if (! ix86_decompose_address (addr, &parts))
3526 index = parts.index;
3528 scale = parts.scale;
3530 if (!base && !index)
3532 /* Displacement only requires special attention. */
3534 if (GET_CODE (disp) == CONST_INT)
/* Bare absolute address: Intel dialect may need an explicit ds:
   segment override when there is no user label prefix.  */
3536 if (ASSEMBLER_DIALECT != 0)
3538 if (USER_LABEL_PREFIX[0] == 0)
3540 fputs ("ds:", file);
3542 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3545 output_pic_addr_const (file, addr, 0);
3547 output_addr_const (file, addr);
/* AT&T dialect: disp(base,index,scale).  */
3551 if (ASSEMBLER_DIALECT == 0)
3556 output_pic_addr_const (file, disp, 0);
3557 else if (GET_CODE (disp) == LABEL_REF)
3558 output_asm_label (disp);
3560 output_addr_const (file, disp);
3565 PRINT_REG (base, 0, file);
3569 PRINT_REG (index, 0, file);
3571 fprintf (file, ",%d", scale);
/* Intel dialect: sym[base+index*scale+offset].  */
3577 rtx offset = NULL_RTX;
3581 /* Pull out the offset of a symbol; print any symbol itself. */
3582 if (GET_CODE (disp) == CONST
3583 && GET_CODE (XEXP (disp, 0)) == PLUS
3584 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3586 offset = XEXP (XEXP (disp, 0), 1);
3587 disp = gen_rtx_CONST (VOIDmode,
3588 XEXP (XEXP (disp, 0), 0));
3592 output_pic_addr_const (file, disp, 0);
3593 else if (GET_CODE (disp) == LABEL_REF)
3594 output_asm_label (disp);
3595 else if (GET_CODE (disp) == CONST_INT)
3598 output_addr_const (file, disp);
3604 PRINT_REG (base, 0, file);
3607 if (INTVAL (offset) >= 0)
3609 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3613 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3620 PRINT_REG (index, 0, file);
3622 fprintf (file, "*%d", scale);
3629 /* Split one or more DImode RTL references into pairs of SImode
3630 references. The RTL can be REG, offsettable MEM, integer constant, or
3631 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3632 split and "num" is its length. lo_half and hi_half are output arrays
3633 that parallel "operands". */
/* Split DImode OPERANDS[0..NUM-1] into SImode LO_HALF/HI_HALF pairs;
   see the block comment above.  Handles constants, pseudos (before
   reload), hard register pairs, and offsettable memory.  */
3636 split_di (operands, num, lo_half, hi_half)
3639 rtx lo_half[], hi_half[];
3643 rtx op = operands[num];
3644 if (CONSTANT_P (op))
3645 split_double (op, &lo_half[num], &hi_half[num]);
3646 else if (! reload_completed)
3648 lo_half[num] = gen_lowpart (SImode, op);
3649 hi_half[num] = gen_highpart (SImode, op);
/* After reload a DImode hard register occupies REGNO and REGNO+1.  */
3651 else if (GET_CODE (op) == REG)
3653 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3654 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3656 else if (offsettable_memref_p (op))
3658 rtx lo_addr = XEXP (op, 0);
3659 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3660 lo_half[num] = change_address (op, SImode, lo_addr);
3661 hi_half[num] = change_address (op, SImode, hi_addr);
3668 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3669 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3670 is the expression of the binary operation. The output may either be
3671 emitted here, or returned to the caller, like all output_* functions.
3673 There is no guarantee that the operands are the same mode, as they
3674 might be within FLOAT or FLOAT_EXTEND expressions. */
3676 #ifndef SYSV386_COMPAT
3677 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3678 wants to fix the assemblers because that causes incompatibility
3679 with gcc. No-one wants to fix gcc because that causes
3680 incompatibility with assemblers... You can use the option of
3681 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3682 #define SYSV386_COMPAT 1
/* Emit/return the assembler template for the 387 binary operation in
   INSN (PLUS, MINUS, MULT or DIV) per the block comment above.  The
   base mnemonic goes in BUF; P picks the operand/suffix template,
   chosen by which operand is st(0), which operand dies, and (for the
   non-commutative MINUS/DIV) the SYSV386_COMPAT assembler quirk.  */
3686 output_387_binary_op (insn, operands)
3690 static char buf[30];
3693 #ifdef ENABLE_CHECKING
3694 /* Even if we do not want to check the inputs, this documents input
3695 constraints. Which helps in understanding the following code. */
3696 if (STACK_REG_P (operands[0])
3697 && ((REG_P (operands[1])
3698 && REGNO (operands[0]) == REGNO (operands[1])
3699 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3700 || (REG_P (operands[2])
3701 && REGNO (operands[0]) == REGNO (operands[2])
3702 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3703 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First pass: pick the base mnemonic; integer-mode operands select
   the fi* (integer-operand) variants.  */
3709 switch (GET_CODE (operands[3]))
3712 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3713 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3720 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3721 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3728 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3729 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3736 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3737 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* Second pass: pick the operand template.  */
3749 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so dest matches operands[1].  */
3753 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3755 rtx temp = operands[2];
3756 operands[2] = operands[1];
3760 /* know operands[0] == operands[1]. */
3762 if (GET_CODE (operands[2]) == MEM)
3768 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3770 if (STACK_TOP_P (operands[0]))
3771 /* How is it that we are storing to a dead operand[2]?
3772 Well, presumably operands[1] is dead too. We can't
3773 store the result to st(0) as st(0) gets popped on this
3774 instruction. Instead store to operands[2] (which I
3775 think has to be st(1)). st(1) will be popped later.
3776 gcc <= 2.8.1 didn't have this check and generated
3777 assembly code that the Unixware assembler rejected. */
3778 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3780 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3784 if (STACK_TOP_P (operands[0]))
3785 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3787 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative MINUS/DIV: memory operands and the reversed (r)
   forms must be distinguished; see SYSV386_COMPAT note below.  */
3792 if (GET_CODE (operands[1]) == MEM)
3798 if (GET_CODE (operands[2]) == MEM)
3804 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3807 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3808 derived assemblers, confusingly reverse the direction of
3809 the operation for fsub{r} and fdiv{r} when the
3810 destination register is not st(0). The Intel assembler
3811 doesn't have this brain damage. Read !SYSV386_COMPAT to
3812 figure out what the hardware really does. */
3813 if (STACK_TOP_P (operands[0]))
3814 p = "{p\t%0, %2|rp\t%2, %0}";
3816 p = "{rp\t%2, %0|p\t%0, %2}";
3818 if (STACK_TOP_P (operands[0]))
3819 /* As above for fmul/fadd, we can't store to st(0). */
3820 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3822 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3827 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3830 if (STACK_TOP_P (operands[0]))
3831 p = "{rp\t%0, %1|p\t%1, %0}";
3833 p = "{p\t%1, %0|rp\t%0, %1}";
3835 if (STACK_TOP_P (operands[0]))
3836 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3838 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3843 if (STACK_TOP_P (operands[0]))
3845 if (STACK_TOP_P (operands[1]))
3846 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3848 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3851 else if (STACK_TOP_P (operands[1]))
3854 p = "{\t%1, %0|r\t%0, %1}";
3856 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3862 p = "{r\t%2, %0|\t%0, %2}";
3864 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3877 /* Output code for INSN to convert a float to a signed int. OPERANDS
3878 are the insn operands. The output may be [HSD]Imode and the input
3879 operand may be [SDX]Fmode. */
/* Emit the float -> signed int truncation sequence for INSN; see the
   block comment above.  operands[2]/operands[3] are scratch slots for
   saving and modifying the FPU control word (rounding must be forced
   to truncate around the fist/fistp).  */
3882 output_fix_trunc (insn, operands)
3886 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3887 int dimode_p = GET_MODE (operands[0]) == DImode;
3890 /* Jump through a hoop or two for DImode, since the hardware has no
3891 non-popping instruction. We used to do this a different way, but
3892 that was somewhat fragile and broke with post-reload splitters. */
3893 if (dimode_p && !stack_top_dies)
3894 output_asm_insn ("fld\t%y1", operands);
3896 if (! STACK_TOP_P (operands[1]))
/* Set the RC (rounding control) bits in the saved control word's
   high byte to "truncate" (0x0c -> the 12 written below).  */
3899 xops[0] = GEN_INT (12);
3900 xops[1] = adj_offsettable_operand (operands[2], 1);
3901 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3903 xops[2] = operands[0];
3904 if (GET_CODE (operands[0]) != MEM)
3905 xops[2] = operands[3];
3907 output_asm_insn ("fnstcw\t%2", operands);
3908 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3909 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3910 output_asm_insn ("fldcw\t%2", operands);
3911 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
/* DImode always uses the popping form (no non-popping fistpll).  */
3913 if (stack_top_dies || dimode_p)
3914 output_asm_insn ("fistp%z2\t%2", xops);
3916 output_asm_insn ("fist%z2\t%2", xops);
3918 output_asm_insn ("fldcw\t%2", operands);
/* Result landed in a memory scratch; copy it to the register dest.  */
3920 if (GET_CODE (operands[0]) != MEM)
3924 split_di (operands+0, 1, xops+0, xops+1);
3925 split_di (operands+3, 1, xops+2, xops+3);
3926 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3927 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3929 else if (GET_MODE (operands[0]) == SImode)
3930 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3932 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3938 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3939 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3940 when fucom should be used. */
/* Emit the x87 compare for INSN; see the comment above.  EFLAGS_P is
   1 for fcomi-style (flags set directly), 2 for fnstsw-style (status
   word copied out); UNORDERED_P selects the fucom family.  */
3943 output_fp_compare (insn, operands, eflags_p, unordered_p)
3946 int eflags_p, unordered_p;
3949 rtx cmp_op0 = operands[0];
3950 rtx cmp_op1 = operands[1];
3955 cmp_op1 = operands[2];
3958 if (! STACK_TOP_P (cmp_op0))
3961 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3963 if (STACK_REG_P (cmp_op1)
3965 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3966 && REGNO (cmp_op1) != FIRST_STACK_REG)
3968 /* If both the top of the 387 stack dies, and the other operand
3969 is also a stack register that dies, then this must be a
3970 `fcompp' float compare */
3974 /* There is no double popping fcomi variant. Fortunately,
3975 eflags is immune from the fstp's cc clobbering. */
3977 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3979 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3987 return "fucompp\n\tfnstsw\t%0";
3989 return "fcompp\n\tfnstsw\t%0";
4002 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
4004 static const char * const alt[24] =
4016 "fcomi\t{%y1, %0|%0, %y1}",
4017 "fcomip\t{%y1, %0|%0, %y1}",
4018 "fucomi\t{%y1, %0|%0, %y1}",
4019 "fucomip\t{%y1, %0|%0, %y1}",
4026 "fcom%z2\t%y2\n\tfnstsw\t%0",
4027 "fcomp%z2\t%y2\n\tfnstsw\t%0",
4028 "fucom%z2\t%y2\n\tfnstsw\t%0",
4029 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4031 "ficom%z2\t%y2\n\tfnstsw\t%0",
4032 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into ALT from the flags named above.  */
4040 mask = eflags_p << 3;
4041 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4042 mask |= unordered_p << 1;
4043 mask |= stack_top_dies;
4055 /* Output assembler code to FILE to initialize basic-block profiling.
4057 If profile_block_flag == 2
4059 Output code to call the subroutine `__bb_init_trace_func'
4060 and pass two parameters to it. The first parameter is
4061 the address of a block allocated in the object module.
4062 The second parameter is the number of the first basic block
4065 The name of the block is a local symbol made with this statement:
4067 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4069 Of course, since you are writing the definition of
4070 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4071 can take a short cut in the definition of this macro and use the
4072 name that you know will result.
4074 The number of the first basic block of the function is
4075 passed to the macro in BLOCK_OR_LABEL.
4077 If described in a virtual assembler language the code to be
4081 parameter2 <- BLOCK_OR_LABEL
4082 call __bb_init_trace_func
4084 else if profile_block_flag != 0
4086 Output code to call the subroutine `__bb_init_func'
4087 and pass one single parameter to it, which is the same
4088 as the first parameter to `__bb_init_trace_func'.
4090 The first word of this parameter is a flag which will be nonzero if
4091 the object module has already been initialized. So test this word
4092 first, and do not call `__bb_init_func' if the flag is nonzero.
4093 Note: When profile_block_flag == 2 the test need not be done
4094 but `__bb_init_trace_func' *must* be called.
4096 BLOCK_OR_LABEL may be used to generate a label number as a
4097 branch destination in case `__bb_init_func' will not be called.
4099 If described in a virtual assembler language the code to be
/* Emit the function-entry basic-block-profiling prologue described in
   the large comment above: call __bb_init_trace_func (flag == 2) or,
   guarded by the already-initialized flag word, __bb_init_func.
   xops[1] is the LPBX0 block table, xops[5] %esp, xops[7] %eax (used
   as scratch under -fPIC where the symbol cannot be pushed directly).
   Fix: the second lea template's Intel side printed %a2 (the false
   label MEM) instead of %a1 (the block table), disagreeing with its
   own AT&T side and with the parallel lea in the flag == 2 arm.  */
4110 ix86_output_function_block_profiler (file, block_or_label)
4114 static int num_func = 0;
4116 char block_table[80], false_label[80];
4118 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4120 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4121 xops[5] = stack_pointer_rtx;
4122 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4124 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4126 switch (profile_block_flag)
/* flag == 2: push BLOCK_OR_LABEL and &LPBX0, call
   __bb_init_trace_func, pop the 8 bytes of arguments.  */
4129 xops[2] = GEN_INT (block_or_label);
4130 xops[3] = gen_rtx_MEM (Pmode,
4131 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4132 xops[6] = GEN_INT (8);
4134 output_asm_insn ("push{l}\t%2", xops);
4136 output_asm_insn ("push{l}\t%1", xops);
4139 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4140 output_asm_insn ("push{l}\t%7", xops);
4142 output_asm_insn ("call\t%P3", xops);
4143 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
/* flag != 0: skip the call when the table's flag word is already
   nonzero, else push &LPBX0 and call __bb_init_func (4 arg bytes).  */
4147 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4149 xops[0] = const0_rtx;
4150 xops[2] = gen_rtx_MEM (Pmode,
4151 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4152 xops[3] = gen_rtx_MEM (Pmode,
4153 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4154 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4155 xops[6] = GEN_INT (4);
4157 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4159 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4160 output_asm_insn ("jne\t%2", xops);
4163 output_asm_insn ("push{l}\t%1", xops);
4166 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4167 output_asm_insn ("push{l}\t%7", xops);
4169 output_asm_insn ("call\t%P3", xops);
4170 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4171 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4177 /* Output assembler code to FILE to increment a counter associated
4178 with basic block number BLOCKNO.
4180 If profile_block_flag == 2
4182 Output code to initialize the global structure `__bb' and
4183 call the function `__bb_trace_func' which will increment the
4186 `__bb' consists of two words. In the first word the number
4187 of the basic block has to be stored. In the second word
4188 the address of a block allocated in the object module
4191 The basic block number is given by BLOCKNO.
4193 The address of the block is given by the label created with
4195 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4197 by FUNCTION_BLOCK_PROFILER.
4199 Of course, since you are writing the definition of
4200 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4201 can take a short cut in the definition of this macro and use the
4202 name that you know will result.
4204 If described in a virtual assembler language the code to be
4207 move BLOCKNO -> (__bb)
4208 move LPBX0 -> (__bb+4)
4209 call __bb_trace_func
4211 Note that function `__bb_trace_func' must not change the
4212 machine state, especially the flag register. To grant
4213 this, you must output code to save and restore registers
4214 either in this macro or in the macros MACHINE_STATE_SAVE
4215 and MACHINE_STATE_RESTORE. The last two macros will be
4216 used in the function `__bb_trace_func', so you must make
4217 sure that the function prologue does not change any
4218 register prior to saving it with MACHINE_STATE_SAVE.
4220 else if profile_block_flag != 0
4222 Output code to increment the counter directly.
4223 Basic blocks are numbered separately from zero within each
4224 compiled object module. The count associated with block number
4225 BLOCKNO is at index BLOCKNO in an array of words; the name of
4226 this array is a local symbol made with this statement:
4228 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4230 Of course, since you are writing the definition of
4231 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4232 can take a short cut in the definition of this macro and use the
4233 name that you know will result.
4235 If described in a virtual assembler language the code to be
4238 inc (LPBX2+4*BLOCKNO)
/* Emit the assembly for BLOCK_PROFILER (see the macro comment above):
   when profile_block_flag == 2, store BLOCKNO and the LPBX0 address into
   the global `__bb' structure and call __bb_trace_func (bracketed by
   pushf/popf so the flags survive); otherwise emit a direct increment of
   the LPBX2 counter array slot for BLOCKNO, going through the PIC register
   when appropriate.
   NOTE(review): lines are elided from this listing (numbering jumps), so
   braces, case labels and some declarations are not visible.  */
4242 ix86_output_block_profiler (file, blockno)
4243 FILE *file ATTRIBUTE_UNUSED;
4246 rtx xops[8], cnt_rtx;
4248 char *block_table = counts;
4250 switch (profile_block_flag)
4253 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4255 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4256 xops[2] = GEN_INT (blockno);
4257 xops[3] = gen_rtx_MEM (Pmode,
4258 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4259 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4260 xops[5] = plus_constant (xops[4], 4)
4261 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4262 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4264 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
/* Preserve the flags around the call; __bb_trace_func must not clobber
   machine state (see MACHINE_STATE_SAVE in the macro comment above).  */
4266 output_asm_insn ("pushf", xops);
4267 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4270 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4271 output_asm_insn ("push{l}\t%7", xops);
4272 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4273 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4274 output_asm_insn ("pop{l}\t%7", xops);
4277 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4278 output_asm_insn ("call\t%P3", xops);
4279 output_asm_insn ("popf", xops);
4284 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4285 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4286 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4289 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4292 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4294 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4295 output_asm_insn ("inc{l}\t%0", xops);
/* Expand a move of OPERANDS[1] into OPERANDS[0] in MODE, massaging the
   operands so the resulting insn is valid for the i386: legitimize PIC
   symbolic addresses, force mem->mem moves through a register, copy
   non-eliminable push sources to a register, and spill FP constants to
   the constant pool.
   NOTE(review): elided lines (numbering jumps) hide the declarations,
   some else-branches and the final emit -- read with the full source.  */
4302 ix86_expand_move (mode, operands)
4303 enum machine_mode mode;
4306 int strict = (reload_in_progress || reload_completed);
4309 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4311 /* Emit insns to move operands[1] into operands[0]. */
4313 if (GET_CODE (operands[0]) == MEM)
4314 operands[1] = force_reg (Pmode, operands[1]);
4317 rtx temp = operands[0];
4318 if (GET_CODE (temp) != REG)
4319 temp = gen_reg_rtx (Pmode);
4320 temp = legitimize_pic_address (operands[1], temp);
4321 if (temp == operands[0])
/* Memory-to-memory moves are not directly encodable; go via a reg.  */
4328 if (GET_CODE (operands[0]) == MEM
4329 && (GET_MODE (operands[0]) == QImode
4330 || !push_operand (operands[0], mode))
4331 && GET_CODE (operands[1]) == MEM)
4332 operands[1] = force_reg (mode, operands[1]);
4334 if (push_operand (operands[0], mode)
4335 && ! general_no_elim_operand (operands[1], mode))
4336 operands[1] = copy_to_mode_reg (mode, operands[1]);
4338 if (FLOAT_MODE_P (mode))
4340 /* If we are loading a floating point constant to a register,
4341 force the value to memory now, since we'll get better code
4342 out the back end. */
4346 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4347 && register_operand (operands[0], mode))
4348 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4352 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4357 /* Attempt to expand a binary operator. Make the expansion closer to the
4358 actual machine, than just general_operand, which will allow 3 separate
4359 memory references (one output, two input) in a single insn. */
/* NOTE(review): elided lines hide the declarations, the src1/src2/dst
   initialization and the commutative-swap body; the visible logic forces
   operands into registers so at most one memory reference survives, then
   emits the SET (with a FLAGS_REG clobber outside reload).  */
4362 ix86_expand_binary_operator (code, mode, operands)
4364 enum machine_mode mode;
4367 int matching_memory;
4368 rtx src1, src2, dst, op, clob;
4374 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4375 if (GET_RTX_CLASS (code) == 'c'
4376 && (rtx_equal_p (dst, src2)
4377 || immediate_operand (src1, mode)))
4384 /* If the destination is memory, and we do not have matching source
4385 operands, do things in registers. */
4386 matching_memory = 0;
4387 if (GET_CODE (dst) == MEM)
4389 if (rtx_equal_p (dst, src1))
4390 matching_memory = 1;
4391 else if (GET_RTX_CLASS (code) == 'c'
4392 && rtx_equal_p (dst, src2))
4393 matching_memory = 2;
4395 dst = gen_reg_rtx (mode);
4398 /* Both source operands cannot be in memory. */
4399 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4401 if (matching_memory != 2)
4402 src2 = force_reg (mode, src2);
4404 src1 = force_reg (mode, src1);
4407 /* If the operation is not commutable, source 1 cannot be a constant
4408 or non-matching memory. */
4409 if ((CONSTANT_P (src1)
4410 || (!matching_memory && GET_CODE (src1) == MEM))
4411 && GET_RTX_CLASS (code) != 'c')
4412 src1 = force_reg (mode, src1);
4414 /* If optimizing, copy to regs to improve CSE */
4415 if (optimize && ! no_new_pseudos)
4417 if (GET_CODE (dst) == MEM)
4418 dst = gen_reg_rtx (mode);
4419 if (GET_CODE (src1) == MEM)
4420 src1 = force_reg (mode, src1);
4421 if (GET_CODE (src2) == MEM)
4422 src2 = force_reg (mode, src2);
4425 /* Emit the instruction. */
4427 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4428 if (reload_in_progress)
4430 /* Reload doesn't know about the flags register, and doesn't know that
4431 it doesn't want to clobber it. We can only do this with PLUS. */
4438 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4439 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4442 /* Fix up the destination if needed. */
4443 if (dst != operands[0])
4444 emit_move_insn (operands[0], dst);
4447 /* Return TRUE or FALSE depending on whether the binary operator meets the
4448 appropriate constraints. */
/* NOTE(review): the returns themselves are on elided lines; each visible
   condition is a reason to reject the operand combination.  */
4451 ix86_binary_operator_ok (code, mode, operands)
4453 enum machine_mode mode ATTRIBUTE_UNUSED;
4456 /* Both source operands cannot be in memory. */
4457 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4459 /* If the operation is not commutable, source 1 cannot be a constant. */
4460 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4462 /* If the destination is memory, we must have a matching source operand. */
4463 if (GET_CODE (operands[0]) == MEM
4464 && ! (rtx_equal_p (operands[0], operands[1])
4465 || (GET_RTX_CLASS (code) == 'c'
4466 && rtx_equal_p (operands[0], operands[2]))))
4468 /* If the operation is not commutable and the source 1 is memory, we must
4469 have a matching destination. */
4470 if (GET_CODE (operands[1]) == MEM
4471 && GET_RTX_CLASS (code) != 'c'
4472 && ! rtx_equal_p (operands[0], operands[1]))
4477 /* Attempt to expand a unary operator. Make the expansion closer to the
4478 actual machine, than just general_operand, which will allow 2 separate
4479 memory references (one output, one input) in a single insn. */
/* NOTE(review): elided lines hide the declarations and the src/dst
   initialization; the visible logic mirrors the binary-operator expander
   above, adding a FLAGS_REG clobber during reload and for NOT (which on
   x86 does not set the flags).  */
4482 ix86_expand_unary_operator (code, mode, operands)
4484 enum machine_mode mode;
4487 int matching_memory;
4488 rtx src, dst, op, clob;
4493 /* If the destination is memory, and we do not have matching source
4494 operands, do things in registers. */
4495 matching_memory = 0;
4496 if (GET_CODE (dst) == MEM)
4498 if (rtx_equal_p (dst, src))
4499 matching_memory = 1;
4501 dst = gen_reg_rtx (mode);
4504 /* When source operand is memory, destination must match. */
4505 if (!matching_memory && GET_CODE (src) == MEM)
4506 src = force_reg (mode, src);
4508 /* If optimizing, copy to regs to improve CSE */
4509 if (optimize && ! no_new_pseudos)
4511 if (GET_CODE (dst) == MEM)
4512 dst = gen_reg_rtx (mode);
4513 if (GET_CODE (src) == MEM)
4514 src = force_reg (mode, src);
4517 /* Emit the instruction. */
4519 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4520 if (reload_in_progress || code == NOT)
4522 /* Reload doesn't know about the flags register, and doesn't know that
4523 it doesn't want to clobber it. */
4530 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4531 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4534 /* Fix up the destination if needed. */
4535 if (dst != operands[0])
4536 emit_move_insn (operands[0], dst);
4539 /* Return TRUE or FALSE depending on whether the unary operator meets the
4540 appropriate constraints. */
4543 ix86_unary_operator_ok (code, mode, operands)
4544 enum rtx_code code ATTRIBUTE_UNUSED;
4545 enum machine_mode mode ATTRIBUTE_UNUSED;
4546 rtx operands[2] ATTRIBUTE_UNUSED;
4548 /* If one of operands is memory, source and destination must match. */
4549 if ((GET_CODE (operands[0]) == MEM
4550 || GET_CODE (operands[1]) == MEM)
4551 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): the FALSE and TRUE returns are on elided lines.  */
4556 /* Return TRUE or FALSE depending on whether the first SET in INSN
4557 has source and destination with matching CC modes, and that the
4558 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): the bodies of the req_mode cases (which set_modes each
   req_mode accepts) are on elided lines; only the dispatch is visible.  */
4561 ix86_match_ccmode (insn, req_mode)
4563 enum machine_mode req_mode;
4566 enum machine_mode set_mode;
4568 set = PATTERN (insn);
4569 if (GET_CODE (set) == PARALLEL)
4570 set = XVECEXP (set, 0, 0);
4571 if (GET_CODE (set) != SET)
4573 if (GET_CODE (SET_SRC (set)) != COMPARE)
4576 set_mode = GET_MODE (SET_DEST (set));
4580 if (req_mode != CCNOmode
4581 && (req_mode != CCmode
4582 || XEXP (SET_SRC (set), 1) != const0_rtx))
4586 if (req_mode == CCGCmode)
4590 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4594 if (req_mode == CCZmode)
4604 return (GET_MODE (SET_SRC (set)) == set_mode);
4607 /* Produce an unsigned comparison for a given signed comparison. */
4609 static enum rtx_code
4610 unsigned_comparison (code)
/* NOTE(review): the entire body (original lines ~4611-4641) is elided
   from this listing; presumably it maps LT/LE/GT/GE to their unsigned
   counterparts LTU/LEU/GTU/GEU -- confirm against the full source.  */
4642 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Sets FLAGS_REG (in the CC mode chosen by SELECT_CC_MODE) to the result
   of comparing OP0 with OP1, and returns the comparison rtx for the
   eventual flags consumer (bcc/scc/cmov).  */
4645 ix86_expand_int_compare (code, op0, op1)
4649 enum machine_mode cmpmode;
4652 cmpmode = SELECT_CC_MODE (code, op0, op1);
4653 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4655 /* This is very simple, but making the interface the same as in the
4656 FP case makes the rest of the code easier. */
4657 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4658 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4660 /* Return the test that should be put into the flags user, i.e.
4661 the bcc, scc, or cmov instruction. */
4662 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4665 /* Figure out whether to use ordered or unordered fp comparisons.
4666 Return the appropriate mode to use. */
/* NOTE(review): the case bodies that set `unordered' per comparison code
   are on elided lines; only the default initialization and the final
   selection are visible.  */
4669 ix86_fp_compare_mode (code)
4677 /* When not doing IEEE compliant compares, fault on NaNs. */
4678 unordered = (TARGET_IEEE_FP != 0);
4681 case LT: case LE: case GT: case GE:
4685 case UNORDERED: case ORDERED:
4686 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4694 /* ??? If we knew whether invalid-operand exceptions were masked,
4695 we could rely on fcom to raise an exception and take care of
4696 NaNs. But we don't. We could know this from c99 math pragmas. */
4700 return unordered ? CCFPUmode : CCFPmode;
/* Select the CC mode for a comparison CODE of OP0 against OP1: FP modes
   for float operands, otherwise the narrowest integer CC mode whose flag
   subset suffices for CODE (see the per-case flag notes below).
   NOTE(review): the mode returned by each case is on an elided line; only
   the case labels and their flag-usage comments are visible.  */
4704 ix86_cc_mode (code, op0, op1)
4708 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4709 return ix86_fp_compare_mode (code);
4712 /* Only zero flag is needed. */
4714 case NE: /* ZF!=0 */
4716 /* Codes needing carry flag. */
4717 case GEU: /* CF=0 */
4718 case GTU: /* CF=0 & ZF=0 */
4719 case LTU: /* CF=1 */
4720 case LEU: /* CF=1 | ZF=1 */
4722 /* Codes possibly doable only with sign flag when
4723 comparing against zero. */
4724 case GE: /* SF=OF or SF=0 */
4725 case LT: /* SF<>OF or SF=1 */
4726 if (op1 == const0_rtx)
4729 /* For other cases Carry flag is not required. */
4731 /* Codes doable only with sign flag when comparing
4732 against zero, but we miss jump instruction for it
4733 so we need to use relational tests against overflow
4734 that thus needs to be zero. */
4735 case GT: /* ZF=0 & SF=OF */
4736 case LE: /* ZF=1 | SF<>OF */
4737 if (op1 == const0_rtx)
4746 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* fcomi requires CMOV-capable hardware (P6+); beyond that, it is chosen
   for ORDERED/UNORDERED and for any compare whose mode works out to the
   plain ordered CCFPmode.  */
4749 ix86_use_fcomi_compare (code)
4752 return (TARGET_CMOVE
4753 && (code == ORDERED || code == UNORDERED
4754 /* All other unordered compares require checking
4755 multiple sets of bits. */
4756 || ix86_fp_compare_mode (code) == CCFPmode));
4759 /* Swap, force into registers, or otherwise massage the two operands
4760 to a fp comparison. The operands are updated in place; the new
4761 comparison code is returned. */
/* NOTE(review): elided lines hide parameter declarations, the swap-branch
   condition context, and the final return of CODE.  */
4763 static enum rtx_code
4764 ix86_prepare_fp_compare_args (code, pop0, pop1)
4768 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4769 rtx op0 = *pop0, op1 = *pop1;
4770 enum machine_mode op_mode = GET_MODE (op0);
4772 /* All of the unordered compare instructions only work on registers.
4773 The same is true of the XFmode compare instructions. The same is
4774 true of the fcomi compare instructions. */
4776 if (fpcmp_mode == CCFPUmode
4777 || op_mode == XFmode
4778 || op_mode == TFmode
4779 || ix86_use_fcomi_compare (code))
4781 op0 = force_reg (op_mode, op0);
4782 op1 = force_reg (op_mode, op1);
4786 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4787 things around if they appear profitable, otherwise force op0
4790 if (standard_80387_constant_p (op0) == 0
4791 || (GET_CODE (op0) == MEM
4792 && ! (standard_80387_constant_p (op1) == 0
4793 || GET_CODE (op1) == MEM)))
/* Swap the operands and adjust CODE to match the swapped order.  */
4796 tmp = op0, op0 = op1, op1 = tmp;
4797 code = swap_condition (code);
4800 if (GET_CODE (op0) != REG)
4801 op0 = force_reg (op_mode, op0);
4803 if (CONSTANT_P (op1))
4805 if (standard_80387_constant_p (op1))
4806 op1 = force_reg (op_mode, op1);
4808 op1 = validize_mem (force_const_mem (op_mode, op1));
4817 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Three strategies are visible below: (1) fcomi straight into the flags;
   (2) fnstsw into SCRATCH followed by sahf; (3) fnstsw followed by bit
   tests on AH (C0/C2/C3 live in bits 0x01/0x04/0x40).  Returns the
   comparison rtx for the flags consumer, like ix86_expand_int_compare.
   NOTE(review): the switch dispatch on CODE and many case labels between
   the emitted bit-test sequences are on elided lines, so which sequence
   belongs to which comparison code cannot be confirmed from this view.  */
4820 ix86_expand_fp_compare (code, op0, op1, scratch)
4822 rtx op0, op1, scratch;
4824 enum machine_mode fpcmp_mode, intcmp_mode;
4827 fpcmp_mode = ix86_fp_compare_mode (code);
4828 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4830 /* %%% fcomi is probably always faster, even when dealing with memory,
4831 since compare-and-branch would be three insns instead of four. */
4832 if (ix86_use_fcomi_compare (code))
4834 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4835 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4838 /* The FP codes work out to act like unsigned. */
4839 code = unsigned_comparison (code);
4840 intcmp_mode = CCmode;
4844 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4847 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4848 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4849 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
4851 if (fpcmp_mode == CCFPmode
4853 || code == UNORDERED)
4855 /* We have two options here -- use sahf, or testing bits of ah
4856 directly. On PPRO, they are equivalent, sahf being one byte
4857 smaller. On Pentium, sahf is non-pairable while test is UV
4860 if (TARGET_USE_SAHF || optimize_size)
4863 emit_insn (gen_x86_sahf_1 (scratch));
4865 /* The FP codes work out to act like unsigned. */
4866 code = unsigned_comparison (code);
4867 intcmp_mode = CCmode;
4872 * The numbers below correspond to the bits of the FPSW in AH.
4873 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
4895 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4896 faster in all cases to just fall back on sahf. */
4923 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
4924 intcmp_mode = CCNOmode;
4929 /* In the unordered case, we have to check C2 for NaN's, which
4930 doesn't happen to work out to anything nice combination-wise.
4931 So do some bit twiddling on the value we've got in AH to come
4932 up with an appropriate set of condition codes. */
4934 intcmp_mode = CCNOmode;
4938 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4942 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4943 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
4944 intcmp_mode = CCmode;
4948 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
4952 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4953 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4954 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4955 intcmp_mode = CCmode;
4959 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4960 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4961 intcmp_mode = CCmode;
4965 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4966 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4971 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4975 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4979 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4983 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4984 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4988 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4989 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4990 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4991 intcmp_mode = CCmode;
4995 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4999 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
5003 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
5013 /* Return the test that should be put into the flags user, i.e.
5014 the bcc, scc, or cmov instruction. */
5015 return gen_rtx_fmt_ee (code, VOIDmode,
5016 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the pending comparison (ix86_compare_op0/op1) for CODE,
   dispatching to the FP or integer expander, and return the comparison
   rtx for the flags consumer.  */
5021 ix86_expand_compare (code)
5025 op0 = ix86_compare_op0;
5026 op1 = ix86_compare_op1;
5028 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5029 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
5031 ret = ix86_expand_int_compare (code, op0, op1);
/* Expand a conditional branch on CODE to LABEL, using the pending
   ix86_compare_op0/op1.  Integer modes emit a compare + jump; FP modes
   keep the comparison unexpanded inside a PARALLEL (with FP-stack flag
   clobbers) so later passes may reverse it; DImode is decomposed into
   SImode compare+branch sequences.
   NOTE(review): the switch on GET_MODE and several case labels/returns
   are elided from this listing.  */
5037 ix86_expand_branch (code, label)
5043 switch (GET_MODE (ix86_compare_op0))
5048 tmp = ix86_expand_compare (code);
5049 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5050 gen_rtx_LABEL_REF (VOIDmode, label),
5052 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5059 /* Don't expand the comparison early, so that we get better code
5060 when jump or whoever decides to reverse the comparison. */
5065 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5068 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5069 ix86_compare_op0, ix86_compare_op1);
5070 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5071 gen_rtx_LABEL_REF (VOIDmode, label),
5073 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5075 use_fcomi = ix86_use_fcomi_compare (code);
5076 vec = rtvec_alloc (3 + !use_fcomi);
5077 RTVEC_ELT (vec, 0) = tmp;
/* Clobber the FP status registers; a scratch HImode reg is also
   clobbered when the fnstsw path (no fcomi) will be used.  */
5079 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5081 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5084 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5086 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5091 /* Expand DImode branch into multiple compare+branch. */
5093 rtx lo[2], hi[2], label2;
5094 enum rtx_code code1, code2, code3;
5096 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5098 tmp = ix86_compare_op0;
5099 ix86_compare_op0 = ix86_compare_op1;
5100 ix86_compare_op1 = tmp;
5101 code = swap_condition (code);
5103 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5104 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5106 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5107 avoid two branches. This costs one extra insn, so disable when
5108 optimizing for size. */
5110 if ((code == EQ || code == NE)
5112 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5117 if (hi[1] != const0_rtx)
5118 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5119 NULL_RTX, 0, OPTAB_WIDEN);
5122 if (lo[1] != const0_rtx)
5123 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5124 NULL_RTX, 0, OPTAB_WIDEN);
5126 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5127 NULL_RTX, 0, OPTAB_WIDEN);
5129 ix86_compare_op0 = tmp;
5130 ix86_compare_op1 = const0_rtx;
5131 ix86_expand_branch (code, label);
5135 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5136 op1 is a constant and the low word is zero, then we can just
5137 examine the high word. */
5139 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5142 case LT: case LTU: case GE: case GEU:
5143 ix86_compare_op0 = hi[0];
5144 ix86_compare_op1 = hi[1];
5145 ix86_expand_branch (code, label);
5151 /* Otherwise, we need two or three jumps. */
5153 label2 = gen_label_rtx ();
5156 code2 = swap_condition (code);
5157 code3 = unsigned_condition (code);
5161 case LT: case GT: case LTU: case GTU:
5164 case LE: code1 = LT; code2 = GT; break;
5165 case GE: code1 = GT; code2 = LT; break;
5166 case LEU: code1 = LTU; code2 = GTU; break;
5167 case GEU: code1 = GTU; code2 = LTU; break;
5169 case EQ: code1 = NIL; code2 = NE; break;
5170 case NE: code2 = NIL; break;
5178 * if (hi(a) < hi(b)) goto true;
5179 * if (hi(a) > hi(b)) goto false;
5180 * if (lo(a) < lo(b)) goto true;
5184 ix86_compare_op0 = hi[0];
5185 ix86_compare_op1 = hi[1];
5188 ix86_expand_branch (code1, label);
5190 ix86_expand_branch (code2, label2);
5192 ix86_compare_op0 = lo[0];
5193 ix86_compare_op1 = lo[1];
5194 ix86_expand_branch (code3, label);
5197 emit_label (label2);
/* Expand a setcc of CODE into DEST.  Returns 1 on success, 0 to tell the
   caller to FAIL the expansion (DImode compares are not handled).  The
   three generation modes are described in the comment below.
   NOTE(review): elided lines hide the mode-selection assignments and the
   branch structure tying the fragments together.  */
5207 ix86_expand_setcc (code, dest)
5214 if (GET_MODE (ix86_compare_op0) == DImode)
5215 return 0; /* FAIL */
5217 /* Three modes of generation:
5218 0 -- destination does not overlap compare sources:
5219 clear dest first, emit strict_low_part setcc.
5220 1 -- destination does overlap compare sources:
5221 emit subreg setcc, zero extend.
5222 2 -- destination is in QImode:
5228 if (GET_MODE (dest) == QImode)
5230 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5231 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
/* Mode 0: clear DEST before the compare so the flags survive.  */
5235 emit_move_insn (dest, const0_rtx);
5237 ret = ix86_expand_compare (code);
5238 PUT_MODE (ret, QImode);
5243 tmp = gen_lowpart (QImode, dest);
5244 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5248 if (!cse_not_expected)
5249 tmp = gen_reg_rtx (QImode);
5251 tmp = gen_lowpart (QImode, dest);
5254 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Mode 1: widen the QImode setcc result into DEST (clobbers flags).  */
5260 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5261 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5262 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5263 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5267 return 1; /* DONE */
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]).  Returns 1 when done, 0 to FAIL.  Tries,
   in order: sbb-based branchless sequences for constant arms, setcc +
   lea/and arithmetic for small constant differences, masking tricks for
   0/-1 arms, and finally a real cmov via IF_THEN_ELSE.
   NOTE(review): many lines are elided (numbering jumps throughout), so
   branch structure, `diff' computation and several emitted insns are not
   visible; treat the flow below as a sketch of the full function.  */
5271 ix86_expand_int_movcc (operands)
5274 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5275 rtx compare_seq, compare_op;
5277 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5278 In case comparison is done with immediate, we can convert it to LTU or
5279 GEU by altering the integer. */
5281 if ((code == LEU || code == GTU)
5282 && GET_CODE (ix86_compare_op1) == CONST_INT
5283 && GET_MODE (operands[0]) != HImode
5284 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5285 && GET_CODE (operands[2]) == CONST_INT
5286 && GET_CODE (operands[3]) == CONST_INT)
5292 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5296 compare_op = ix86_expand_compare (code);
5297 compare_seq = gen_sequence ();
5300 compare_code = GET_CODE (compare_op);
5302 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5303 HImode insns, we'd be swallowed in word prefix ops. */
5305 if (GET_MODE (operands[0]) != HImode
5306 && GET_CODE (operands[2]) == CONST_INT
5307 && GET_CODE (operands[3]) == CONST_INT)
5309 rtx out = operands[0];
5310 HOST_WIDE_INT ct = INTVAL (operands[2]);
5311 HOST_WIDE_INT cf = INTVAL (operands[3]);
5314 if (compare_code == LTU || compare_code == GEU)
5317 /* Detect overlap between destination and compare sources. */
5320 /* To simplify rest of code, restrict to the GEU case. */
5321 if (compare_code == LTU)
5326 compare_code = reverse_condition (compare_code);
5327 code = reverse_condition (code);
5331 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5332 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5333 tmp = gen_reg_rtx (SImode);
/* sbb-style: materialize 0 / -1 from the carry, then adjust.  */
5335 emit_insn (compare_seq);
5336 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5348 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5359 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5361 else if (diff == -1 && ct)
5371 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5373 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5380 * andl cf - ct, dest
5385 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5387 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5391 emit_move_insn (out, tmp);
5393 return 1; /* DONE */
5400 tmp = ct, ct = cf, cf = tmp;
5402 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5404 /* We may be reversing unordered compare to normal compare, that
5405 is not valid in general (we may convert non-trapping condition
5406 to trapping one), however on i386 we currently emit all
5407 comparisons unordered. */
5408 compare_code = reverse_condition_maybe_unordered (compare_code);
5409 code = reverse_condition_maybe_unordered (code);
5413 compare_code = reverse_condition (compare_code);
5414 code = reverse_condition (code);
5417 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5418 || diff == 3 || diff == 5 || diff == 9)
5424 * lea cf(dest*(ct-cf)),dest
5428 * This also catches the degenerate setcc-only case.
5434 out = emit_store_flag (out, code, ix86_compare_op0,
5435 ix86_compare_op1, VOIDmode, 0, 1);
5442 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5446 tmp = gen_rtx_PLUS (SImode, tmp, out);
5452 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5458 emit_move_insn (out, tmp);
5463 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5464 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5466 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5467 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5471 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5473 if (out != operands[0])
5474 emit_move_insn (operands[0], out);
5476 return 1; /* DONE */
5480 * General case: Jumpful:
5481 * xorl dest,dest cmpl op1, op2
5482 * cmpl op1, op2 movl ct, dest
5484 * decl dest movl cf, dest
5485 * andl (cf-ct),dest 1:
5490 * This is reasonably steep, but branch mispredict costs are
5491 * high on modern cpus, so consider failing only if optimizing
5494 * %%% Parameterize branch_cost on the tuning architecture, then
5495 * use that. The 80386 couldn't care less about mispredicts.
5498 if (!optimize_size && !TARGET_CMOVE)
5504 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
5506 /* We may be reversing unordered compare to normal compare,
5507 that is not valid in general (we may convert non-trapping
5508 condition to trapping one), however on i386 we currently
5509 emit all comparisons unordered. */
5510 compare_code = reverse_condition_maybe_unordered (compare_code);
5511 code = reverse_condition_maybe_unordered (code);
5515 compare_code = reverse_condition (compare_code);
5516 code = reverse_condition (code);
5520 out = emit_store_flag (out, code, ix86_compare_op0,
5521 ix86_compare_op1, VOIDmode, 0, 1);
/* setcc gives 0/1; turn it into 0/-1, mask with (cf-ct), add ct.  */
5523 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5524 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5526 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5527 if (out != operands[0])
5528 emit_move_insn (operands[0], out);
5530 return 1; /* DONE */
5536 /* Try a few things more with specific constants and a variable. */
5539 rtx var, orig_out, out, tmp;
5542 return 0; /* FAIL */
5544 /* If one of the two operands is an interesting constant, load a
5545 constant with the above and mask it in with a logical operation. */
5547 if (GET_CODE (operands[2]) == CONST_INT)
5550 if (INTVAL (operands[2]) == 0)
5551 operands[3] = constm1_rtx, op = and_optab;
5552 else if (INTVAL (operands[2]) == -1)
5553 operands[3] = const0_rtx, op = ior_optab;
5555 return 0; /* FAIL */
5557 else if (GET_CODE (operands[3]) == CONST_INT)
5560 if (INTVAL (operands[3]) == 0)
5561 operands[2] = constm1_rtx, op = and_optab;
5562 else if (INTVAL (operands[3]) == -1)
5563 operands[2] = const0_rtx, op = ior_optab;
5565 return 0; /* FAIL */
5568 return 0; /* FAIL */
5570 orig_out = operands[0];
5571 tmp = gen_reg_rtx (GET_MODE (orig_out));
5574 /* Recurse to get the constant loaded. */
5575 if (ix86_expand_int_movcc (operands) == 0)
5576 return 0; /* FAIL */
5578 /* Mask in the interesting variable. */
5579 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5581 if (out != orig_out)
5582 emit_move_insn (orig_out, out);
5584 return 1; /* DONE */
5588 * For comparison with above,
5598 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5599 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5600 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5601 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
/* Fall back to a real conditional move (cmov).  */
5603 emit_insn (compare_seq);
5604 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5605 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5606 compare_op, operands[2],
5609 return 1; /* DONE */
/* Expand a floating point conditional move.  Signed integer comparison
   conditions are first reduced to a setcc into a QImode temp compared
   against zero (fcmov only understands the unsigned/FP condition codes);
   then a compare into FLAGS_REG and an IF_THEN_ELSE set are emitted.
   NOTE(review): elided lines hide the condition-code filtering between
   the code = GET_CODE (...) line and the setcc fallback.  */
5613 ix86_expand_fp_movcc (operands)
5617 enum machine_mode mode;
5620 /* The floating point conditional move instructions don't directly
5621 support conditions resulting from a signed integer comparison. */
5623 code = GET_CODE (operands[1]);
5636 tmp = gen_reg_rtx (QImode);
5637 ix86_expand_setcc (code, tmp);
5639 ix86_compare_op0 = tmp;
5640 ix86_compare_op1 = const0_rtx;
5647 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5648 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5649 gen_rtx_COMPARE (mode,
5651 ix86_compare_op1)));
5652 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5653 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5654 gen_rtx_fmt_ee (code, VOIDmode,
5655 gen_rtx_REG (mode, FLAGS_REG),
5663 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5664 works for floating pointer parameters and nonoffsetable memories.
5665 For pushes, it returns just stack offsets; the values will be saved
5666 in the right order. Maximally three parts are generated. */
/* Split OPERAND into 2 or 3 SImode PARTS (see the comment above:
   similar to split_di but also handles FP modes, pushes, and
   nonoffsettable memories).  Returns the part count via `size`.
   NOTE(review): lines are missing from this view; comments cover only
   what is visible.  */
5669 ix86_split_to_parts (operand, parts, mode)
5672 enum machine_mode mode;
/* TFmode uses 3 SImode words; otherwise one word per 4 bytes.  */
5674 int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
5676 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5678 if (size < 2 || size > 3)
5681 /* Optimize constant pool reference to immediates. This is used by fp moves,
5682 that force all constants to memory to allow combining. */
5684 if (GET_CODE (operand) == MEM
5685 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5686 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5687 operand = get_pool_constant (XEXP (operand, 0));
5689 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5691 /* The only non-offsetable memories we handle are pushes. */
5692 if (! push_operand (operand, VOIDmode))
/* For a push, all parts alias the same (pre-dec) address; the caller
   emits them in the right order.  */
5695 PUT_MODE (operand, SImode);
5696 parts[0] = parts[1] = parts[2] = operand;
5701 split_di (&operand, 1, &parts[0], &parts[1]);
5704 if (REG_P (operand))
/* Hard-register decomposition is only valid after reload.  */
5706 if (!reload_completed)
5708 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5709 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5711 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5713 else if (offsettable_memref_p (operand))
5715 PUT_MODE (operand, SImode);
5717 parts[1] = adj_offsettable_operand (operand, 4);
5719 parts[2] = adj_offsettable_operand (operand, 8);
5721 else if (GET_CODE (operand) == CONST_DOUBLE)
/* FP constants are decomposed into target-format integer words.  */
5726 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5731 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5732 parts[2] = GEN_INT (l[2]);
5735 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5740 parts[1] = GEN_INT (l[1]);
5741 parts[0] = GEN_INT (l[0]);
5751 /* Emit insns to perform a move or push of DI, DF, and XF values.
5752 Return false when normal moves are needed; true when all required
5753 insns have been emitted. Operands 2-4 contain the input values
5754 int the correct order; operands 5-7 contain the output values. */
/* Split a DI/DF/XF/TF move or push into SImode word moves (see header
   comment above).  Fills operands1[2..7] with source/destination parts
   ordered so no source word is clobbered before it is read.
   NOTE(review): lines are missing from this truncated view.  */
5757 ix86_split_long_move (operands1)
5766 /* Make our own copy to avoid clobbering the operands. */
5767 operands[0] = copy_rtx (operands1[0]);
5768 operands[1] = copy_rtx (operands1[1]);
5770 /* The only non-offsettable memory we handle is push. */
5771 if (push_operand (operands[0], VOIDmode))
5773 else if (GET_CODE (operands[0]) == MEM
5774 && ! offsettable_memref_p (operands[0]))
5777 size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5778 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5780 /* When emitting push, take care for source operands on the stack. */
5781 if (push && GET_CODE (operands[1]) == MEM
5782 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Pushing moves esp between part stores; shift parts to compensate.  */
5785 part[1][1] = part[1][2];
5786 part[1][0] = part[1][1];
5789 /* We need to do copy in the right order in case an address register
5790 of the source overlaps the destination. */
5791 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5793 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5795 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5798 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5801 /* Collision in the middle part can be handled by reordering. */
5802 if (collisions == 1 && size == 3
5803 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5806 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5807 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5810 /* If there are more collisions, we can't handle it by reordering.
5811 Do an lea to the last part and use only one colliding move. */
5812 else if (collisions > 1)
5815 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5816 XEXP (part[1][0], 0)));
/* Rebase all source parts off the freshly-lea'd address register.  */
5817 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5818 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5820 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5828 /* We use only first 12 bytes of TFmode value, but for pushing we
5829 are required to adjust stack as if we were pushing real 16byte
5831 if (GET_MODE (operands1[0]) == TFmode)
5832 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
5834 emit_insn (gen_push (part[1][2]));
5836 emit_insn (gen_push (part[1][1]));
5837 emit_insn (gen_push (part[1][0]));
5841 /* Choose correct order to not overwrite the source before it is copied. */
5842 if ((REG_P (part[0][0])
5843 && REG_P (part[1][1])
5844 && (REGNO (part[0][0]) == REGNO (part[1][1])
5846 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5848 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Collision detected: emit the parts highest-word-first.  */
5852 operands1[2] = part[0][2];
5853 operands1[3] = part[0][1];
5854 operands1[4] = part[0][0];
5855 operands1[5] = part[1][2];
5856 operands1[6] = part[1][1];
5857 operands1[7] = part[1][0];
5861 operands1[2] = part[0][1];
5862 operands1[3] = part[0][0];
5863 operands1[5] = part[1][1];
5864 operands1[6] = part[1][0];
/* No collision: natural lowest-word-first order.  */
5871 operands1[2] = part[0][0];
5872 operands1[3] = part[0][1];
5873 operands1[4] = part[0][2];
5874 operands1[5] = part[1][0];
5875 operands1[6] = part[1][1];
5876 operands1[7] = part[1][2];
5880 operands1[2] = part[0][0];
5881 operands1[3] = part[0][1];
5882 operands1[5] = part[1][0];
5883 operands1[6] = part[1][1];
/* Split a DImode left shift into SImode operations.  Constant counts
   are handled directly (>= 32 moves low into high); variable counts
   use shld plus a fixup for counts >= 32 (cmove when available,
   otherwise a conditional-adjust pattern).  NOTE(review): truncated
   view; some lines are missing.  */
5891 ix86_split_ashldi (operands, scratch)
5892 rtx *operands, scratch;
5894 rtx low[2], high[2];
5897 if (GET_CODE (operands[2]) == CONST_INT)
5899 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count matter for a 64-bit shift.  */
5900 count = INTVAL (operands[2]) & 63;
/* count >= 32: result low word is zero, high word is low << (count-32).  */
5904 emit_move_insn (high[0], low[1]);
5905 emit_move_insn (low[0], const0_rtx);
5908 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5912 if (!rtx_equal_p (operands[0], operands[1]))
5913 emit_move_insn (operands[0], operands[1]);
5914 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5915 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count path.  */
5920 if (!rtx_equal_p (operands[0], operands[1]))
5921 emit_move_insn (operands[0], operands[1]);
5923 split_di (operands, 1, low, high);
5925 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5926 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5928 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5930 if (! no_new_pseudos)
5931 scratch = force_reg (SImode, const0_rtx)
5933 emit_move_insn (scratch, const0_rtx);
5935 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5939 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations.
   Mirrors ix86_split_ashldi, but the high word must be filled with
   copies of the sign bit (ashrsi by 31).  NOTE(review): truncated
   view; some lines are missing.  */
5944 ix86_split_ashrdi (operands, scratch)
5945 rtx *operands, scratch;
5947 rtx low[2], high[2];
5950 if (GET_CODE (operands[2]) == CONST_INT)
5952 split_di (operands, 2, low, high);
5953 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word gets high >> (count-32); high word gets the
   sign extension (>> 31).  */
5957 emit_move_insn (low[0], high[1]);
5959 if (! reload_completed)
5960 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
/* After reload we may not have a spare register; reuse high[0].  */
5963 emit_move_insn (high[0], low[0]);
5964 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5968 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5972 if (!rtx_equal_p (operands[0], operands[1]))
5973 emit_move_insn (operands[0], operands[1]);
5974 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5975 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
5980 if (!rtx_equal_p (operands[0], operands[1]))
5981 emit_move_insn (operands[0], operands[1]);
5983 split_di (operands, 1, low, high);
5985 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5986 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5988 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5990 if (! no_new_pseudos)
5991 scratch = gen_reg_rtx (SImode);
/* scratch holds the sign-extension word for the count >= 32 fixup.  */
5992 emit_move_insn (scratch, high[0]);
5993 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5994 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5998 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations.  Like
   ix86_split_ashrdi but fills the high word with zero instead of the
   sign bit.  NOTE(review): truncated view; some lines are missing.  */
6003 ix86_split_lshrdi (operands, scratch)
6004 rtx *operands, scratch;
6006 rtx low[2], high[2];
6009 if (GET_CODE (operands[2]) == CONST_INT)
6011 split_di (operands, 2, low, high);
6012 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word gets high >> (count-32); high word is zero.  */
6016 emit_move_insn (low[0], high[1]);
6017 emit_move_insn (high[0], const0_rtx);
6020 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
6024 if (!rtx_equal_p (operands[0], operands[1]))
6025 emit_move_insn (operands[0], operands[1]);
6026 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
6027 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
6032 if (!rtx_equal_p (operands[0], operands[1]))
6033 emit_move_insn (operands[0], operands[1]);
6035 split_di (operands, 1, low, high);
6037 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
6038 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
6040 /* Heh. By reversing the arguments, we can reuse this pattern. */
6041 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
6043 if (! no_new_pseudos)
6044 scratch = force_reg (SImode, const0_rtx);
6046 emit_move_insn (scratch, const0_rtx);
6048 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
6052 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
6056 /* Expand the appropriate insns for doing strlen if not just doing
6059 out = result, initialized with the start address
6060 align_rtx = alignment of the address.
6061 scratch = scratch register, initialized with the startaddress when
6062 not aligned, otherwise undefined
6064 This is just the body. It needs the initialisations mentioned above and
6065 some address computing at the end. These things are done in i386.md. */
/* Emit the unrolled strlen body (see comment above): byte-wise probes
   until OUT is 4-aligned, then a word-at-a-time loop using the
   classic "(x - 0x01010101) & ~x & 0x80808080" zero-byte trick,
   followed by branchless/branching fixup to locate the exact zero
   byte.  NOTE(review): truncated view; some lines are missing.  */
6068 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6069 rtx out, align_rtx, scratch;
6073 rtx align_2_label = NULL_RTX;
6074 rtx align_3_label = NULL_RTX;
6075 rtx align_4_label = gen_label_rtx ();
6076 rtx end_0_label = gen_label_rtx ();
6078 rtx tmpreg = gen_reg_rtx (SImode);
6081 if (GET_CODE (align_rtx) == CONST_INT)
6082 align = INTVAL (align_rtx);
6084 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6086 /* Is there a known alignment and is it less than 4? */
6089 /* Is there a known alignment and is it not 2? */
6092 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6093 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6095 /* Leave just the 3 lower bits. */
6096 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6097 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> two bytes to
   check, 3 -> one byte, 1 -> fall through to three-byte check.  */
6099 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6100 SImode, 1, 0, align_4_label);
6101 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6102 SImode, 1, 0, align_2_label);
6103 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6104 SImode, 1, 0, align_3_label);
6108 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6109 check if is aligned to 4 - byte. */
6111 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6112 NULL_RTX, 0, OPTAB_WIDEN);
6114 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6115 SImode, 1, 0, align_4_label);
6118 mem = gen_rtx_MEM (QImode, out);
6120 /* Now compare the bytes. */
6122 /* Compare the first n unaligned byte on a byte per byte basis. */
6123 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6124 QImode, 1, 0, end_0_label);
6126 /* Increment the address. */
6127 emit_insn (gen_addsi3 (out, out, const1_rtx));
6129 /* Not needed with an alignment of 2 */
6132 emit_label (align_2_label);
6134 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6135 QImode, 1, 0, end_0_label);
6137 emit_insn (gen_addsi3 (out, out, const1_rtx));
6139 emit_label (align_3_label);
6142 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6143 QImode, 1, 0, end_0_label);
6145 emit_insn (gen_addsi3 (out, out, const1_rtx));
6148 /* Generate loop to check 4 bytes at a time. It is not a good idea to
6149 align this loop. It gives only huge programs, but does not help to
6151 emit_label (align_4_label);
6153 mem = gen_rtx_MEM (SImode, out);
6154 emit_move_insn (scratch, mem);
6155 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6157 /* This formula yields a nonzero result iff one of the bytes is zero.
6158 This saves three branches inside loop and many cycles. */
6160 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)))
6161 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6162 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6163 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6164 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6165 SImode, 1, 0, align_4_label);
/* cmov-based (branchless) fixup: conditionally step OUT and the mask
   forward by two bytes when the zero is not in the low half.  */
6169 rtx reg = gen_reg_rtx (SImode);
6170 emit_move_insn (reg, tmpreg);
6171 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6173 /* If zero is not in the first two bytes, move two bytes forward. */
6174 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6175 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6176 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6177 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6178 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6181 /* Emit lea manually to avoid clobbering of flags. */
/* NOTE(review): this SET is created with SImode while every sibling
   uses VOIDmode -- looks like a typo; verify against gen_rtx_SET
   conventions before changing.  */
6182 emit_insn (gen_rtx_SET (SImode, reg,
6183 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6185 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6186 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6187 emit_insn (gen_rtx_SET (VOIDmode, out,
6188 gen_rtx_IF_THEN_ELSE (SImode, tmp,
/* Branching fixup for targets without cmov.  */
6195 rtx end_2_label = gen_label_rtx ();
6196 /* Is zero in the first two bytes? */
6198 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6199 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6200 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6201 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6202 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6204 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6205 JUMP_LABEL (tmp) = end_2_label;
6207 /* Not in the first two. Move two bytes forward. */
6208 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6209 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6211 emit_label (end_2_label);
6215 /* Avoid branch in fixing the byte. */
/* addqi3_cc shifts the 0x80 flag into the carry; subsi3_carry then
   subtracts 3-or-2 accordingly, yielding the exact zero position.  */
6216 tmpreg = gen_lowpart (QImode, tmpreg);
6217 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6218 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6220 emit_label (end_0_label);
6223 /* Clear stack slot assignments remembered from previous functions.
6224 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate the per-function machine_function record for P and clear
   every remembered stack-slot rtx (all modes x MAX_386_STACK_LOCALS);
   called from INIT_EXPANDERS per the comment above.  */
6228 ix86_init_machine_status (p)
6231 enum machine_mode mode;
6234 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
6236 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6237 mode = (enum machine_mode) ((int) mode + 1))
6238 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6239 ix86_stack_locals[(int) mode][n] = NULL_RTX;
6242 /* Mark machine specific bits of P for GC. */
/* GC-mark every stack-slot rtx stored in P's machine_function so the
   garbage collector does not reclaim them.  */
6244 ix86_mark_machine_status (p)
6247 enum machine_mode mode;
6250 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6251 mode = (enum machine_mode) ((int) mode + 1))
6252 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6253 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6256 /* Return a MEM corresponding to a stack slot with mode MODE.
6257 Allocate a new slot if necessary.
6259 The RTL for a function can have several slots available: N is
6260 which slot to use. */
/* Return the cached stack slot N of mode MODE, lazily allocating it on
   first use (see comment above).  N must be in [0, MAX_386_STACK_LOCALS).  */
6263 assign_386_stack_local (mode, n)
6264 enum machine_mode mode;
6267 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6270 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6271 ix86_stack_locals[(int) mode][n]
6272 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6274 return ix86_stack_locals[(int) mode][n];
6277 /* Calculate the length of the memory address in the instruction
6278 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Compute the encoded length of memory address ADDR, excluding modrm,
   opcode and prefixes (see comment above).  Decomposes ADDR into
   base/index/disp and applies the ia32 encoding rules.
   NOTE(review): truncated view; some lines are missing.  */
6281 memory_address_length (addr)
6284 struct ix86_address parts;
6285 rtx base, index, disp;
/* Autoinc/autodec addresses (push/pop) take no extra bytes.  */
6288 if (GET_CODE (addr) == PRE_DEC
6289 || GET_CODE (addr) == POST_INC)
6292 if (! ix86_decompose_address (addr, &parts))
6296 index = parts.index;
6300 /* Register Indirect. */
6301 if (base && !index && !disp)
6303 /* Special cases: ebp and esp need the two-byte modrm form. */
6304 if (addr == stack_pointer_rtx
6305 || addr == arg_pointer_rtx
6306 || addr == frame_pointer_rtx
6307 || addr == hard_frame_pointer_rtx)
6311 /* Direct Addressing. */
6312 else if (disp && !base && !index)
6317 /* Find the length of the displacement constant. */
/* 'K' means the constant fits in a signed 8-bit displacement.  */
6320 if (GET_CODE (disp) == CONST_INT
6321 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6327 /* An index requires the two-byte modrm form. */
6335 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
6336 expect that insn have 8bit immediate alternative. */
/* Default "length_immediate" attribute for INSN: scan its constant
   operands; with SHORTFORM, an 8-bit-representable CONST_INT ('K')
   uses the short immediate encoding.  Falls through to a mode-based
   size otherwise.  */
6338 ix86_attr_length_immediate_default (insn, shortform)
6344 extract_insn_cached (insn);
6345 for (i = recog_data.n_operands - 1; i >= 0; --i)
6346 if (CONSTANT_P (recog_data.operand[i]))
6351 && GET_CODE (recog_data.operand[i]) == CONST_INT
6352 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6356 switch (get_attr_mode (insn))
6368 fatal_insn ("Unknown insn mode", insn);
6374 /* Compute default value for "length_address" attribute. */
/* Default "length_address" attribute: the encoded length of INSN's
   first MEM operand's address, or (fall-through, not visible here)
   a default when there is none.  */
6376 ix86_attr_length_address_default (insn)
6380 extract_insn_cached (insn);
6381 for (i = recog_data.n_operands - 1; i >= 0; --i)
6382 if (GET_CODE (recog_data.operand[i]) == MEM)
6384 return memory_address_length (XEXP (recog_data.operand[i], 0));
6390 /* Return the maximum number of instructions a cpu can issue. */
6397 case PROCESSOR_PENTIUM:
6401 case PROCESSOR_PENTIUMPRO:
6409 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6410 by DEP_INSN and nothing set by DEP_INSN. */
/* Return nonzero iff INSN reads the flags set by DEP_INSN and nothing
   else DEP_INSN sets (see comment above).  Only flag-consuming insn
   types (setcc/icmov/fcmov/ibr) are interesting.  */
6413 ix86_flags_dependant (insn, dep_insn, insn_type)
6415 enum attr_type insn_type;
6419 /* Simplify the test for uninteresting insns. */
6420 if (insn_type != TYPE_SETCC
6421 && insn_type != TYPE_ICMOV
6422 && insn_type != TYPE_FCMOV
6423 && insn_type != TYPE_IBR)
6426 if ((set = single_set (dep_insn)) != 0)
6428 set = SET_DEST (set);
6431 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6432 && XVECLEN (PATTERN (dep_insn), 0) == 2
6433 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6434 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6436 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* NOTE(review): suspected copy-paste bug -- this reads element 0
   again, so set2 duplicates set; the second SET of the PARALLEL is
   element 1.  As written, the set2 check below can never add
   information.  Verify and change the last index to 1.  */
6437 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6442 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6445 /* This test is true if the dependant insn reads the flags but
6446 not any other potentially set register. */
6447 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6450 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6456 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6457 address with operands set by DEP_INSN. */
/* Return nonzero iff INSN's memory address (or lea source) uses a
   value modified by DEP_INSN -- an address generation interlock.  */
6460 ix86_agi_dependant (insn, dep_insn, insn_type)
6462 enum attr_type insn_type;
6466 if (insn_type == TYPE_LEA)
/* For lea, the "address" is the SET_SRC of the (possibly PARALLEL)
   pattern rather than a MEM operand.  */
6468 addr = PATTERN (insn);
6469 if (GET_CODE (addr) == SET)
6471 else if (GET_CODE (addr) == PARALLEL
6472 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6473 addr = XVECEXP (addr, 0, 0);
6476 addr = SET_SRC (addr);
/* Otherwise find the first MEM operand's address.  */
6481 extract_insn_cached (insn);
6482 for (i = recog_data.n_operands - 1; i >= 0; --i)
6483 if (GET_CODE (recog_data.operand[i]) == MEM)
6485 addr = XEXP (recog_data.operand[i], 0);
6492 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK
   between DEP_INSN and INSN for the target CPU.  NOTE(review):
   truncated view; some lines are missing.  */
6496 ix86_adjust_cost (insn, link, dep_insn, cost)
6497 rtx insn, link, dep_insn;
6500 enum attr_type insn_type, dep_insn_type;
6501 enum attr_memory memory;
6503 int dep_insn_code_number;
6505 /* Anti and output depenancies have zero cost on all CPUs. */
6506 if (REG_NOTE_KIND (link) != 0)
6509 dep_insn_code_number = recog_memoized (dep_insn);
6511 /* If we can't recognize the insns, we can't really do anything. */
6512 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6515 insn_type = get_attr_type (insn);
6516 dep_insn_type = get_attr_type (dep_insn);
6518 /* Prologue and epilogue allocators can have a false dependency on ebp.
6519 This results in one cycle extra stall on Pentium prologue scheduling,
6520 so handle this important case manually. */
6521 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6522 && dep_insn_type == TYPE_ALU
6523 && !reg_mentioned_p (stack_pointer_rtx, insn))
6528 case PROCESSOR_PENTIUM:
6529 /* Address Generation Interlock adds a cycle of latency. */
6530 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6533 /* ??? Compares pair with jump/setcc. */
6534 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6537 /* Floating point stores require value to be ready one cycle ealier. */
6538 if (insn_type == TYPE_FMOV
6539 && get_attr_memory (insn) == MEMORY_STORE
6540 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6544 case PROCESSOR_PENTIUMPRO:
6545 /* Since we can't represent delayed latencies of load+operation,
6546 increase the cost here for non-imov insns. */
6547 if (dep_insn_type != TYPE_IMOV
6548 && dep_insn_type != TYPE_FMOV
/* NOTE(review): precedence bug suspected -- as written this assigns
   the *comparison result* (0/1) to `memory`, i.e.
   "memory = (get_attr_memory (dep_insn) == MEMORY_LOAD)", so the
   `memory == MEMORY_BOTH` test can never see the real attribute.
   Compare the correctly parenthesized Athlon case below; the intent
   is "((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD)".  */
6549 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6550 || memory == MEMORY_BOTH))
6553 /* INT->FP conversion is expensive. */
6554 if (get_attr_fp_int_src (dep_insn))
6557 /* There is one cycle extra latency between an FP op and a store. */
6558 if (insn_type == TYPE_FMOV
6559 && (set = single_set (dep_insn)) != NULL_RTX
6560 && (set2 = single_set (insn)) != NULL_RTX
6561 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6562 && GET_CODE (SET_DEST (set2)) == MEM)
6567 /* The esp dependency is resolved before the instruction is really
6569 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6570 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6573 /* Since we can't represent delayed latencies of load+operation,
6574 increase the cost here for non-imov insns. */
/* NOTE(review): same suspected precedence bug as above -- missing
   parentheses around the assignment to `memory`.  */
6575 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6576 || memory == MEMORY_BOTH)
6577 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6579 /* INT->FP conversion is expensive. */
6580 if (get_attr_fp_int_src (dep_insn))
6584 case PROCESSOR_ATHLON:
/* Correctly parenthesized form: `memory` receives the attribute.  */
6585 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6586 || memory == MEMORY_BOTH)
6588 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
/* Per-cycle PentiumPro scheduling state: the insns occupying the three
   decoders plus an issue counter (remaining fields not visible in
   this truncated view).  */
6603 struct ppro_sched_data
6606 int issued_this_cycle;
/* Length of INSN in bytes, or a fallback (not visible) when INSN is
   not recognizable.  "safe" = never calls get_attr_* on unrecognized
   insns, which would abort.  */
6611 ix86_safe_length (insn)
6614 if (recog_memoized (insn) >= 0)
6615 return get_attr_length(insn);
/* NOTE(review): despite the name, this returns the insn's full length
   attribute, identical to ix86_safe_length above -- presumably it was
   meant to return a prefix-length attribute.  Confirm against the
   pairing size check in ix86_pent_find_pair before changing.  */
6621 ix86_safe_length_prefix (insn)
6624 if (recog_memoized (insn) >= 0)
6625 return get_attr_length(insn);
/* Memory attribute of INSN, or MEMORY_UNKNOWN when unrecognizable.  */
6630 static enum attr_memory
6631 ix86_safe_memory (insn)
6634 if (recog_memoized (insn) >= 0)
6635 return get_attr_memory(insn);
6637 return MEMORY_UNKNOWN;
/* Pentium pairability attribute of INSN; PENT_PAIR_NP (not pairable)
   when unrecognizable.  */
6640 static enum attr_pent_pair
6641 ix86_safe_pent_pair (insn)
6644 if (recog_memoized (insn) >= 0)
6645 return get_attr_pent_pair(insn);
6647 return PENT_PAIR_NP;
/* PPro uop-count class of INSN; conservatively PPRO_UOPS_MANY when
   unrecognizable.  */
6650 static enum attr_ppro_uops
6651 ix86_safe_ppro_uops (insn)
6654 if (recog_memoized (insn) >= 0)
6655 return get_attr_ppro_uops (insn);
6657 return PPRO_UOPS_MANY;
/* Debug helper: print the UIDs of the insns currently occupying the
   three PPro decoder slots to DUMP (only when slot 0 is filled).  */
6661 ix86_dump_ppro_packet (dump)
6664 if (ix86_sched_data.ppro.decode[0])
6666 fprintf (dump, "PPRO packet: %d",
6667 INSN_UID (ix86_sched_data.ppro.decode[0]));
6668 if (ix86_sched_data.ppro.decode[1])
6669 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6670 if (ix86_sched_data.ppro.decode[2])
6671 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6676 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler hook: reset all target scheduling state at the start of a
   new block (see comment above).  */
6679 ix86_sched_init (dump, sched_verbose)
6680 FILE *dump ATTRIBUTE_UNUSED;
6681 int sched_verbose ATTRIBUTE_UNUSED;
6683 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6686 /* Shift INSN to SLOT, and shift everything else down. */
/* Move *INSNP to position SLOT in the ready queue, sliding everything
   in between down one position (see comment above).  */
6689 ix86_reorder_insn (insnp, slot)
6696 insnp[0] = insnp[1];
6697 while (++insnp != slot);
6702 /* Find an instruction with given pairability and minimal amount of cycles
6703 lost by the fact that the CPU waits for both pipelines to finish before
6704 reading next instructions. Also take care that both instructions together
6705 can not exceed 7 bytes. */
/* Search the ready queue [READY, E_READY] for an insn of pairability
   TYPE that best pairs with FIRST on the Pentium U/V pipes: minimal
   cycle imbalance, combined length <= 7 bytes (see comment above).
   Returns a pointer into the queue, or NULL.  */
6708 ix86_pent_find_pair (e_ready, ready, type, first)
6711 enum attr_pent_pair type;
6714 int mincycles, cycles;
6715 enum attr_pent_pair tmp;
6716 enum attr_memory memory;
6717 rtx *insnp, *bestinsnp = NULL;
6719 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6722 memory = ix86_safe_memory (first);
6723 cycles = result_ready_cost (first);
6724 mincycles = INT_MAX;
/* Scan from the queue head; stop early once a perfect (0-imbalance)
   candidate has been found (mincycles == 0 terminates the loop).  */
6726 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6727 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6728 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6730 enum attr_memory second_memory;
6731 int secondcycles, currentcycles;
6733 second_memory = ix86_safe_memory (*insnp);
6734 secondcycles = result_ready_cost (*insnp);
6735 currentcycles = abs (cycles - secondcycles);
6737 if (secondcycles >= 1 && cycles >= 1)
6739 /* Two read/modify/write instructions together takes two
6741 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6744 /* Read modify/write instruction followed by read/modify
6745 takes one cycle longer. */
6746 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6747 && tmp != PENT_PAIR_UV
6748 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6751 if (currentcycles < mincycles)
6752 bestinsnp = insnp, mincycles = currentcycles;
6758 /* Subroutines of ix86_sched_reorder. */
/* Pentium-specific ready-queue reordering: pick a partner for the
   head insn so the U/V pipes both issue, moving the partner adjacent
   to the head (see comment above about Haifa and static ordering).  */
6761 ix86_sched_reorder_pentium (ready, e_ready)
6765 enum attr_pent_pair pair1, pair2;
6768 /* This wouldn't be necessary if Haifa knew that static insn ordering
6769 is important to which pipe an insn is issued to. So we have to make
6770 some minor rearrangements. */
6772 pair1 = ix86_safe_pent_pair (*e_ready);
6774 /* If the first insn is non-pairable, let it be. */
6775 if (pair1 == PENT_PAIR_NP)
6778 pair2 = PENT_PAIR_NP;
6781 /* If the first insn is UV or PV pairable, search for a PU
6783 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6785 insnp = ix86_pent_find_pair (e_ready-1, ready,
6786 PENT_PAIR_PU, *e_ready);
6788 pair2 = PENT_PAIR_PU;
6791 /* If the first insn is PU or UV pairable, search for a PV
6793 if (pair2 == PENT_PAIR_NP
6794 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6796 insnp = ix86_pent_find_pair (e_ready-1, ready,
6797 PENT_PAIR_PV, *e_ready);
6799 pair2 = PENT_PAIR_PV;
6802 /* If the first insn is pairable, search for a UV
6804 if (pair2 == PENT_PAIR_NP)
6806 insnp = ix86_pent_find_pair (e_ready-1, ready,
6807 PENT_PAIR_UV, *e_ready);
6809 pair2 = PENT_PAIR_UV;
6812 if (pair2 == PENT_PAIR_NP)
6815 /* Found something! Decide if we need to swap the order. */
/* PV must issue first (V-pipe-only goes second); RMW+load ordering
   also forces a swap, per the pairing rules in ix86_pent_find_pair.  */
6816 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6817 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6818 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6819 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6820 ix86_reorder_insn (insnp, e_ready);
6822 ix86_reorder_insn (insnp, e_ready - 1);
/* PentiumPro-specific ready-queue reordering: fill the 4-1-1 decoder
   template -- one complex/few-uop insn in slot 0, single-uop insns in
   slots 1 and 2 -- by hoisting suitable insns to the queue head.  */
6826 ix86_sched_reorder_ppro (ready, e_ready)
6831 enum attr_ppro_uops cur_uops;
6832 int issued_this_cycle;
6836 /* At this point .ppro.decode contains the state of the three
6837 decoders from last "cycle". That is, those insns that were
6838 actually independent. But here we're scheduling for the
6839 decoder, and we may find things that are decodable in the
6842 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
6843 issued_this_cycle = 0;
6846 cur_uops = ix86_safe_ppro_uops (*insnp);
6848 /* If the decoders are empty, and we've a complex insn at the
6849 head of the priority queue, let it issue without complaint. */
6850 if (decode[0] == NULL)
6852 if (cur_uops == PPRO_UOPS_MANY)
6858 /* Otherwise, search for a 2-4 uop unsn to issue. */
6859 while (cur_uops != PPRO_UOPS_FEW)
6863 cur_uops = ix86_safe_ppro_uops (*--insnp);
6866 /* If so, move it to the head of the line. */
6867 if (cur_uops == PPRO_UOPS_FEW)
6868 ix86_reorder_insn (insnp, e_ready);
6870 /* Issue the head of the queue. */
6871 issued_this_cycle = 1;
6872 decode[0] = *e_ready--;
6875 /* Look for simple insns to fill in the other two slots. */
6876 for (i = 1; i < 3; ++i)
6877 if (decode[i] == NULL)
6879 if (ready >= e_ready)
6883 cur_uops = ix86_safe_ppro_uops (*insnp);
6884 while (cur_uops != PPRO_UOPS_ONE)
6888 cur_uops = ix86_safe_ppro_uops (*--insnp);
6891 /* Found one. Move it to the head of the queue and issue it. */
6892 if (cur_uops == PPRO_UOPS_ONE)
6894 ix86_reorder_insn (insnp, e_ready);
6895 decode[i] = *e_ready--;
6896 issued_this_cycle++;
6900 /* ??? Didn't find one. Ideally, here we would do a lazy split
6901 of 2-uop insns, issue one and queue the other. */
/* Report at least one issued insn so the scheduler makes progress.  */
6905 if (issued_this_cycle == 0)
6906 issued_this_cycle = 1;
6907 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6910 /* We are about to being issuing insns for this clock cycle.
6911 Override the default sort algorithm to better slot instructions. */
/* Scheduler hook: dispatch to the CPU-specific ready-queue reordering
   (see comment above), then return the issue rate.  */
6913 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6914 FILE *dump ATTRIBUTE_UNUSED;
6915 int sched_verbose ATTRIBUTE_UNUSED;
6918 int clock_var ATTRIBUTE_UNUSED;
6920 rtx *e_ready = ready + n_ready - 1;
6930 case PROCESSOR_PENTIUM:
6931 ix86_sched_reorder_pentium (ready, e_ready);
6934 case PROCESSOR_PENTIUMPRO:
6935 ix86_sched_reorder_ppro (ready, e_ready);
6940 return ix86_issue_rate ();
6943 /* We are about to issue INSN. Return the number of insns left on the
6944 ready queue that can be issued this cycle. */
/* Scheduler hook: INSN is being issued; update decoder-packet state
   and return how many more insns may issue this cycle (see comment
   above).  Default CPUs just decrement; PPro tracks the 4-1-1
   decoder template.  */
6947 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6957 return can_issue_more - 1;
6959 case PROCESSOR_PENTIUMPRO:
6961 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6963 if (uops == PPRO_UOPS_MANY)
/* Complex insn: flush the current packet; it occupies decoder 0
   alone.  */
6966 ix86_dump_ppro_packet (dump);
6967 ix86_sched_data.ppro.decode[0] = insn;
6968 ix86_sched_data.ppro.decode[1] = NULL;
6969 ix86_sched_data.ppro.decode[2] = NULL;
6971 ix86_dump_ppro_packet (dump);
6972 ix86_sched_data.ppro.decode[0] = NULL;
6974 else if (uops == PPRO_UOPS_FEW)
/* 2-4 uop insn: starts a new packet in decoder 0.  */
6977 ix86_dump_ppro_packet (dump);
6978 ix86_sched_data.ppro.decode[0] = insn;
6979 ix86_sched_data.ppro.decode[1] = NULL;
6980 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: fill the first empty decoder slot; flush when
   the packet becomes full.  */
6984 for (i = 0; i < 3; ++i)
6985 if (ix86_sched_data.ppro.decode[i] == NULL)
6987 ix86_sched_data.ppro.decode[i] = insn;
6995 ix86_dump_ppro_packet (dump);
6996 ix86_sched_data.ppro.decode[0] = NULL;
6997 ix86_sched_data.ppro.decode[1] = NULL;
6998 ix86_sched_data.ppro.decode[2] = NULL;
7002 return --ix86_sched_data.ppro.issued_this_cycle;
7006 /* Compute the alignment given to a constant that is being placed in memory.
7007 EXP is the constant and ALIGN is the alignment that the object would
7009 The value of this function is used instead of that alignment to align
/* Alignment (in bits) for constant EXP placed in memory (see comment
   above): raise doubles to 64, 128-bit-mode reals to 128, and long
   strings for efficient block operations.  */
7013 ix86_constant_alignment (exp, align)
7017 if (TREE_CODE (exp) == REAL_CST)
7019 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
7021 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
7024 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
7031 /* Compute the alignment for a static variable.
7032 TYPE is the data type, and ALIGN is the alignment that
7033 the object would ordinarily have. The value of this function is used
7034 instead of that alignment to align the object. */
/* Alignment (in bits) for a static variable of TYPE (see comment
   above): large aggregates to 256; arrays/complex/records/scalars
   containing 64- or 128-bit FP data to 64/128.  */
7037 ix86_data_alignment (type, align)
7041 if (AGGREGATE_TYPE_P (type)
7043 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
7044 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
7045 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
7048 if (TREE_CODE (type) == ARRAY_TYPE)
7050 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7052 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7055 else if (TREE_CODE (type) == COMPLEX_TYPE)
7058 if (TYPE_MODE (type) == DCmode && align < 64)
7060 if (TYPE_MODE (type) == XCmode && align < 128)
7063 else if ((TREE_CODE (type) == RECORD_TYPE
7064 || TREE_CODE (type) == UNION_TYPE
7065 || TREE_CODE (type) == QUAL_UNION_TYPE)
7066 && TYPE_FIELDS (type))
/* Records: key the decision off the first field's mode.  */
7068 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7070 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7073 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7074 || TREE_CODE (type) == INTEGER_TYPE)
7076 if (TYPE_MODE (type) == DFmode && align < 64)
7078 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7085 /* Compute the alignment for a local variable.
7086 TYPE is the data type, and ALIGN is the alignment that
7087 the object would ordinarily have. The value of this macro is used
7088 instead of that alignment to align the object. */
/* Same shape as ix86_data_alignment minus the 256-bit aggregate rule;
   keep the two in sync when editing.  */
7091 ix86_local_alignment (type, align)
7095 if (TREE_CODE (type) == ARRAY_TYPE)
7097 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7099 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7102 else if (TREE_CODE (type) == COMPLEX_TYPE)
7104 if (TYPE_MODE (type) == DCmode && align < 64)
7106 if (TYPE_MODE (type) == XCmode && align < 128)
7109 else if ((TREE_CODE (type) == RECORD_TYPE
7110 || TREE_CODE (type) == UNION_TYPE
7111 || TREE_CODE (type) == QUAL_UNION_TYPE)
7112 && TYPE_FIELDS (type))
7114 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7116 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7119 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7120 || TREE_CODE (type) == INTEGER_TYPE)
7123 if (TYPE_MODE (type) == DFmode && align < 64)
7125 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Convenience wrapper: register one target builtin NAME with function
   type TYPE and builtin code CODE as a machine-dependent builtin.  */
7131 #define def_builtin(NAME, TYPE, CODE) \
7132 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
/* One table entry describing a builtin: the insn to emit, the builtin's
   enum code, and the rtx comparison code used by compare-style builtins.
   NOTE(review): the initializers below supply five fields per entry, so a
   name field (presumably const char *) and a trailing flag field are on
   lines elided from this view -- confirm against the full source.  */
7133 struct builtin_description
7135 enum insn_code icode;
7137 enum ix86_builtins code;
7138 enum rtx_code comparison;
/* comi/ucomi (scalar SSE compare-to-EFLAGS) builtins.  GE/GT have no
   direct encoding here: they are expressed as LE/LT with the trailing
   flag set to 1, which the expander uses to swap the operands.  */
7142 static struct builtin_description bdesc_comi[] =
7144 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7145 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7146 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7147 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7148 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7149 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7150 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7151 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7152 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7153 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7154 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7155 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
/* Two-operand builtins.  Entries with a NULL/0 name are registered
   elsewhere with hand-built types; GT/GE compares are encoded as LT/LE
   with the swap flag set (see bdesc_comi above for the same idiom).  */
7158 static struct builtin_description bdesc_2arg[] =
/* SSE packed/scalar float arithmetic.  */
7161 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7162 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7163 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7164 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7165 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7166 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7167 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7168 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
/* SSE mask-generating compares; the expander overrides their type to
   v4si_ftype_v4sf_v4sf.  */
7170 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7171 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7172 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7173 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7174 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7175 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7176 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7177 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7178 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7179 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7180 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7181 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7182 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7183 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7184 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7185 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7186 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7187 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7188 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7189 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7190 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7191 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7192 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7193 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
/* SSE min/max, logical ops, and data movement.  */
7195 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7196 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7197 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7198 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7200 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7201 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7202 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7203 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7205 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7206 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7207 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7208 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7209 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
/* MMX integer arithmetic (wrapping, saturating, multiply).  */
7212 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7213 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7214 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7215 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7216 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7217 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7219 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7220 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7221 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7222 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7223 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7224 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7225 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7226 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7228 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7229 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7230 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
/* MMX logical ops, average, compares, min/max, unpacks.  */
7232 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7233 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7234 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7235 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7237 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7238 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7240 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7241 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7242 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7243 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7244 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7245 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7247 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7248 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7249 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7250 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7252 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7253 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7254 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7255 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7256 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7257 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
/* Nameless entries: registered by hand in ix86_init_builtins with
   special function types.  */
7260 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7261 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7262 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7264 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7265 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7267 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7268 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7269 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7270 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7271 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7272 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7274 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7275 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7276 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7277 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7278 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7279 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7281 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7282 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7283 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7284 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7286 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7287 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
/* One-operand builtins (moves of mask bits, sqrt/reciprocal
   approximations, and float<->int conversions).  All entries are
   nameless and registered by hand in ix86_init_builtins.  */
7291 static struct builtin_description bdesc_1arg[] =
7293 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7294 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7296 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7297 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7298 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7300 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7301 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7302 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7303 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7307 /* Expand all the target specific builtins. This is not called if TARGET_MMX
7308 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
/* NOTE(review): elided extraction -- many declarations, braces, and
   endlink arguments are on missing lines; comments describe only the
   visible structure.  */
7311 ix86_init_builtins ()
7313 struct builtin_description * d;
7315 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
/* Pointer types used by the load/store builtins.  */
7317 tree pchar_type_node = build_pointer_type (char_type_node);
7318 tree pfloat_type_node = build_pointer_type (float_type_node);
7319 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7320 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Comparison result types.  */
7323 tree int_ftype_v4sf_v4sf
7324 = build_function_type (integer_type_node,
7325 tree_cons (NULL_TREE, V4SF_type_node,
7326 tree_cons (NULL_TREE,
7329 tree v4si_ftype_v4sf_v4sf
7330 = build_function_type (V4SI_type_node,
7331 tree_cons (NULL_TREE, V4SF_type_node,
7332 tree_cons (NULL_TREE,
7335 /* MMX/SSE/integer conversions. */
7336 tree int_ftype_v4sf_int
7337 = build_function_type (integer_type_node,
7338 tree_cons (NULL_TREE, V4SF_type_node,
7339 tree_cons (NULL_TREE,
7343 = build_function_type (integer_type_node,
7344 tree_cons (NULL_TREE, V4SF_type_node,
7347 = build_function_type (integer_type_node,
7348 tree_cons (NULL_TREE, V8QI_type_node,
7351 = build_function_type (integer_type_node,
7352 tree_cons (NULL_TREE, V2SI_type_node,
7355 = build_function_type (V2SI_type_node,
7356 tree_cons (NULL_TREE, integer_type_node,
/* NOTE(review): the name says this returns a V4SF, but the visible call
   builds it with integer_type_node as the return type -- looks wrong for
   cvtsi2ss; confirm against the full source before relying on it.  */
7358 tree v4sf_ftype_v4sf_int
7359 = build_function_type (integer_type_node,
7360 tree_cons (NULL_TREE, V4SF_type_node,
7361 tree_cons (NULL_TREE, integer_type_node,
7363 tree v4sf_ftype_v4sf_v2si
7364 = build_function_type (V4SF_type_node,
7365 tree_cons (NULL_TREE, V4SF_type_node,
7366 tree_cons (NULL_TREE, V2SI_type_node,
7368 tree int_ftype_v4hi_int
7369 = build_function_type (integer_type_node,
7370 tree_cons (NULL_TREE, V4HI_type_node,
7371 tree_cons (NULL_TREE, integer_type_node,
7373 tree v4hi_ftype_v4hi_int_int
7374 = build_function_type (V4HI_type_node,
7375 tree_cons (NULL_TREE, V4HI_type_node,
7376 tree_cons (NULL_TREE, integer_type_node,
7377 tree_cons (NULL_TREE,
7380 /* Miscellaneous. */
7381 tree v8qi_ftype_v4hi_v4hi
7382 = build_function_type (V8QI_type_node,
7383 tree_cons (NULL_TREE, V4HI_type_node,
7384 tree_cons (NULL_TREE, V4HI_type_node,
7386 tree v4hi_ftype_v2si_v2si
7387 = build_function_type (V4HI_type_node,
7388 tree_cons (NULL_TREE, V2SI_type_node,
7389 tree_cons (NULL_TREE, V2SI_type_node,
7391 tree v4sf_ftype_v4sf_v4sf_int
7392 = build_function_type (V4SF_type_node,
7393 tree_cons (NULL_TREE, V4SF_type_node,
7394 tree_cons (NULL_TREE, V4SF_type_node,
7395 tree_cons (NULL_TREE,
7398 tree v4hi_ftype_v8qi_v8qi
7399 = build_function_type (V4HI_type_node,
7400 tree_cons (NULL_TREE, V8QI_type_node,
7401 tree_cons (NULL_TREE, V8QI_type_node,
7403 tree v2si_ftype_v4hi_v4hi
7404 = build_function_type (V2SI_type_node,
7405 tree_cons (NULL_TREE, V4HI_type_node,
7406 tree_cons (NULL_TREE, V4HI_type_node,
7408 tree v4hi_ftype_v4hi_int
7409 = build_function_type (V4HI_type_node,
7410 tree_cons (NULL_TREE, V4HI_type_node,
7411 tree_cons (NULL_TREE, integer_type_node,
7413 tree di_ftype_di_int
7414 = build_function_type (long_long_unsigned_type_node,
7415 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7416 tree_cons (NULL_TREE, integer_type_node,
7418 tree v8qi_ftype_v8qi_di
7419 = build_function_type (V8QI_type_node,
7420 tree_cons (NULL_TREE, V8QI_type_node,
7421 tree_cons (NULL_TREE,
7422 long_long_integer_type_node,
7424 tree v4hi_ftype_v4hi_di
7425 = build_function_type (V4HI_type_node,
7426 tree_cons (NULL_TREE, V4HI_type_node,
7427 tree_cons (NULL_TREE,
7428 long_long_integer_type_node,
7430 tree v2si_ftype_v2si_di
7431 = build_function_type (V2SI_type_node,
7432 tree_cons (NULL_TREE, V2SI_type_node,
7433 tree_cons (NULL_TREE,
7434 long_long_integer_type_node,
7436 tree void_ftype_void
7437 = build_function_type (void_type_node, endlink);
7438 tree void_ftype_pchar_int
7439 = build_function_type (void_type_node,
7440 tree_cons (NULL_TREE, pchar_type_node,
7441 tree_cons (NULL_TREE, integer_type_node,
7443 tree void_ftype_unsigned
7444 = build_function_type (void_type_node,
7445 tree_cons (NULL_TREE, unsigned_type_node,
7447 tree unsigned_ftype_void
7448 = build_function_type (unsigned_type_node, endlink);
7450 = build_function_type (long_long_unsigned_type_node, endlink);
7452 = build_function_type (intTI_type_node, endlink);
7453 tree v2si_ftype_v4sf
7454 = build_function_type (V2SI_type_node,
7455 tree_cons (NULL_TREE, V4SF_type_node,
/* Loads and stores.  */
7458 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7459 tree_cons (NULL_TREE, V8QI_type_node,
7460 tree_cons (NULL_TREE,
7463 tree void_ftype_v8qi_v8qi_pchar
7464 = build_function_type (void_type_node, maskmovq_args);
7465 tree v4sf_ftype_pfloat
7466 = build_function_type (V4SF_type_node,
7467 tree_cons (NULL_TREE, pfloat_type_node,
7469 tree v4sf_ftype_float
7470 = build_function_type (V4SF_type_node,
7471 tree_cons (NULL_TREE, float_type_node,
7473 tree v4sf_ftype_float_float_float_float
7474 = build_function_type (V4SF_type_node,
7475 tree_cons (NULL_TREE, float_type_node,
7476 tree_cons (NULL_TREE, float_type_node,
7477 tree_cons (NULL_TREE,
7479 tree_cons (NULL_TREE,
7482 /* @@@ the type is bogus */
7483 tree v4sf_ftype_v4sf_pv2si
7484 = build_function_type (V4SF_type_node,
7485 tree_cons (NULL_TREE, V4SF_type_node,
7486 tree_cons (NULL_TREE, pv2si_type_node,
/* NOTE(review): the name suggests argument order (pv2si, v4sf), but the
   visible construction lists V4SF first -- covered by the "type is
   bogus" remark above; confirm before changing.  */
7488 tree v4sf_ftype_pv2si_v4sf
7489 = build_function_type (V4SF_type_node,
7490 tree_cons (NULL_TREE, V4SF_type_node,
7491 tree_cons (NULL_TREE, pv2si_type_node,
7493 tree void_ftype_pfloat_v4sf
7494 = build_function_type (void_type_node,
7495 tree_cons (NULL_TREE, pfloat_type_node,
7496 tree_cons (NULL_TREE, V4SF_type_node,
7498 tree void_ftype_pdi_di
7499 = build_function_type (void_type_node,
7500 tree_cons (NULL_TREE, pdi_type_node,
7501 tree_cons (NULL_TREE,
7502 long_long_unsigned_type_node,
7504 /* Normal vector unops. */
7505 tree v4sf_ftype_v4sf
7506 = build_function_type (V4SF_type_node,
7507 tree_cons (NULL_TREE, V4SF_type_node,
7510 /* Normal vector binops. */
7511 tree v4sf_ftype_v4sf_v4sf
7512 = build_function_type (V4SF_type_node,
7513 tree_cons (NULL_TREE, V4SF_type_node,
7514 tree_cons (NULL_TREE, V4SF_type_node,
7516 tree v8qi_ftype_v8qi_v8qi
7517 = build_function_type (V8QI_type_node,
7518 tree_cons (NULL_TREE, V8QI_type_node,
7519 tree_cons (NULL_TREE, V8QI_type_node,
7521 tree v4hi_ftype_v4hi_v4hi
7522 = build_function_type (V4HI_type_node,
7523 tree_cons (NULL_TREE, V4HI_type_node,
7524 tree_cons (NULL_TREE, V4HI_type_node,
7526 tree v2si_ftype_v2si_v2si
7527 = build_function_type (V2SI_type_node,
7528 tree_cons (NULL_TREE, V2SI_type_node,
7529 tree_cons (NULL_TREE, V2SI_type_node,
7532 = build_function_type (intTI_type_node,
7533 tree_cons (NULL_TREE, intTI_type_node,
7534 tree_cons (NULL_TREE, intTI_type_node,
7537 = build_function_type (long_long_unsigned_type_node,
7538 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7539 tree_cons (NULL_TREE,
7540 long_long_unsigned_type_node,
7543 /* Add all builtins that are more or less simple operations on two
/* Walk bdesc_2arg and register each named entry, picking the function
   type from the insn's operand-1 mode.  */
7545 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7547 /* Use one of the operands; the target can have a different mode for
7548 mask-generating compares. */
7549 enum machine_mode mode;
7554 mode = insn_data[d->icode].operand[1].mode;
/* Skip SSE-mode entries when only MMX is enabled.  */
7556 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7562 type = v4sf_ftype_v4sf_v4sf;
7565 type = v8qi_ftype_v8qi_v8qi;
7568 type = v4hi_ftype_v4hi_v4hi;
7571 type = v2si_ftype_v2si_v2si;
7574 type = ti_ftype_ti_ti;
7577 type = di_ftype_di_di;
7584 /* Override for comparisons. */
7585 if (d->icode == CODE_FOR_maskcmpv4sf3
7586 || d->icode == CODE_FOR_maskncmpv4sf3
7587 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7588 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7589 type = v4si_ftype_v4sf_v4sf;
7591 def_builtin (d->name, type, d->code);
7594 /* Add the remaining MMX insns with somewhat more complicated types. */
7595 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
7596 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
7597 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
7598 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
7599 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
7600 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
7601 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
7602 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
7603 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
7605 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
7606 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
7607 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
7609 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
7610 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
7612 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
7613 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
7615 /* Everything beyond this point is SSE only. */
7619 /* comi/ucomi insns. */
7620 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
7621 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
7623 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
7624 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
7625 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
7627 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
7628 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
7629 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
7630 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
7631 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
7632 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
7634 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
7635 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
7637 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
7639 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
7640 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
7641 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
7642 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
7643 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
7644 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
7646 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
7647 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
7648 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
7649 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
7651 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
7652 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
7653 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
7654 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
7656 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
7657 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
7659 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
7661 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
7662 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
7663 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
7664 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
7665 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
7666 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
7668 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
7670 /* Composite intrinsics. */
7671 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
7672 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
7673 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
7674 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
7675 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
7676 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
7677 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
7680 /* Errors in the source file can cause expand_expr to return const0_rtx
7681 where we expect a vector. To avoid crashing, use one of the vector
7682 clear instructions. */
/* Old-style definition; the `rtx x;' parameter declaration, braces and
   returns are on elided lines.  If X is not const0_rtx it is returned
   untouched (the early return itself is elided) -- TODO confirm.  */
7684 safe_vector_operand (x, mode)
7686 enum machine_mode mode;
7688 if (x != const0_rtx)
/* Materialize a zeroed register of MODE instead of the bogus const0.  */
7690 x = gen_reg_rtx (mode);
/* MMX modes are cleared via clrdi on a DImode view; SSE modes via
   clrti on a TImode view (SUBREG if the mode differs).  */
7692 if (VALID_MMX_REG_MODE (mode))
7693 emit_insn (gen_mmx_clrdi (mode == DImode ? x
7694 : gen_rtx_SUBREG (DImode, x, 0)));
7696 emit_insn (gen_sse_clrti (mode == TImode ? x
7697 : gen_rtx_SUBREG (TImode, x, 0)));
7701 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin: evaluates both arguments, sanitizes
   vector operands, forces operands into registers accepted by the insn
   predicates, and emits the pattern.  Returns TARGET (return statement
   elided from this view).  */
7704 ix86_expand_binop_builtin (icode, arglist, target)
7705 enum insn_code icode;
7710 tree arg0 = TREE_VALUE (arglist);
7711 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7712 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7713 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7714 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7715 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7716 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace erroneous const0_rtx vector operands with cleared registers.  */
7718 if (VECTOR_MODE_P (mode0))
7719 op0 = safe_vector_operand (op0, mode0);
7720 if (VECTOR_MODE_P (mode1))
7721 op1 = safe_vector_operand (op1, mode1);
/* Get a fresh target register if the caller's is unusable.  */
7724 || GET_MODE (target) != tmode
7725 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7726 target = gen_reg_rtx (tmode);
7728 /* In case the insn wants input operands in modes different from
7729 the result, abort. */
7730 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
7733 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7734 op0 = copy_to_mode_reg (mode0, op0);
7735 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7736 op1 = copy_to_mode_reg (mode1, op1);
7738 pat = GEN_FCN (icode) (target, op0, op1);
7745 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expands a store builtin: arg0 is the destination address (wrapped in a
   MEM), arg1 the value.  SHUFFLE >= 0 requests an sse_shufps of the value
   with itself (used by storeps1/storerps) before the store is emitted.  */
7748 ix86_expand_store_builtin (icode, arglist, shuffle)
7749 enum insn_code icode;
7754 tree arg0 = TREE_VALUE (arglist);
7755 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7756 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7757 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7758 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
7759 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
7761 if (VECTOR_MODE_P (mode1))
7762 op1 = safe_vector_operand (op1, mode1);
/* Destination: a MEM at the pointer value, forced into Pmode.  */
7764 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* The value must be in a register when we shuffle it in place.  */
7765 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
7766 op1 = copy_to_mode_reg (mode1, op1);
7768 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
7769 pat = GEN_FCN (icode) (op0, op1);
7775 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expands a one-operand builtin.  When DO_LOAD is nonzero the argument
   is a pointer and the operand becomes a MEM at that address (used by
   the load builtins); otherwise the value is used directly.  */
7778 ix86_expand_unop_builtin (icode, arglist, target, do_load)
7779 enum insn_code icode;
7785 tree arg0 = TREE_VALUE (arglist);
7786 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7787 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7788 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Get a fresh target register if the caller's is unusable.  */
7791 || GET_MODE (target) != tmode
7792 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7793 target = gen_reg_rtx (tmode);
7795 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7798 if (VECTOR_MODE_P (mode0))
7799 op0 = safe_vector_operand (op0, mode0);
7801 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7802 op0 = copy_to_mode_reg (mode0, op0);
7805 pat = GEN_FCN (icode) (target, op0);
7812 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
7813 sqrtss, rsqrtss, rcpss. */
/* Like ix86_expand_unop_builtin but the insn takes the operand twice
   (the vm* scalar patterns merge the result into the upper elements of
   the same source), hence the (target, op0, op0) call below.  */
7816 ix86_expand_unop1_builtin (icode, arglist, target)
7817 enum insn_code icode;
7822 tree arg0 = TREE_VALUE (arglist);
7823 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7824 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7825 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7828 || GET_MODE (target) != tmode
7829 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7830 target = gen_reg_rtx (tmode);
7832 if (VECTOR_MODE_P (mode0))
7833 op0 = safe_vector_operand (op0, mode0);
7835 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7836 op0 = copy_to_mode_reg (mode0, op0);
7838 pat = GEN_FCN (icode) (target, op0, op0);
7845 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands a mask-generating SSE compare described by D.  For GE/GT
   (encoded as LE/LT with the swap flag -- see bdesc_2arg) the operands
   are swapped and the condition reversed via swap_condition; the actual
   swap/flag test lines are elided from this view.  */
7848 ix86_expand_sse_compare (d, arglist, target)
7849 struct builtin_description *d;
7854 tree arg0 = TREE_VALUE (arglist);
7855 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7856 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7857 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7859 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
7860 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
7861 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
7862 enum rtx_code comparison = d->comparison;
7864 if (VECTOR_MODE_P (mode0))
7865 op0 = safe_vector_operand (op0, mode0);
7866 if (VECTOR_MODE_P (mode1))
7867 op1 = safe_vector_operand (op1, mode1);
7869 /* Swap operands if we have a comparison that isn't available in
7873 target = gen_reg_rtx (tmode);
7874 emit_move_insn (target, op1);
7877 comparison = swap_condition (comparison);
/* Get a fresh target register if the caller's is unusable.  */
7880 || GET_MODE (target) != tmode
7881 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
7882 target = gen_reg_rtx (tmode);
7884 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
7885 op0 = copy_to_mode_reg (mode0, op0);
7886 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
7887 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx itself is passed as an extra operand so the insn
   pattern can select the right condition encoding.  */
7889 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7890 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
7897 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi builtin: emits the flag-setting compare, then
   materializes the predicate result in a QImode SUBREG of a zeroed
   SImode register via a setcc.  Returns the SImode target (return line
   elided from this view).  */
7900 ix86_expand_sse_comi (d, arglist, target)
7901 struct builtin_description *d;
7906 tree arg0 = TREE_VALUE (arglist);
7907 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7908 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7909 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7911 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
7912 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
7913 enum rtx_code comparison = d->comparison;
7915 if (VECTOR_MODE_P (mode0))
7916 op0 = safe_vector_operand (op0, mode0);
7917 if (VECTOR_MODE_P (mode1))
7918 op1 = safe_vector_operand (op1, mode1);
7920 /* Swap operands if we have a comparison that isn't available in
7927 comparison = swap_condition (comparison);
/* Zero the full SImode result, then write just the low byte with the
   setcc below -- avoids a partial-register stall pattern.  */
7930 target = gen_reg_rtx (SImode);
7931 emit_move_insn (target, const0_rtx);
7932 target = gen_rtx_SUBREG (QImode, target, 0);
7934 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
7935 op0 = copy_to_mode_reg (mode0, op0);
7936 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
7937 op1 = copy_to_mode_reg (mode1, op1);
7939 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7940 pat = GEN_FCN (d->icode) (op0, op1, op2);
/* Emit the flags->byte materialization of the comparison.  */
7944 emit_insn (gen_setcc_2 (target, op2));
7949 /* Expand an expression EXP that calls a built-in function,
7950 with result going to TARGET if that's convenient
7951 (and in mode MODE if that's convenient).
7952 SUBTARGET may be used as the target for computing one of EXP's operands.
7953 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this extract is missing the `switch (fcode)` header,
   the per-case `break;`/`return` scaffolding, `if (pat == 0)` checks
   and closing braces — comments below document only the visible code.  */
7956 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
7959 rtx subtarget ATTRIBUTE_UNUSED;
7960 enum machine_mode mode ATTRIBUTE_UNUSED;
7961 int ignore ATTRIBUTE_UNUSED;
7963 struct builtin_description *d;
7965 enum insn_code icode;
/* EXP is a CALL_EXPR: operand 0 is the (address of the) function decl,
   operand 1 is the argument list.  */
7966 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7967 tree arglist = TREE_OPERAND (exp, 1);
7968 tree arg0, arg1, arg2, arg3;
7969 rtx op0, op1, op2, pat;
7970 enum machine_mode tmode, mode0, mode1, mode2;
7971 int fcode = DECL_FUNCTION_CODE (fndecl);
/* Zero-operand builtins: emit the insn directly.  */
7975 case IX86_BUILTIN_EMMS:
7976 emit_insn (gen_emms ());
7979 case IX86_BUILTIN_SFENCE:
7980 emit_insn (gen_sfence ());
/* Move a 32-bit int into the low half of a DImode MMX value.  */
7983 case IX86_BUILTIN_M_FROM_INT:
7984 target = gen_reg_rtx (DImode);
7985 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7986 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
/* Extract the low 32 bits of a DImode MMX value as an int.  */
7989 case IX86_BUILTIN_M_TO_INT:
7990 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7991 op0 = copy_to_mode_reg (DImode, op0);
7992 target = gen_reg_rtx (SImode);
7993 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
/* pextrw: extract a 16-bit element; the selector (operand 2) must
   satisfy the pattern's immediate predicate.  */
7996 case IX86_BUILTIN_PEXTRW:
7997 icode = CODE_FOR_mmx_pextrw;
7998 arg0 = TREE_VALUE (arglist);
7999 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8002 tmode = insn_data[icode].operand[0].mode;
8003 mode0 = insn_data[icode].operand[1].mode;
8004 mode1 = insn_data[icode].operand[2].mode;
8006 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8007 op0 = copy_to_mode_reg (mode0, op0);
8008 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8010 /* @@@ better error message */
8011 error ("selector must be an immediate");
/* Reuse TARGET only if it is a register of the right mode accepted by
   the pattern; otherwise allocate a fresh pseudo.  */
8015 || GET_MODE (target) != tmode
8016 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8017 target = gen_reg_rtx (tmode);
8018 pat = GEN_FCN (icode) (target, op0, op1);
/* pinsrw: insert a 16-bit element at an immediate position.  */
8024 case IX86_BUILTIN_PINSRW:
8025 icode = CODE_FOR_mmx_pinsrw;
8026 arg0 = TREE_VALUE (arglist);
8027 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8028 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8029 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8030 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8031 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8032 tmode = insn_data[icode].operand[0].mode;
8033 mode0 = insn_data[icode].operand[1].mode;
8034 mode1 = insn_data[icode].operand[2].mode;
8035 mode2 = insn_data[icode].operand[3].mode;
8037 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8038 op0 = copy_to_mode_reg (mode0, op0);
8039 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8040 op1 = copy_to_mode_reg (mode1, op1);
8041 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8043 /* @@@ better error message */
8044 error ("selector must be an immediate");
8048 || GET_MODE (target) != tmode
8049 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8050 target = gen_reg_rtx (tmode);
8051 pat = GEN_FCN (icode) (target, op0, op1, op2);
8057 case IX86_BUILTIN_MASKMOVQ:
8058 icode = CODE_FOR_mmx_maskmovq;
8059 /* Note the arg order is different from the operand order. */
8060 arg1 = TREE_VALUE (arglist);
8061 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
8062 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8063 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8064 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8065 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8066 mode0 = insn_data[icode].operand[0].mode;
8067 mode1 = insn_data[icode].operand[1].mode;
8068 mode2 = insn_data[icode].operand[2].mode;
/* NOTE(review): both checks below index operand[1] — the first one
   presumably should test operand[0]'s predicate against op0/mode0;
   verify against the mmx_maskmovq pattern.  */
8070 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8071 op0 = copy_to_mode_reg (mode0, op0);
8072 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8073 op1 = copy_to_mode_reg (mode1, op1);
8074 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8075 op2 = copy_to_mode_reg (mode2, op2);
8076 pat = GEN_FCN (icode) (op0, op1, op2);
/* Scalar unary SSE ops delegated to the unop1 helper.  */
8082 case IX86_BUILTIN_SQRTSS:
8083 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8084 case IX86_BUILTIN_RSQRTSS:
8085 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8086 case IX86_BUILTIN_RCPSS:
8087 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
/* Aligned/unaligned loads and stores delegated to helpers.  */
8089 case IX86_BUILTIN_LOADAPS:
8090 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8092 case IX86_BUILTIN_LOADUPS:
8093 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8095 case IX86_BUILTIN_STOREAPS:
8096 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8097 case IX86_BUILTIN_STOREUPS:
8098 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8100 case IX86_BUILTIN_LOADSS:
8101 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8103 case IX86_BUILTIN_STORESS:
8104 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
/* movhps/movlps load forms: second argument is a pointer, wrapped in
   a MEM of the operand's mode.  */
8106 case IX86_BUILTIN_LOADHPS:
8107 case IX86_BUILTIN_LOADLPS:
8108 icode = (fcode == IX86_BUILTIN_LOADHPS
8109 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8110 arg0 = TREE_VALUE (arglist);
8111 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8112 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8113 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8114 tmode = insn_data[icode].operand[0].mode;
8115 mode0 = insn_data[icode].operand[1].mode;
8116 mode1 = insn_data[icode].operand[2].mode;
8118 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8119 op0 = copy_to_mode_reg (mode0, op0);
8120 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8122 || GET_MODE (target) != tmode
8123 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8124 target = gen_reg_rtx (tmode);
8125 pat = GEN_FCN (icode) (target, op0, op1);
/* movhps/movlps store forms: first argument is the destination
   pointer; note op0 appears as both output and input operand.  */
8131 case IX86_BUILTIN_STOREHPS:
8132 case IX86_BUILTIN_STORELPS:
8133 icode = (fcode == IX86_BUILTIN_STOREHPS
8134 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8135 arg0 = TREE_VALUE (arglist);
8136 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8137 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8138 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8139 mode0 = insn_data[icode].operand[1].mode;
8140 mode1 = insn_data[icode].operand[2].mode;
8142 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8143 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8144 op1 = copy_to_mode_reg (mode1, op1);
8146 pat = GEN_FCN (icode) (op0, op0, op1);
/* Non-temporal stores.  */
8152 case IX86_BUILTIN_MOVNTPS:
8153 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8154 case IX86_BUILTIN_MOVNTQ:
8155 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
/* ldmxcsr/stmxcsr go through a dedicated SImode stack slot because
   the insns take a memory operand.  */
8157 case IX86_BUILTIN_LDMXCSR:
8158 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8159 target = assign_386_stack_local (SImode, 0);
8160 emit_move_insn (target, op0);
8161 emit_insn (gen_ldmxcsr (target));
8164 case IX86_BUILTIN_STMXCSR:
8165 target = assign_386_stack_local (SImode, 0);
8166 emit_insn (gen_stmxcsr (target));
8167 return copy_to_mode_reg (SImode, target);
/* prefetch: operand 1 (the hint selector) must be an immediate.  */
8169 case IX86_BUILTIN_PREFETCH:
8170 icode = CODE_FOR_prefetch;
8171 arg0 = TREE_VALUE (arglist);
8172 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8173 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8174 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8175 mode0 = insn_data[icode].operand[0].mode;
8176 mode1 = insn_data[icode].operand[1].mode;
8178 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8180 /* @@@ better error message */
8181 error ("selector must be an immediate");
8185 op0 = copy_to_mode_reg (Pmode, op0);
8186 pat = GEN_FCN (icode) (op0, op1);
/* shufps: three operands, the shuffle mask must be an immediate.  */
8192 case IX86_BUILTIN_SHUFPS:
8193 icode = CODE_FOR_sse_shufps;
8194 arg0 = TREE_VALUE (arglist);
8195 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8196 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8197 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8198 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8199 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8200 tmode = insn_data[icode].operand[0].mode;
8201 mode0 = insn_data[icode].operand[1].mode;
8202 mode1 = insn_data[icode].operand[2].mode;
8203 mode2 = insn_data[icode].operand[3].mode;
8205 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8206 op0 = copy_to_mode_reg (mode0, op0);
8207 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8208 op1 = copy_to_mode_reg (mode1, op1);
8209 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8211 /* @@@ better error message */
8212 error ("mask must be an immediate");
8216 || GET_MODE (target) != tmode
8217 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8218 target = gen_reg_rtx (tmode);
8219 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* pshufw: note the pattern takes TARGET twice (match_dup input);
   modes are read from operands 2 and 3 accordingly.  */
8225 case IX86_BUILTIN_PSHUFW:
8226 icode = CODE_FOR_mmx_pshufw;
8227 arg0 = TREE_VALUE (arglist);
8228 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8229 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8230 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8231 tmode = insn_data[icode].operand[0].mode;
8232 mode0 = insn_data[icode].operand[2].mode;
8233 mode1 = insn_data[icode].operand[3].mode;
8235 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8236 op0 = copy_to_mode_reg (mode0, op0);
8237 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8239 /* @@@ better error message */
8240 error ("mask must be an immediate");
8244 || GET_MODE (target) != tmode
8245 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8246 target = gen_reg_rtx (tmode);
8247 pat = GEN_FCN (icode) (target, target, op0, op1);
8253 /* Composite intrinsics. */
/* _mm_set_ps1: spill the scalar to a stack slot, loadss it into a
   V4SF register, then broadcast lane 0 with shufps mask 0.  */
8254 case IX86_BUILTIN_SETPS1:
8255 target = assign_386_stack_local (SFmode, 0);
8256 arg0 = TREE_VALUE (arglist);
8257 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8258 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8259 op0 = gen_reg_rtx (V4SFmode);
8260 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8261 XEXP (target, 0))));
8262 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
/* _mm_set_ps: store the four scalars at offsets 0/4/8/12 of a V4SF
   stack slot, then movaps the whole vector into a register.  */
8265 case IX86_BUILTIN_SETPS:
8266 target = assign_386_stack_local (V4SFmode, 0);
8267 op0 = change_address (target, SFmode, XEXP (target, 0));
8268 arg0 = TREE_VALUE (arglist);
8269 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8270 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8271 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8272 emit_move_insn (op0,
8273 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8274 emit_move_insn (adj_offsettable_operand (op0, 4),
8275 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8276 emit_move_insn (adj_offsettable_operand (op0, 8),
8277 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8278 emit_move_insn (adj_offsettable_operand (op0, 12),
8279 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8280 op0 = gen_reg_rtx (V4SFmode);
8281 emit_insn (gen_sse_movaps (op0, target));
/* _mm_setzero_ps: clear a TImode register.  */
8284 case IX86_BUILTIN_CLRPS:
8285 target = gen_reg_rtx (TImode);
8286 emit_insn (gen_sse_clrti (target));
/* _mm_loadr_ps: aligned load followed by a lane-reversing shufps
   (mask 0x1b reverses the four lanes).  */
8289 case IX86_BUILTIN_LOADRPS:
8290 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8291 gen_reg_rtx (V4SFmode), 1);
8292 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
/* _mm_load_ps1: scalar load then broadcast lane 0.  */
8295 case IX86_BUILTIN_LOADPS1:
8296 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8297 gen_reg_rtx (V4SFmode), 1);
8298 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8301 case IX86_BUILTIN_STOREPS1:
8302 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8303 case IX86_BUILTIN_STORERPS:
8304 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8306 case IX86_BUILTIN_MMX_ZERO:
8307 target = gen_reg_rtx (DImode);
8308 emit_insn (gen_mmx_clrdi (target));
/* Anything not handled above: search the table-driven descriptors for
   two-operand, one-operand and comi builtins and delegate to the
   matching expander.  Compares get special-cased expansion.  */
8315 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8316 if (d->code == fcode)
8318 /* Compares are treated specially. */
8319 if (d->icode == CODE_FOR_maskcmpv4sf3
8320 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8321 || d->icode == CODE_FOR_maskncmpv4sf3
8322 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8323 return ix86_expand_sse_compare (d, arglist, target);
8325 return ix86_expand_binop_builtin (d->icode, arglist, target);
8328 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8329 if (d->code == fcode)
8330 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8332 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8333 if (d->code == fcode)
8334 return ix86_expand_sse_comi (d, arglist, target);
8336 /* @@@ Should really do something sensible here. */
8340 /* Store OPERAND to the memory after reload is completed. This means
8341 that we can't easilly use assign_stack_local. */
/* Pushes OPERAND onto the stack with PRE_DEC addressing (two SImode
   pushes for DImode, one push otherwise) and returns a MEM referencing
   the new stack top.  Caller must release the space with
   ix86_free_from_memory.
   NOTE(review): the mode `switch`, the emit_insn calls wrapping the
   gen_rtx_SET forms, and the `operands[]` declaration are missing from
   this extract — comments describe only the visible lines.  */
8343 ix86_force_to_memory (mode, operand)
8344 enum machine_mode mode;
/* Only valid post-reload; assign_stack_local can't be used here.  */
8347 if (!reload_completed)
/* DImode: split into two SImode halves and push each with its own
   PRE_DEC of the stack pointer (high word first, given push order).  */
8354 split_di (&operand, 1, operands, operands+1);
8356 gen_rtx_SET (VOIDmode,
8357 gen_rtx_MEM (SImode,
8358 gen_rtx_PRE_DEC (Pmode,
8359 stack_pointer_rtx)),
8362 gen_rtx_SET (VOIDmode,
8363 gen_rtx_MEM (SImode,
8364 gen_rtx_PRE_DEC (Pmode,
8365 stack_pointer_rtx)),
8370 /* It is better to store HImodes as SImodes. */
/* Widening avoids partial-register stalls on targets that have them.  */
8371 if (!TARGET_PARTIAL_REG_STALL)
8372 operand = gen_lowpart (SImode, operand);
/* Single push of the (possibly widened) operand.  */
8376 gen_rtx_SET (VOIDmode,
8377 gen_rtx_MEM (GET_MODE (operand),
8378 gen_rtx_PRE_DEC (SImode,
8379 stack_pointer_rtx)),
/* The pushed value now lives at the stack top.  */
8385 return gen_rtx_MEM (mode, stack_pointer_rtx);
8388 /* Free operand from the memory. */
8390 ix86_free_from_memory (mode)
8391 enum machine_mode mode;
8393 /* Use LEA to deallocate stack space. In peephole2 it will be converted
8394 to pop or add instruction if registers are available. */
8395 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8396 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8397 GEN_INT (mode == DImode
8399 : mode == HImode && TARGET_PARTIAL_REG_STALL