1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
3 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
29 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "insn-flags.h"
35 #include "insn-attr.h"
42 #include "basic-block.h"
45 #ifndef CHECK_STACK_LIMIT
46 #define CHECK_STACK_LIMIT -1
49 /* Processor costs (relative to an add) */
50 struct processor_costs i386_cost = { /* 386 specific costs */
51 1, /* cost of an add instruction */
52 1, /* cost of a lea instruction */
53 3, /* variable shift costs */
54 2, /* constant shift costs */
55 6, /* cost of starting a multiply */
56 1, /* cost of multiply per each bit set */
57 23, /* cost of a divide/mod */
58 15, /* "large" insn */
60 4, /* cost for loading QImode using movzbl */
61 {2, 4, 2}, /* cost of loading integer registers
62 in QImode, HImode and SImode.
63 Relative to reg-reg move (2). */
64 {2, 4, 2}, /* cost of storing integer registers */
65 2, /* cost of reg,reg fld/fst */
66 {8, 8, 8}, /* cost of loading fp registers
67 in SFmode, DFmode and XFmode */
68 {8, 8, 8} /* cost of loading integer registers */
71 struct processor_costs i486_cost = { /* 486 specific costs */
72 1, /* cost of an add instruction */
73 1, /* cost of a lea instruction */
74 3, /* variable shift costs */
75 2, /* constant shift costs */
76 12, /* cost of starting a multiply */
77 1, /* cost of multiply per each bit set */
78 40, /* cost of a divide/mod */
79 15, /* "large" insn */
81 4, /* cost for loading QImode using movzbl */
82 {2, 4, 2}, /* cost of loading integer registers
83 in QImode, HImode and SImode.
84 Relative to reg-reg move (2). */
85 {2, 4, 2}, /* cost of storing integer registers */
86 2, /* cost of reg,reg fld/fst */
87 {8, 8, 8}, /* cost of loading fp registers
88 in SFmode, DFmode and XFmode */
89 {8, 8, 8} /* cost of loading integer registers */
92 struct processor_costs pentium_cost = {
93 1, /* cost of an add instruction */
94 1, /* cost of a lea instruction */
95 4, /* variable shift costs */
96 1, /* constant shift costs */
97 11, /* cost of starting a multiply */
98 0, /* cost of multiply per each bit set */
99 25, /* cost of a divide/mod */
100 8, /* "large" insn */
102 6, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {2, 2, 6}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {4, 4, 6} /* cost of loading integer registers */
113 struct processor_costs pentiumpro_cost = {
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 1, /* variable shift costs */
117 1, /* constant shift costs */
118 4, /* cost of starting a multiply */
119 0, /* cost of multiply per each bit set */
120 17, /* cost of a divide/mod */
121 8, /* "large" insn */
123 2, /* cost for loading QImode using movzbl */
124 {4, 4, 4}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 2, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {2, 2, 6}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {4, 4, 6} /* cost of loading integer registers */
134 struct processor_costs k6_cost = {
135 1, /* cost of an add instruction */
136 2, /* cost of a lea instruction */
137 1, /* variable shift costs */
138 1, /* constant shift costs */
139 3, /* cost of starting a multiply */
140 0, /* cost of multiply per each bit set */
141 18, /* cost of a divide/mod */
142 8, /* "large" insn */
144 3, /* cost for loading QImode using movzbl */
145 {4, 5, 4}, /* cost of loading integer registers
146 in QImode, HImode and SImode.
147 Relative to reg-reg move (2). */
148 {2, 3, 2}, /* cost of storing integer registers */
149 4, /* cost of reg,reg fld/fst */
150 {6, 6, 6}, /* cost of loading fp registers
151 in SFmode, DFmode and XFmode */
152 {4, 4, 4} /* cost of loading integer registers */
155 struct processor_costs athlon_cost = {
156 1, /* cost of an add instruction */
157 2, /* cost of a lea instruction */
158 1, /* variable shift costs */
159 1, /* constant shift costs */
160 5, /* cost of starting a multiply */
161 0, /* cost of multiply per each bit set */
162 42, /* cost of a divide/mod */
163 8, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {4, 5, 4}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 3, 2}, /* cost of storing integer registers */
170 4, /* cost of reg,reg fld/fst */
171 {6, 6, 20}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {4, 4, 16} /* cost of loading integer registers */
176 struct processor_costs *ix86_cost = &pentium_cost;
178 /* Processor feature/optimization bitmasks. */
179 #define m_386 (1<<PROCESSOR_I386)
180 #define m_486 (1<<PROCESSOR_I486)
181 #define m_PENT (1<<PROCESSOR_PENTIUM)
182 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
183 #define m_K6 (1<<PROCESSOR_K6)
184 #define m_ATHLON (1<<PROCESSOR_ATHLON)
186 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
187 const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
188 const int x86_zero_extend_with_and = m_486 | m_PENT;
189 const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
190 const int x86_double_with_add = ~m_386;
191 const int x86_use_bit_test = m_386;
192 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
193 const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
194 const int x86_use_any_reg = m_486;
195 const int x86_cmove = m_PPRO | m_ATHLON;
196 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
197 const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
198 const int x86_partial_reg_stall = m_PPRO;
199 const int x86_use_loop = m_K6;
200 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
201 const int x86_use_mov0 = m_K6;
202 const int x86_use_cltd = ~(m_PENT | m_K6);
203 const int x86_read_modify_write = ~m_PENT;
204 const int x86_read_modify = ~(m_PENT | m_PPRO);
205 const int x86_split_long_moves = m_PPRO;
206 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
207 const int x86_single_stringop = m_386;
208 const int x86_qimode_math = ~(0);
209 const int x86_promote_qi_regs = 0;
210 const int x86_himode_math = ~(m_PPRO);
211 const int x86_promote_hi_regs = m_PPRO;
212 const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
213 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
214 const int x86_add_esp_4 = m_ATHLON | m_K6;
215 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
216 const int x86_integer_DFmode_moves = ~m_ATHLON;
217 const int x86_partial_reg_dependency = m_ATHLON;
218 const int x86_memory_mismatch_stall = m_ATHLON;
220 #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
222 const char * const hi_reg_name[] = HI_REGISTER_NAMES;
223 const char * const qi_reg_name[] = QI_REGISTER_NAMES;
224 const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
226 /* Array of the smallest class containing reg number REGNO, indexed by
227 REGNO. Used by REGNO_REG_CLASS in i386.h. */
229 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
232 AREG, DREG, CREG, BREG,
234 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
236 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
237 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
240 /* flags, fpsr, dirflag, frame */
241 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
242 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
244 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
248 /* The "default" register map. */
250 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
252 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
253 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
254 -1, -1, -1, -1, /* arg, flags, fpsr, dir */
255 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
256 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
259 /* Define the register numbers to be used in Dwarf debugging information.
260 The SVR4 reference port C compiler uses the following register numbers
261 in its Dwarf output code:
262 0 for %eax (gcc regno = 0)
263 1 for %ecx (gcc regno = 2)
264 2 for %edx (gcc regno = 1)
265 3 for %ebx (gcc regno = 3)
266 4 for %esp (gcc regno = 7)
267 5 for %ebp (gcc regno = 6)
268 6 for %esi (gcc regno = 4)
269 7 for %edi (gcc regno = 5)
270 The following three DWARF register numbers are never generated by
271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
272 believes these numbers have these meanings.
273 8 for %eip (no gcc equivalent)
274 9 for %eflags (gcc regno = 17)
275 10 for %trapno (no gcc equivalent)
276 It is not at all clear how we should number the FP stack registers
277 for the x86 architecture. If the version of SDB on x86/svr4 were
278 a bit less brain dead with respect to floating-point then we would
279 have a precedent to follow with respect to DWARF register numbers
280 for x86 FP registers, but the SDB on x86/svr4 is so completely
281 broken with respect to FP registers that it is hardly worth thinking
282 of it as something to strive for compatibility with.
283 The version of x86/svr4 SDB I have at the moment does (partially)
284 seem to believe that DWARF register number 11 is associated with
285 the x86 register %st(0), but that's about all. Higher DWARF
286 register numbers don't seem to be associated with anything in
287 particular, and even for DWARF regno 11, SDB only seems to under-
288 stand that it should say that a variable lives in %st(0) (when
289 asked via an `=' command) if we said it was in DWARF regno 11,
290 but SDB still prints garbage when asked for the value of the
291 variable in question (via a `/' command).
292 (Also note that the labels SDB prints for various FP stack regs
293 when doing an `x' command are all wrong.)
294 Note that these problems generally don't affect the native SVR4
295 C compiler because it doesn't allow the use of -O with -g and
296 because when it is *not* optimizing, it allocates a memory
297 location for each floating-point variable, and the memory
298 location is what gets described in the DWARF AT_location
299 attribute for the variable in question.
300 Regardless of the severe mental illness of the x86/svr4 SDB, we
301 do something sensible here and we use the following DWARF
302 register numbers. Note that these are all stack-top-relative
304 11 for %st(0) (gcc regno = 8)
305 12 for %st(1) (gcc regno = 9)
306 13 for %st(2) (gcc regno = 10)
307 14 for %st(3) (gcc regno = 11)
308 15 for %st(4) (gcc regno = 12)
309 16 for %st(5) (gcc regno = 13)
310 17 for %st(6) (gcc regno = 14)
311 18 for %st(7) (gcc regno = 15)
313 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
317 -1, 9, -1, -1, /* arg, flags, fpsr, dir */
318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
322 /* Test and compare insns in i386.md store the information needed to
323 generate branch and scc insns here. */
325 struct rtx_def *ix86_compare_op0 = NULL_RTX;
326 struct rtx_def *ix86_compare_op1 = NULL_RTX;
328 #define MAX_386_STACK_LOCALS 2
330 /* Define the structure for the machine field in struct function. */
331 struct machine_function
333 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
336 #define ix86_stack_locals (cfun->machine->stack_locals)
338 /* which cpu are we scheduling for */
339 enum processor_type ix86_cpu;
341 /* which instruction set architecture to use. */
344 /* Strings to hold which cpu and instruction set architecture to use. */
345 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
346 const char *ix86_arch_string; /* for -march=<xxx> */
348 /* Register allocation order */
349 const char *ix86_reg_alloc_order;
350 static char regs_allocated[FIRST_PSEUDO_REGISTER];
352 /* # of registers to use to pass arguments. */
353 const char *ix86_regparm_string;
355 /* ix86_regparm_string as a number */
358 /* Alignment to use for loops and jumps: */
360 /* Power of two alignment for loops. */
361 const char *ix86_align_loops_string;
363 /* Power of two alignment for non-loop jumps. */
364 const char *ix86_align_jumps_string;
366 /* Power of two alignment for stack boundary in bytes. */
367 const char *ix86_preferred_stack_boundary_string;
369 /* Preferred alignment for stack boundary in bits. */
370 int ix86_preferred_stack_boundary;
372 /* Values 1-5: see jump.c */
373 int ix86_branch_cost;
374 const char *ix86_branch_cost_string;
376 /* Power of two alignment for functions. */
377 int ix86_align_funcs;
378 const char *ix86_align_funcs_string;
380 /* Power of two alignment for loops. */
381 int ix86_align_loops;
383 /* Power of two alignment for non-loop jumps. */
384 int ix86_align_jumps;
386 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
387 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
389 static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
390 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
391 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
393 static rtx gen_push PARAMS ((rtx));
394 static int memory_address_length PARAMS ((rtx addr));
395 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
396 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
397 static int ix86_safe_length PARAMS ((rtx));
398 static enum attr_memory ix86_safe_memory PARAMS ((rtx));
399 static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
400 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
401 static void ix86_dump_ppro_packet PARAMS ((FILE *));
402 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
403 static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
405 static void ix86_init_machine_status PARAMS ((struct function *));
406 static void ix86_mark_machine_status PARAMS ((struct function *));
407 static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
408 static int ix86_safe_length_prefix PARAMS ((rtx));
409 static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
410 int *, int *, int *));
411 static int ix86_nsaved_regs PARAMS((void));
412 static void ix86_emit_save_regs PARAMS((void));
413 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
414 static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
415 static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
416 static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
417 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
421 rtx base, index, disp;
425 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
427 struct builtin_description;
428 static rtx ix86_expand_sse_comi PARAMS ((struct builtin_description *, tree,
430 static rtx ix86_expand_sse_compare PARAMS ((struct builtin_description *, tree,
432 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
433 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
434 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
435 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
436 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
438 /* Sometimes certain combinations of command options do not make
439 sense on a particular target machine. You can define a macro
440 `OVERRIDE_OPTIONS' to take account of this. This macro, if
441 defined, is executed once just after all the command options have
444 Don't use this macro to turn on various extra optimizations for
445 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
450 /* Comes from final.c -- no real reason to change it. */
451 #define MAX_CODE_ALIGN 16
455 struct processor_costs *cost; /* Processor costs */
456 int target_enable; /* Target flags to enable. */
457 int target_disable; /* Target flags to disable. */
458 int align_loop; /* Default alignments. */
463 const processor_target_table[PROCESSOR_max] =
465 {&i386_cost, 0, 0, 2, 2, 2, 1},
466 {&i486_cost, 0, 0, 4, 4, 4, 1},
467 {&pentium_cost, 0, 0, -4, -4, -4, 1},
468 {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
469 {&k6_cost, 0, 0, -5, -5, 4, 1},
470 {&athlon_cost, 0, 0, 4, -4, 4, 1}
475 const char *name; /* processor name or nickname. */
476 enum processor_type processor;
478 const processor_alias_table[] =
480 {"i386", PROCESSOR_I386},
481 {"i486", PROCESSOR_I486},
482 {"i586", PROCESSOR_PENTIUM},
483 {"pentium", PROCESSOR_PENTIUM},
484 {"i686", PROCESSOR_PENTIUMPRO},
485 {"pentiumpro", PROCESSOR_PENTIUMPRO},
486 {"k6", PROCESSOR_K6},
487 {"athlon", PROCESSOR_ATHLON},
490 int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
492 #ifdef SUBTARGET_OVERRIDE_OPTIONS
493 SUBTARGET_OVERRIDE_OPTIONS;
496 ix86_arch = PROCESSOR_I386;
497 ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
499 if (ix86_arch_string != 0)
502 for (i = 0; i < pta_size; i++)
503 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
505 ix86_arch = processor_alias_table[i].processor;
506 /* Default cpu tuning to the architecture. */
507 ix86_cpu = ix86_arch;
511 error ("bad value (%s) for -march= switch", ix86_arch_string);
514 if (ix86_cpu_string != 0)
517 for (i = 0; i < pta_size; i++)
518 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
520 ix86_cpu = processor_alias_table[i].processor;
524 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
527 ix86_cost = processor_target_table[ix86_cpu].cost;
528 target_flags |= processor_target_table[ix86_cpu].target_enable;
529 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
531 /* Arrange to set up i386_stack_locals for all functions. */
532 init_machine_status = ix86_init_machine_status;
533 mark_machine_status = ix86_mark_machine_status;
535 /* Validate registers in register allocation order. */
536 if (ix86_reg_alloc_order)
539 for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
545 case 'a': regno = 0; break;
546 case 'd': regno = 1; break;
547 case 'c': regno = 2; break;
548 case 'b': regno = 3; break;
549 case 'S': regno = 4; break;
550 case 'D': regno = 5; break;
551 case 'B': regno = 6; break;
553 default: fatal ("Register '%c' is unknown", ch);
556 if (regs_allocated[regno])
557 fatal ("Register '%c' already specified in allocation order", ch);
559 regs_allocated[regno] = 1;
563 /* Validate -mregparm= value. */
564 if (ix86_regparm_string)
566 ix86_regparm = atoi (ix86_regparm_string);
567 if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
568 fatal ("-mregparm=%d is not between 0 and %d",
569 ix86_regparm, REGPARM_MAX);
572 /* Validate -malign-loops= value, or provide default. */
573 ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
574 if (ix86_align_loops_string)
576 ix86_align_loops = atoi (ix86_align_loops_string);
577 if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
578 fatal ("-malign-loops=%d is not between 0 and %d",
579 ix86_align_loops, MAX_CODE_ALIGN);
582 /* Validate -malign-jumps= value, or provide default. */
583 ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
584 if (ix86_align_jumps_string)
586 ix86_align_jumps = atoi (ix86_align_jumps_string);
587 if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
588 fatal ("-malign-jumps=%d is not between 0 and %d",
589 ix86_align_jumps, MAX_CODE_ALIGN);
592 /* Validate -malign-functions= value, or provide default. */
593 ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
594 if (ix86_align_funcs_string)
596 ix86_align_funcs = atoi (ix86_align_funcs_string);
597 if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
598 fatal ("-malign-functions=%d is not between 0 and %d",
599 ix86_align_funcs, MAX_CODE_ALIGN);
602 /* Validate -mpreferred-stack-boundary= value, or provide default.
603 The default of 128 bits is for Pentium III's SSE __m128. */
604 ix86_preferred_stack_boundary = 128;
605 if (ix86_preferred_stack_boundary_string)
607 int i = atoi (ix86_preferred_stack_boundary_string);
609 fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
610 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
613 /* Validate -mbranch-cost= value, or provide default. */
614 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
615 if (ix86_branch_cost_string)
617 ix86_branch_cost = atoi (ix86_branch_cost_string);
618 if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
619 fatal ("-mbranch-cost=%d is not between 0 and 5",
623 /* Keep nonleaf frame pointers. */
624 if (TARGET_OMIT_LEAF_FRAME_POINTER)
625 flag_omit_frame_pointer = 1;
627 /* If we're doing fast math, we don't care about comparison order
628 wrt NaNs. This lets us use a shorter comparison sequence. */
630 target_flags &= ~MASK_IEEE_FP;
632 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
635 target_flags |= MASK_MMX;
638 /* A C statement (sans semicolon) to choose the order in which to
639 allocate hard registers for pseudo-registers local to a basic
642 Store the desired register order in the array `reg_alloc_order'.
643 Element 0 should be the register to allocate first; element 1, the
644 next register; and so on.
646 The macro body should not assume anything about the contents of
647 `reg_alloc_order' before execution of the macro.
649 On most machines, it is not necessary to define this macro. */
652 order_regs_for_local_alloc ()
656 /* User specified the register allocation order. */
658 if (ix86_reg_alloc_order)
660 for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
666 case 'a': regno = 0; break;
667 case 'd': regno = 1; break;
668 case 'c': regno = 2; break;
669 case 'b': regno = 3; break;
670 case 'S': regno = 4; break;
671 case 'D': regno = 5; break;
672 case 'B': regno = 6; break;
675 reg_alloc_order[order++] = regno;
678 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
680 if (! regs_allocated[i])
681 reg_alloc_order[order++] = i;
685 /* If user did not specify a register allocation order, use natural order. */
688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
689 reg_alloc_order[i] = i;
694 optimization_options (level, size)
696 int size ATTRIBUTE_UNUSED;
698 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
699 make the problem with not enough registers even worse. */
700 #ifdef INSN_SCHEDULING
702 flag_schedule_insns = 0;
706 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
707 attribute for DECL. The attributes in ATTRIBUTES have previously been
711 ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
712 tree decl ATTRIBUTE_UNUSED;
713 tree attributes ATTRIBUTE_UNUSED;
714 tree identifier ATTRIBUTE_UNUSED;
715 tree args ATTRIBUTE_UNUSED;
720 /* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
721 attribute for TYPE. The attributes in ATTRIBUTES have previously been
725 ix86_valid_type_attribute_p (type, attributes, identifier, args)
727 tree attributes ATTRIBUTE_UNUSED;
731 if (TREE_CODE (type) != FUNCTION_TYPE
732 && TREE_CODE (type) != METHOD_TYPE
733 && TREE_CODE (type) != FIELD_DECL
734 && TREE_CODE (type) != TYPE_DECL)
737 /* Stdcall attribute says callee is responsible for popping arguments
738 if they are not variable. */
739 if (is_attribute_p ("stdcall", identifier))
740 return (args == NULL_TREE);
742 /* Cdecl attribute says the callee is a normal C declaration. */
743 if (is_attribute_p ("cdecl", identifier))
744 return (args == NULL_TREE);
746 /* Regparm attribute specifies how many integer arguments are to be
747 passed in registers. */
748 if (is_attribute_p ("regparm", identifier))
752 if (! args || TREE_CODE (args) != TREE_LIST
753 || TREE_CHAIN (args) != NULL_TREE
754 || TREE_VALUE (args) == NULL_TREE)
757 cst = TREE_VALUE (args);
758 if (TREE_CODE (cst) != INTEGER_CST)
761 if (compare_tree_int (cst, REGPARM_MAX) > 0)
770 /* Return 0 if the attributes for two types are incompatible, 1 if they
771 are compatible, and 2 if they are nearly compatible (which causes a
772 warning to be generated). */
775 ix86_comp_type_attributes (type1, type2)
779 /* Check for mismatch of non-default calling convention. */
780 const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
782 if (TREE_CODE (type1) != FUNCTION_TYPE)
785 /* Check for mismatched return types (cdecl vs stdcall). */
786 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
787 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
792 /* Value is the number of bytes of arguments automatically
793 popped when returning from a subroutine call.
794 FUNDECL is the declaration node of the function (as a tree),
795 FUNTYPE is the data type of the function (as a tree),
796 or for a library call it is an identifier node for the subroutine name.
797 SIZE is the number of bytes of arguments passed on the stack.
799 On the 80386, the RTD insn may be used to pop them if the number
800 of args is fixed, but if the number is variable then the caller
801 must pop them all. RTD can't be used for library calls now
802 because the library is compiled with the Unix compiler.
803 Use of RTD is a selectable option, since it is incompatible with
804 standard Unix calling sequences. If the option is not selected,
805 the caller must always pop the args.
807 The attribute stdcall is equivalent to RTD on a per module basis. */
810 ix86_return_pops_args (fundecl, funtype, size)
815 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
817 /* Cdecl functions override -mrtd, and never pop the stack. */
818 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
820 /* Stdcall functions will pop the stack if not variable args. */
821 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
825 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
826 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
831 /* Lose any fake structure return argument. */
832 if (aggregate_value_p (TREE_TYPE (funtype)))
833 return GET_MODE_SIZE (Pmode);
838 /* Argument support functions. */
840 /* Initialize a variable CUM of type CUMULATIVE_ARGS
841 for a call to a function whose data type is FNTYPE.
842 For a library call, FNTYPE is 0. */
845 init_cumulative_args (cum, fntype, libname)
846 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
847 tree fntype; /* tree ptr for function decl */
848 rtx libname; /* SYMBOL_REF of library name or 0 */
850 static CUMULATIVE_ARGS zero_cum;
851 tree param, next_param;
853 if (TARGET_DEBUG_ARG)
855 fprintf (stderr, "\ninit_cumulative_args (");
857 fprintf (stderr, "fntype code = %s, ret code = %s",
858 tree_code_name[(int) TREE_CODE (fntype)],
859 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
861 fprintf (stderr, "no fntype");
864 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
869 /* Set up the number of registers to use for passing arguments. */
870 cum->nregs = ix86_regparm;
873 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
876 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
879 /* Determine if this function has variable arguments. This is
880 indicated by the last argument being 'void_type_mode' if there
881 are no variable arguments. If there are variable arguments, then
882 we won't pass anything in registers */
886 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
887 param != 0; param = next_param)
889 next_param = TREE_CHAIN (param);
890 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
895 if (TARGET_DEBUG_ARG)
896 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
901 /* Update the data in CUM to advance over an argument
902 of mode MODE and data type TYPE.
903 (TYPE is null for libcalls where that information may not be available.) */
906 function_arg_advance (cum, mode, type, named)
907 CUMULATIVE_ARGS *cum; /* current arg information */
908 enum machine_mode mode; /* current arg mode */
909 tree type; /* type of the argument or 0 if lib support */
910 int named; /* whether or not the argument was named */
913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
914 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
916 if (TARGET_DEBUG_ARG)
918 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
919 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
934 /* Define where to put the arguments to a function.
935 Value is zero to push the argument on the stack,
936 or a hard register in which to store the argument.
938 MODE is the argument's machine mode.
939 TYPE is the data type of the argument (as a tree).
940 This is null for libcalls where that information may
942 CUM is a variable of type CUMULATIVE_ARGS which gives info about
943 the preceding args and about the function being called.
944 NAMED is nonzero if this argument is a named parameter
945 (otherwise it is an extra parameter matching an ellipsis). */
948 function_arg (cum, mode, type, named)
949 CUMULATIVE_ARGS *cum; /* current arg information */
950 enum machine_mode mode; /* current arg mode */
951 tree type; /* type of the argument or 0 if lib support */
952 int named; /* != 0 for normal args, == 0 for ... args */
956 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
957 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
961 /* For now, pass fp/complex values on the stack. */
970 if (words <= cum->nregs)
971 ret = gen_rtx_REG (mode, cum->regno);
975 if (TARGET_DEBUG_ARG)
978 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
979 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
982 fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO(ret) ]);
984 fprintf (stderr, ", stack");
986 fprintf (stderr, " )\n");
993 /* Return nonzero if OP is (const_int 1), else return zero. */
996 const_int_1_operand (op, mode)
998 enum machine_mode mode ATTRIBUTE_UNUSED;
1000 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
1003 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
1004 reference and a constant. */
1007 symbolic_operand (op, mode)
1009 enum machine_mode mode ATTRIBUTE_UNUSED;
1011 switch (GET_CODE (op))
1019 if (GET_CODE (op) == SYMBOL_REF
1020 || GET_CODE (op) == LABEL_REF
1021 || (GET_CODE (op) == UNSPEC
1022 && XINT (op, 1) >= 6
1023 && XINT (op, 1) <= 7))
1025 if (GET_CODE (op) != PLUS
1026 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1030 if (GET_CODE (op) == SYMBOL_REF
1031 || GET_CODE (op) == LABEL_REF)
1033 /* Only @GOTOFF gets offsets. */
1034 if (GET_CODE (op) != UNSPEC
1035 || XINT (op, 1) != 7)
1038 op = XVECEXP (op, 0, 0);
1039 if (GET_CODE (op) == SYMBOL_REF
1040 || GET_CODE (op) == LABEL_REF)
1049 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
1052 pic_symbolic_operand (op, mode)
1054 enum machine_mode mode ATTRIBUTE_UNUSED;
1056 if (GET_CODE (op) == CONST)
1059 if (GET_CODE (op) == UNSPEC)
1061 if (GET_CODE (op) != PLUS
1062 || GET_CODE (XEXP (op, 1)) != CONST_INT)
1065 if (GET_CODE (op) == UNSPEC)
1071 /* Test for a valid operand for a call instruction. Don't allow the
1072 arg pointer register or virtual regs since they may decay into
1073 reg + const, which the patterns can't handle. */
1076 call_insn_operand (op, mode)
1078 enum machine_mode mode ATTRIBUTE_UNUSED;
1080 /* Disallow indirect through a virtual register. This leads to
1081 compiler aborts when trying to eliminate them. */
1082 if (GET_CODE (op) == REG
1083 && (op == arg_pointer_rtx
1084 || op == frame_pointer_rtx
1085 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
1086 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
1089 /* Disallow `call 1234'. Due to varying assembler lameness this
1090 gets either rejected or translated to `call .+1234'. */
1091 if (GET_CODE (op) == CONST_INT)
1094 /* Explicitly allow SYMBOL_REF even if pic. */
1095 if (GET_CODE (op) == SYMBOL_REF)
1098 /* Half-pic doesn't allow anything but registers and constants.
1099 We've just taken care of the later. */
1101 return register_operand (op, Pmode);
1103 /* Otherwise we can allow any general_operand in the address. */
1104 return general_operand (op, Pmode);
1108 constant_call_address_operand (op, mode)
1110 enum machine_mode mode ATTRIBUTE_UNUSED;
1112 return GET_CODE (op) == SYMBOL_REF;
1115 /* Match exactly zero and one. */
1118 const0_operand (op, mode)
1120 enum machine_mode mode;
1122 return op == CONST0_RTX (mode);
1126 const1_operand (op, mode)
1128 enum machine_mode mode ATTRIBUTE_UNUSED;
1130 return op == const1_rtx;
1133 /* Match 2, 4, or 8. Used for leal multiplicands. */
1136 const248_operand (op, mode)
1138 enum machine_mode mode ATTRIBUTE_UNUSED;
1140 return (GET_CODE (op) == CONST_INT
1141 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
1144 /* True if this is a constant appropriate for an increment or decremenmt. */
1147 incdec_operand (op, mode)
1149 enum machine_mode mode;
1151 if (op == const1_rtx || op == constm1_rtx)
1153 if (GET_CODE (op) != CONST_INT)
1155 if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
1157 if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
1159 if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
1164 /* Return false if this is the stack pointer, or any other fake
1165 register eliminable to the stack pointer. Otherwise, this is
1168 This is used to prevent esp from being used as an index reg.
1169 Which would only happen in pathological cases. */
1172 reg_no_sp_operand (op, mode)
1174 enum machine_mode mode;
1177 if (GET_CODE (t) == SUBREG)
1179 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
1182 return register_operand (op, mode);
1186 mmx_reg_operand (op, mode)
1188 enum machine_mode mode ATTRIBUTE_UNUSED;
1190 return MMX_REG_P (op);
1193 /* Return false if this is any eliminable register. Otherwise
1197 general_no_elim_operand (op, mode)
1199 enum machine_mode mode;
1202 if (GET_CODE (t) == SUBREG)
1204 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1205 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1206 || t == virtual_stack_dynamic_rtx)
1209 return general_operand (op, mode);
1212 /* Return false if this is any eliminable register. Otherwise
1213 register_operand or const_int. */
1216 nonmemory_no_elim_operand (op, mode)
1218 enum machine_mode mode;
1221 if (GET_CODE (t) == SUBREG)
1223 if (t == arg_pointer_rtx || t == frame_pointer_rtx
1224 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
1225 || t == virtual_stack_dynamic_rtx)
1228 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
1231 /* Return true if op is a Q_REGS class register. */
1234 q_regs_operand (op, mode)
1236 enum machine_mode mode;
1238 if (mode != VOIDmode && GET_MODE (op) != mode)
1240 if (GET_CODE (op) == SUBREG)
1241 op = SUBREG_REG (op);
1242 return QI_REG_P (op);
1245 /* Return true if op is a NON_Q_REGS class register. */
1248 non_q_regs_operand (op, mode)
1250 enum machine_mode mode;
1252 if (mode != VOIDmode && GET_MODE (op) != mode)
1254 if (GET_CODE (op) == SUBREG)
1255 op = SUBREG_REG (op);
1256 return NON_QI_REG_P (op);
1259 /* Return 1 if OP is a comparison operator that can use the condition code
1260 generated by a logical operation, which characteristicly does not set
1261 overflow or carry. To be used with CCNOmode. */
1264 no_comparison_operator (op, mode)
1266 enum machine_mode mode;
1268 if (mode != VOIDmode && GET_MODE (op) != mode)
1271 switch (GET_CODE (op))
1275 case LEU: case LTU: case GEU: case GTU:
1283 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
1286 sse_comparison_operator (op, mode)
1288 enum machine_mode mode ATTRIBUTE_UNUSED;
1290 enum rtx_code code = GET_CODE (op);
1291 return code == EQ || code == LT || code == LE || code == UNORDERED;
1293 /* Return 1 if OP is a valid comparison operator in valid mode. */
1295 ix86_comparison_operator (op, mode)
1297 enum machine_mode mode;
1299 enum machine_mode inmode;
1300 if (mode != VOIDmode && GET_MODE (op) != mode)
1302 switch (GET_CODE (op))
1307 inmode = GET_MODE (XEXP (op, 0));
1308 if (inmode == CCmode || inmode == CCGCmode
1309 || inmode == CCGOCmode || inmode == CCNOmode)
1312 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1313 inmode = GET_MODE (XEXP (op, 0));
1314 if (inmode == CCmode)
1318 inmode = GET_MODE (XEXP (op, 0));
1319 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
1327 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
1330 fcmov_comparison_operator (op, mode)
1332 enum machine_mode mode;
1334 enum machine_mode inmode = GET_MODE (XEXP (op, 0));
1335 if (mode != VOIDmode && GET_MODE (op) != mode)
1337 switch (GET_CODE (op))
1341 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
1342 if (inmode == CCmode)
1350 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
1353 promotable_binary_operator (op, mode)
1355 enum machine_mode mode ATTRIBUTE_UNUSED;
1357 switch (GET_CODE (op))
1360 /* Modern CPUs have same latency for HImode and SImode multiply,
1361 but 386 and 486 do HImode multiply faster. */
1362 return ix86_cpu > PROCESSOR_I486;
1374 /* Nearly general operand, but accept any const_double, since we wish
1375 to be able to drop them into memory rather than have them get pulled
1379 cmp_fp_expander_operand (op, mode)
1381 enum machine_mode mode;
1383 if (mode != VOIDmode && mode != GET_MODE (op))
1385 if (GET_CODE (op) == CONST_DOUBLE)
1387 return general_operand (op, mode);
1390 /* Match an SI or HImode register for a zero_extract. */
1393 ext_register_operand (op, mode)
1395 enum machine_mode mode ATTRIBUTE_UNUSED;
1397 if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
1399 return register_operand (op, VOIDmode);
1402 /* Return 1 if this is a valid binary floating-point operation.
1403 OP is the expression matched, and MODE is its mode. */
1406 binary_fp_operator (op, mode)
1408 enum machine_mode mode;
1410 if (mode != VOIDmode && mode != GET_MODE (op))
1413 switch (GET_CODE (op))
1419 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
1427 mult_operator(op, mode)
1429 enum machine_mode mode ATTRIBUTE_UNUSED;
1431 return GET_CODE (op) == MULT;
1435 div_operator(op, mode)
1437 enum machine_mode mode ATTRIBUTE_UNUSED;
1439 return GET_CODE (op) == DIV;
1443 arith_or_logical_operator (op, mode)
1445 enum machine_mode mode;
1447 return ((mode == VOIDmode || GET_MODE (op) == mode)
1448 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
1449 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
1452 /* Returns 1 if OP is memory operand with a displacement. */
1455 memory_displacement_operand (op, mode)
1457 enum machine_mode mode;
1459 struct ix86_address parts;
1461 if (! memory_operand (op, mode))
1464 if (! ix86_decompose_address (XEXP (op, 0), &parts))
1467 return parts.disp != NULL_RTX;
1470 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
1471 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
1473 ??? It seems likely that this will only work because cmpsi is an
1474 expander, and no actual insns use this. */
1477 cmpsi_operand (op, mode)
1479 enum machine_mode mode;
1481 if (general_operand (op, mode))
1484 if (GET_CODE (op) == AND
1485 && GET_MODE (op) == SImode
1486 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
1487 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
1488 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
1489 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
1490 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
1491 && GET_CODE (XEXP (op, 1)) == CONST_INT)
1497 /* Returns 1 if OP is memory operand that can not be represented by the
1501 long_memory_operand (op, mode)
1503 enum machine_mode mode;
1505 if (! memory_operand (op, mode))
1508 return memory_address_length (op) != 0;
1511 /* Return nonzero if the rtx is known aligned. */
1514 aligned_operand (op, mode)
1516 enum machine_mode mode;
1518 struct ix86_address parts;
1520 if (!general_operand (op, mode))
1523 /* Registers and immediate operands are always "aligned". */
1524 if (GET_CODE (op) != MEM)
1527 /* Don't even try to do any aligned optimizations with volatiles. */
1528 if (MEM_VOLATILE_P (op))
1533 /* Pushes and pops are only valid on the stack pointer. */
1534 if (GET_CODE (op) == PRE_DEC
1535 || GET_CODE (op) == POST_INC)
1538 /* Decode the address. */
1539 if (! ix86_decompose_address (op, &parts))
1542 /* Look for some component that isn't known to be aligned. */
1546 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
1551 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
1556 if (GET_CODE (parts.disp) != CONST_INT
1557 || (INTVAL (parts.disp) & 3) != 0)
1561 /* Didn't find one -- this must be an aligned address. */
1565 /* Return true if the constant is something that can be loaded with
1566 a special instruction. Only handle 0.0 and 1.0; others are less
1570 standard_80387_constant_p (x)
1573 if (GET_CODE (x) != CONST_DOUBLE)
1576 #if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
1582 if (setjmp (handler))
1585 set_float_handler (handler);
1586 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
1587 is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
1588 is1 = REAL_VALUES_EQUAL (d, dconst1);
1589 set_float_handler (NULL_PTR);
1597 /* Note that on the 80387, other constants, such as pi,
1598 are much slower to load as standard constants
1599 than to load from doubles in memory! */
1600 /* ??? Not true on K6: all constants are equal cost. */
1607 /* Returns 1 if OP contains a symbol reference */
1610 symbolic_reference_mentioned_p (op)
1613 register const char *fmt;
1616 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
1619 fmt = GET_RTX_FORMAT (GET_CODE (op));
1620 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
1626 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
1627 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
1631 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
1638 /* Return 1 if it is appropriate to emit `ret' instructions in the
1639 body of a function. Do this only if the epilogue is simple, needing a
1640 couple of insns. Prior to reloading, we can't tell how many registers
1641 must be saved, so return 0 then. Return 0 if there is no frame
1642 marker to de-allocate.
1644 If NON_SAVING_SETJMP is defined and true, then it is not possible
1645 for the epilogue to be simple, so return 0. This is a special case
1646 since NON_SAVING_SETJMP will not cause regs_ever_live to change
1647 until final, but jump_optimize may need to know sooner if a
1651 ix86_can_use_return_insn_p ()
1653 HOST_WIDE_INT tsize;
1656 #ifdef NON_SAVING_SETJMP
1657 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
1660 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
1661 if (profile_block_flag == 2)
1665 if (! reload_completed || frame_pointer_needed)
1668 /* Don't allow more than 32 pop, since that's all we can do
1669 with one instruction. */
1670 if (current_function_pops_args
1671 && current_function_args_size >= 32768)
1674 tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
1675 return tsize == 0 && nregs == 0;
1678 static char *pic_label_name;
1679 static int pic_label_output;
1680 static char *global_offset_table_name;
1682 /* This function generates code for -fpic that loads %ebx with
1683 the return address of the caller and then returns. */
1686 asm_output_function_prefix (file, name)
1688 const char *name ATTRIBUTE_UNUSED;
1691 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1692 || current_function_uses_const_pool);
1693 xops[0] = pic_offset_table_rtx;
1694 xops[1] = stack_pointer_rtx;
1696 /* Deep branch prediction favors having a return for every call. */
1697 if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
1699 if (!pic_label_output)
1701 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
1702 internal (non-global) label that's being emitted, it didn't make
1703 sense to have .type information for local labels. This caused
1704 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
1705 me debug info for a label that you're declaring non-global?) this
1706 was changed to call ASM_OUTPUT_LABEL() instead. */
1708 ASM_OUTPUT_LABEL (file, pic_label_name);
1710 xops[1] = gen_rtx_MEM (SImode, xops[1]);
1711 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
1712 output_asm_insn ("ret", xops);
1714 pic_label_output = 1;
1720 load_pic_register ()
1724 if (global_offset_table_name == NULL)
1726 global_offset_table_name =
1727 ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
1728 ggc_add_string_root (&global_offset_table_name, 1);
1730 gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);
1732 if (TARGET_DEEP_BRANCH_PREDICTION)
1734 if (pic_label_name == NULL)
1736 pic_label_name = ggc_alloc_string (NULL, 32);
1737 ggc_add_string_root (&pic_label_name, 1);
1738 ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
1740 pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
1744 pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
1747 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
1749 if (! TARGET_DEEP_BRANCH_PREDICTION)
1750 emit_insn (gen_popsi1 (pic_offset_table_rtx));
1752 emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
1755 /* Generate an SImode "push" pattern for input ARG. */
1761 return gen_rtx_SET (VOIDmode,
1762 gen_rtx_MEM (SImode,
1763 gen_rtx_PRE_DEC (SImode,
1764 stack_pointer_rtx)),
1768 /* Return number of registers to be saved on the stack. */
1774 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1775 || current_function_uses_const_pool);
1776 int limit = (frame_pointer_needed
1777 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1780 for (regno = limit - 1; regno >= 0; regno--)
1781 if ((regs_ever_live[regno] && ! call_used_regs[regno])
1782 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1789 /* Return the offset between two registers, one to be eliminated, and the other
1790 its replacement, at the start of a routine. */
1793 ix86_initial_elimination_offset (from, to)
1800 /* Stack grows downward:
1806 saved frame pointer if frame_pointer_needed
1807 <- HARD_FRAME_POINTER
1817 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
1818 /* Skip saved PC and previous frame pointer.
1819 Executed only when frame_pointer_needed. */
1821 else if (from == FRAME_POINTER_REGNUM
1822 && to == HARD_FRAME_POINTER_REGNUM)
1824 ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
1825 padding1 += nregs * UNITS_PER_WORD;
1830 /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination. */
1831 int frame_size = frame_pointer_needed ? 8 : 4;
1832 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
1833 &nregs, &padding1, (int *) 0);
1835 if (to != STACK_POINTER_REGNUM)
1837 else if (from == ARG_POINTER_REGNUM)
1838 return tsize + nregs * UNITS_PER_WORD + frame_size;
1839 else if (from != FRAME_POINTER_REGNUM)
1842 return tsize - padding1;
1846 /* Compute the size of local storage taking into consideration the
1847 desired stack alignment which is to be maintained. Also determine
1848 the number of registers saved below the local storage.
1850 PADDING1 returns padding before stack frame and PADDING2 returns
1851 padding after stack frame;
1854 static HOST_WIDE_INT
1855 ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
1857 int *nregs_on_stack;
1864 HOST_WIDE_INT total_size;
1865 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1867 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1869 nregs = ix86_nsaved_regs ();
1872 offset = frame_pointer_needed ? 8 : 4;
1874 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1875 since i386 port is the only using those features that may break easilly. */
1877 if (size && !stack_alignment_needed)
1879 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
1881 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1883 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1885 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1888 if (stack_alignment_needed < 4)
1889 stack_alignment_needed = 4;
1891 offset += nregs * UNITS_PER_WORD;
1893 if (ACCUMULATE_OUTGOING_ARGS)
1894 total_size += current_function_outgoing_args_size;
1896 total_size += offset;
1898 /* Align start of frame for local function. */
1899 padding1 = ((offset + stack_alignment_needed - 1)
1900 & -stack_alignment_needed) - offset;
1901 total_size += padding1;
1903 /* Align stack boundary. */
1904 padding2 = ((total_size + preferred_alignment - 1)
1905 & -preferred_alignment) - total_size;
1907 if (ACCUMULATE_OUTGOING_ARGS)
1908 padding2 += current_function_outgoing_args_size;
1911 *nregs_on_stack = nregs;
1913 *rpadding1 = padding1;
1915 *rpadding2 = padding2;
1917 return size + padding1 + padding2;
1920 /* Emit code to save registers in the prologue. */
1923 ix86_emit_save_regs ()
1928 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1929 || current_function_uses_const_pool);
1930 limit = (frame_pointer_needed
1931 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1933 for (regno = limit - 1; regno >= 0; regno--)
1934 if ((regs_ever_live[regno] && !call_used_regs[regno])
1935 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1937 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1938 RTX_FRAME_RELATED_P (insn) = 1;
1942 /* Expand the prologue into a bunch of separate insns. */
1945 ix86_expand_prologue ()
1947 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *) 0, (int *) 0,
1950 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1951 || current_function_uses_const_pool);
1953 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1954 slower on all targets. Also sdb doesn't like it. */
1956 if (frame_pointer_needed)
1958 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1959 RTX_FRAME_RELATED_P (insn) = 1;
1961 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1962 RTX_FRAME_RELATED_P (insn) = 1;
1965 ix86_emit_save_regs ();
1969 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1971 if (frame_pointer_needed)
1972 insn = emit_insn (gen_pro_epilogue_adjust_stack
1973 (stack_pointer_rtx, stack_pointer_rtx,
1974 GEN_INT (-tsize), hard_frame_pointer_rtx));
1976 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1978 RTX_FRAME_RELATED_P (insn) = 1;
1982 /* ??? Is this only valid for Win32? */
1986 arg0 = gen_rtx_REG (SImode, 0);
1987 emit_move_insn (arg0, GEN_INT (tsize));
1989 sym = gen_rtx_MEM (FUNCTION_MODE,
1990 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1991 insn = emit_call_insn (gen_call (sym, const0_rtx));
1993 CALL_INSN_FUNCTION_USAGE (insn)
1994 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1995 CALL_INSN_FUNCTION_USAGE (insn));
1998 #ifdef SUBTARGET_PROLOGUE
2003 load_pic_register ();
2005 /* If we are profiling, make sure no instructions are scheduled before
2006 the call to mcount. However, if -fpic, the above call will have
2008 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
2009 emit_insn (gen_blockage ());
2012 /* Emit code to add TSIZE to esp value. Use POP instruction when
2016 ix86_emit_epilogue_esp_adjustment (tsize)
2019 /* If a frame pointer is present, we must be sure to tie the sp
2020 to the fp so that we don't mis-schedule. */
2021 if (frame_pointer_needed)
2022 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2025 hard_frame_pointer_rtx));
2027 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2031 /* Emit code to restore saved registers using MOV insns. First register
2032 is restored from POINTER + OFFSET. */
2034 ix86_emit_restore_regs_using_mov (pointer, offset)
2039 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2040 || current_function_uses_const_pool);
2041 int limit = (frame_pointer_needed
2042 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
2044 for (regno = 0; regno < limit; regno++)
2045 if ((regs_ever_live[regno] && !call_used_regs[regno])
2046 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2048 emit_move_insn (gen_rtx_REG (SImode, regno),
2049 adj_offsettable_operand (gen_rtx_MEM (SImode,
2056 /* Restore function stack, frame, and registers. */
2059 ix86_expand_epilogue (emit_return)
2065 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2066 || current_function_uses_const_pool);
2067 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2068 HOST_WIDE_INT offset;
2069 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2070 (int *) 0, (int *) 0);
2072 /* Calculate start of saved registers relative to ebp. */
2073 offset = -nregs * UNITS_PER_WORD;
2075 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2076 if (profile_block_flag == 2)
2078 FUNCTION_BLOCK_PROFILER_EXIT;
2082 /* If we're only restoring one register and sp is not valid then
2083 using a move instruction to restore the register since it's
2084 less work than reloading sp and popping the register.
2086 The default code result in stack adjustment using add/lea instruction,
2087 while this code results in LEAVE instruction (or discrete equivalent),
2088 so it is profitable in some other cases as well. Especially when there
2089 are no registers to restore. We also use this code when TARGET_USE_LEAVE
2090 and there is exactly one register to pop. This heruistic may need some
2091 tuning in future. */
2092 if ((!sp_valid && nregs <= 1)
2093 || (frame_pointer_needed && !nregs && tsize)
2094 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2097 /* Restore registers. We can use ebp or esp to address the memory
2098 locations. If both are available, default to ebp, since offsets
2099 are known to be small. Only exception is esp pointing directly to the
2100 end of block of saved registers, where we may simplify addressing
2103 if (!frame_pointer_needed || (sp_valid && !tsize))
2104 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2106 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2108 if (!frame_pointer_needed)
2109 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2110 /* If not an i386, mov & pop is faster than "leave". */
2111 else if (TARGET_USE_LEAVE || optimize_size)
2112 emit_insn (gen_leave ());
2115 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2116 hard_frame_pointer_rtx,
2118 hard_frame_pointer_rtx));
2119 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2124 /* First step is to deallocate the stack frame so that we can
2125 pop the registers. */
2128 if (!frame_pointer_needed)
2130 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2131 hard_frame_pointer_rtx,
2133 hard_frame_pointer_rtx));
2136 ix86_emit_epilogue_esp_adjustment (tsize);
2138 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2139 if ((regs_ever_live[regno] && !call_used_regs[regno])
2140 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2141 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2144 /* Sibcall epilogues don't want a return instruction. */
2148 if (current_function_pops_args && current_function_args_size)
2150 rtx popc = GEN_INT (current_function_pops_args);
2152 /* i386 can only pop 64K bytes. If asked to pop more, pop
2153 return address, do explicit add, and jump indirectly to the
2156 if (current_function_pops_args >= 65536)
2158 rtx ecx = gen_rtx_REG (SImode, 2);
2160 emit_insn (gen_popsi1 (ecx));
2161 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2162 emit_jump_insn (gen_return_indirect_internal (ecx));
2165 emit_jump_insn (gen_return_pop_internal (popc));
2168 emit_jump_insn (gen_return_internal ());
2171 /* Extract the parts of an RTL expression that is a valid memory address
2172 for an instruction. Return false if the structure of the address is
2176 ix86_decompose_address (addr, out)
2178 struct ix86_address *out;
2180 rtx base = NULL_RTX;
2181 rtx index = NULL_RTX;
2182 rtx disp = NULL_RTX;
2183 HOST_WIDE_INT scale = 1;
2184 rtx scale_rtx = NULL_RTX;
2186 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2188 else if (GET_CODE (addr) == PLUS)
2190 rtx op0 = XEXP (addr, 0);
2191 rtx op1 = XEXP (addr, 1);
2192 enum rtx_code code0 = GET_CODE (op0);
2193 enum rtx_code code1 = GET_CODE (op1);
2195 if (code0 == REG || code0 == SUBREG)
2197 if (code1 == REG || code1 == SUBREG)
2198 index = op0, base = op1; /* index + base */
2200 base = op0, disp = op1; /* base + displacement */
2202 else if (code0 == MULT)
2204 index = XEXP (op0, 0);
2205 scale_rtx = XEXP (op0, 1);
2206 if (code1 == REG || code1 == SUBREG)
2207 base = op1; /* index*scale + base */
2209 disp = op1; /* index*scale + disp */
2211 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2213 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2214 scale_rtx = XEXP (XEXP (op0, 0), 1);
2215 base = XEXP (op0, 1);
2218 else if (code0 == PLUS)
2220 index = XEXP (op0, 0); /* index + base + disp */
2221 base = XEXP (op0, 1);
2227 else if (GET_CODE (addr) == MULT)
2229 index = XEXP (addr, 0); /* index*scale */
2230 scale_rtx = XEXP (addr, 1);
2232 else if (GET_CODE (addr) == ASHIFT)
2236 /* We're called for lea too, which implements ashift on occasion. */
2237 index = XEXP (addr, 0);
2238 tmp = XEXP (addr, 1);
2239 if (GET_CODE (tmp) != CONST_INT)
2241 scale = INTVAL (tmp);
2242 if ((unsigned HOST_WIDE_INT) scale > 3)
2247 disp = addr; /* displacement */
2249 /* Extract the integral value of scale. */
2252 if (GET_CODE (scale_rtx) != CONST_INT)
2254 scale = INTVAL (scale_rtx);
2257 /* Allow arg pointer and stack pointer as index if there is not scaling */
2258 if (base && index && scale == 1
2259 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2260 || index == stack_pointer_rtx))
2267 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2268 if ((base == hard_frame_pointer_rtx
2269 || base == frame_pointer_rtx
2270 || base == arg_pointer_rtx) && !disp)
2273 /* Special case: on K6, [%esi] makes the instruction vector decoded.
2274 Avoid this by transforming to [%esi+0]. */
2275 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2276 && base && !index && !disp
2278 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2281 /* Special case: encode reg+reg instead of reg*2. */
2282 if (!base && index && scale && scale == 2)
2283 base = index, scale = 1;
2285 /* Special case: scaling cannot be encoded without base or displacement. */
2286 if (!base && !disp && index && scale != 1)
2297 /* Return cost of the memory address x.
2298 For i386, it is better to use a complex address than let gcc copy
2299 the address into a reg and make a new pseudo. But not if the address
2300 requires to two regs - that would mean more pseudos with longer
2303 ix86_address_cost (x)
2306 struct ix86_address parts;
2309 if (!ix86_decompose_address (x, &parts))
2312 /* More complex memory references are better. */
2313 if (parts.disp && parts.disp != const0_rtx)
2316 /* Attempt to minimize number of registers in the address. */
2318 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2320 && (!REG_P (parts.index)
2321 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2325 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2327 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2328 && parts.base != parts.index)
2331 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
2332 since it's predecode logic can't detect the length of instructions
2333 and it degenerates to vector decoded. Increase cost of such
2334 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
2335 to split such addresses or even refuse such addresses at all.
2337 Following addressing modes are affected:
2342 The first and last case may be avoidable by explicitly coding the zero in
2343 memory address, but I don't have AMD-K6 machine handy to check this
2347 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2348 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2349 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2355 /* If X is a machine specific address (i.e. a symbol or label being
2356 referenced as a displacement from the GOT implemented using an
2357 UNSPEC), then return the base term. Otherwise return X. */
2360 ix86_find_base_term (x)
2365 if (GET_CODE (x) != PLUS
2366 || XEXP (x, 0) != pic_offset_table_rtx
2367 || GET_CODE (XEXP (x, 1)) != CONST)
2370 term = XEXP (XEXP (x, 1), 0);
2372 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
2373 term = XEXP (term, 0);
2375 if (GET_CODE (term) != UNSPEC
2376 || XVECLEN (term, 0) != 1
2377 || XINT (term, 1) != 7)
2380 term = XVECEXP (term, 0, 0);
2382 if (GET_CODE (term) != SYMBOL_REF
2383 && GET_CODE (term) != LABEL_REF)
2389 /* Determine if a given CONST RTX is a valid memory displacement
2393 legitimate_pic_address_disp_p (disp)
2396 if (GET_CODE (disp) != CONST)
2398 disp = XEXP (disp, 0);
2400 if (GET_CODE (disp) == PLUS)
2402 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2404 disp = XEXP (disp, 0);
2407 if (GET_CODE (disp) != UNSPEC
2408 || XVECLEN (disp, 0) != 1)
2411 /* Must be @GOT or @GOTOFF. */
2412 if (XINT (disp, 1) != 6
2413 && XINT (disp, 1) != 7)
2416 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2417 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2423 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2424 memory address for an instruction. The MODE argument is the machine mode
2425 for the MEM expression that wants to use this address.
2427 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
2428 convert common non-canonical forms to canonical form so that they will
2432 legitimate_address_p (mode, addr, strict)
2433 enum machine_mode mode;
2437 struct ix86_address parts;
2438 rtx base, index, disp;
2439 HOST_WIDE_INT scale;
2440 const char *reason = NULL;
2441 rtx reason_rtx = NULL_RTX;
2443 if (TARGET_DEBUG_ADDR)
2446 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2447 GET_MODE_NAME (mode), strict);
2451 if (! ix86_decompose_address (addr, &parts))
2453 reason = "decomposition failed";
2458 index = parts.index;
2460 scale = parts.scale;
2462 /* Validate base register.
2464 Don't allow SUBREG's here, it can lead to spill failures when the base
2465 is one word out of a two word structure, which is represented internally
2472 if (GET_CODE (base) != REG)
2474 reason = "base is not a register";
2478 if (GET_MODE (base) != Pmode)
2480 reason = "base is not in Pmode";
2484 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2485 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2487 reason = "base is not valid";
2492 /* Validate index register.
2494 Don't allow SUBREG's here, it can lead to spill failures when the index
2495 is one word out of a two word structure, which is represented internally
2502 if (GET_CODE (index) != REG)
2504 reason = "index is not a register";
2508 if (GET_MODE (index) != Pmode)
2510 reason = "index is not in Pmode";
2514 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2515 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2517 reason = "index is not valid";
2522 /* Validate scale factor. */
2525 reason_rtx = GEN_INT (scale);
2528 reason = "scale without index";
2532 if (scale != 2 && scale != 4 && scale != 8)
2534 reason = "scale is not a valid multiplier";
2539 /* Validate displacement. */
2544 if (!CONSTANT_ADDRESS_P (disp))
2546 reason = "displacement is not constant";
2550 if (GET_CODE (disp) == CONST_DOUBLE)
2552 reason = "displacement is a const_double";
2556 if (flag_pic && SYMBOLIC_CONST (disp))
2558 if (! legitimate_pic_address_disp_p (disp))
2560 reason = "displacement is an invalid pic construct";
2564 /* This code used to verify that a symbolic pic displacement
2565 includes the pic_offset_table_rtx register.
2567 While this is good idea, unfortunately these constructs may
2568 be created by "adds using lea" optimization for incorrect
2577 This code is nonsensical, but results in addressing
2578 GOT table with pic_offset_table_rtx base. We can't
2579 just refuse it easilly, since it gets matched by
2580 "addsi3" pattern, that later gets split to lea in the
2581 case output register differs from input. While this
2582 can be handled by separate addsi pattern for this case
2583 that never results in lea, this seems to be easier and
2584 correct fix for crash to disable this test. */
2586 else if (HALF_PIC_P ())
2588 if (! HALF_PIC_ADDRESS_P (disp)
2589 || (base != NULL_RTX || index != NULL_RTX))
2591 reason = "displacement is an invalid half-pic reference";
2597 /* Everything looks valid. */
2598 if (TARGET_DEBUG_ADDR)
2599 fprintf (stderr, "Success.\n");
2603 if (TARGET_DEBUG_ADDR)
2605 fprintf (stderr, "Error: %s\n", reason);
2606 debug_rtx (reason_rtx);
2611 /* Return an unique alias set for the GOT. */
2613 static HOST_WIDE_INT
2614 ix86_GOT_alias_set ()
2616 static HOST_WIDE_INT set = -1;
2618 set = new_alias_set ();
2622 /* Return a legitimate reference for ORIG (an address) using the
2623 register REG. If REG is 0, a new pseudo is generated.
2625 There are two types of references that must be handled:
2627 1. Global data references must load the address from the GOT, via
2628 the PIC reg. An insn is emitted to do this load, and the reg is
2631 2. Static data references, constant pool addresses, and code labels
2632 compute the address as an offset from the GOT, whose base is in
2633 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2634 differentiate them from global data objects. The returned
2635 address is the PIC reg + an unspec constant.
2637 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2638 reg also appears in the address. */
2641 legitimize_pic_address (orig, reg)
2649 if (GET_CODE (addr) == LABEL_REF
2650 || (GET_CODE (addr) == SYMBOL_REF
2651 && (CONSTANT_POOL_ADDRESS_P (addr)
2652 || SYMBOL_REF_FLAG (addr))))
2654 /* This symbol may be referenced via a displacement from the PIC
2655 base address (@GOTOFF). */
2657 current_function_uses_pic_offset_table = 1;
2658 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
2659 new = gen_rtx_CONST (Pmode, new);
2660 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2664 emit_move_insn (reg, new);
2668 else if (GET_CODE (addr) == SYMBOL_REF)
2670 /* This symbol must be referenced via a load from the
2671 Global Offset Table (@GOT). */
2673 current_function_uses_pic_offset_table = 1;
2674 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
2675 new = gen_rtx_CONST (Pmode, new);
2676 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2677 new = gen_rtx_MEM (Pmode, new);
2678 RTX_UNCHANGING_P (new) = 1;
2679 MEM_ALIAS_SET (new) = ix86_GOT_alias_set ();
2682 reg = gen_reg_rtx (Pmode);
2683 emit_move_insn (reg, new);
2688 if (GET_CODE (addr) == CONST)
2690 addr = XEXP (addr, 0);
2691 if (GET_CODE (addr) == UNSPEC)
2693 /* Check that the unspec is one of the ones we generate? */
2695 else if (GET_CODE (addr) != PLUS)
2698 if (GET_CODE (addr) == PLUS)
2700 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2702 /* Check first to see if this is a constant offset from a @GOTOFF
2703 symbol reference. */
2704 if ((GET_CODE (op0) == LABEL_REF
2705 || (GET_CODE (op0) == SYMBOL_REF
2706 && (CONSTANT_POOL_ADDRESS_P (op0)
2707 || SYMBOL_REF_FLAG (op0))))
2708 && GET_CODE (op1) == CONST_INT)
2710 current_function_uses_pic_offset_table = 1;
2711 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
2712 new = gen_rtx_PLUS (Pmode, new, op1);
2713 new = gen_rtx_CONST (Pmode, new);
2714 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2718 emit_move_insn (reg, new);
2724 base = legitimize_pic_address (XEXP (addr, 0), reg);
2725 new = legitimize_pic_address (XEXP (addr, 1),
2726 base == reg ? NULL_RTX : reg);
2728 if (GET_CODE (new) == CONST_INT)
2729 new = plus_constant (base, INTVAL (new));
2732 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2734 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2735 new = XEXP (new, 1);
2737 new = gen_rtx_PLUS (Pmode, base, new);
2745 /* Try machine-dependent ways of modifying an illegitimate address
2746 to be legitimate. If we find one, return the new, valid address.
2747 This macro is used in only one place: `memory_address' in explow.c.
2749 OLDX is the address as it was before break_out_memory_refs was called.
2750 In some cases it is useful to look at this to decide what needs to be done.
2752 MODE and WIN are passed so that this macro can use
2753 GO_IF_LEGITIMATE_ADDRESS.
2755 It is always safe for this macro to do nothing. It exists to recognize
2756 opportunities to optimize the output.
2758 For the 80386, we handle X+REG by loading X into a register R and
2759 using R+REG. R will go in a general reg and indexing will be used.
2760 However, if REG is a broken-out memory address or multiplication,
2761 nothing needs to be done because REG can certainly go in a general reg.
2763 When -fpic is used, special handling is needed for symbolic references.
2764 See comments by legitimize_pic_address in i386.c for details. */
2767 legitimize_address (x, oldx, mode)
2769 register rtx oldx ATTRIBUTE_UNUSED;
2770 enum machine_mode mode;
2775 if (TARGET_DEBUG_ADDR)
2777 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2778 GET_MODE_NAME (mode));
2782 if (flag_pic && SYMBOLIC_CONST (x))
2783 return legitimize_pic_address (x, 0);
2785 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2786 if (GET_CODE (x) == ASHIFT
2787 && GET_CODE (XEXP (x, 1)) == CONST_INT
2788 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2791 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2792 GEN_INT (1 << log));
2795 if (GET_CODE (x) == PLUS)
2797 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2799 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2800 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2801 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2804 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2805 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2806 GEN_INT (1 << log));
2809 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2810 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2811 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2814 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2815 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2816 GEN_INT (1 << log));
2819 /* Put multiply first if it isn't already. */
2820 if (GET_CODE (XEXP (x, 1)) == MULT)
2822 rtx tmp = XEXP (x, 0);
2823 XEXP (x, 0) = XEXP (x, 1);
2828 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2829 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2830 created by virtual register instantiation, register elimination, and
2831 similar optimizations. */
2832 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2835 x = gen_rtx_PLUS (Pmode,
2836 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2837 XEXP (XEXP (x, 1), 0)),
2838 XEXP (XEXP (x, 1), 1));
2842 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2843 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2844 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2845 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2846 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2847 && CONSTANT_P (XEXP (x, 1)))
2850 rtx other = NULL_RTX;
2852 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2854 constant = XEXP (x, 1);
2855 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2857 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2859 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2860 other = XEXP (x, 1);
2868 x = gen_rtx_PLUS (Pmode,
2869 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2870 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2871 plus_constant (other, INTVAL (constant)));
2875 if (changed && legitimate_address_p (mode, x, FALSE))
2878 if (GET_CODE (XEXP (x, 0)) == MULT)
2881 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2884 if (GET_CODE (XEXP (x, 1)) == MULT)
2887 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2891 && GET_CODE (XEXP (x, 1)) == REG
2892 && GET_CODE (XEXP (x, 0)) == REG)
2895 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2898 x = legitimize_pic_address (x, 0);
2901 if (changed && legitimate_address_p (mode, x, FALSE))
2904 if (GET_CODE (XEXP (x, 0)) == REG)
2906 register rtx temp = gen_reg_rtx (Pmode);
2907 register rtx val = force_operand (XEXP (x, 1), temp);
2909 emit_move_insn (temp, val);
2915 else if (GET_CODE (XEXP (x, 1)) == REG)
2917 register rtx temp = gen_reg_rtx (Pmode);
2918 register rtx val = force_operand (XEXP (x, 0), temp);
2920 emit_move_insn (temp, val);
2930 /* Print an integer constant expression in assembler syntax. Addition
2931 and subtraction are the only arithmetic that may appear in these
2932 expressions. FILE is the stdio stream to write to, X is the rtx, and
2933 CODE is the operand print code from the output string. */
2936 output_pic_addr_const (file, x, code)
2943 switch (GET_CODE (x))
2953 assemble_name (file, XSTR (x, 0));
2954 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2955 fputs ("@PLT", file);
2962 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2963 assemble_name (asm_out_file, buf);
2967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2971 /* This used to output parentheses around the expression,
2972 but that does not work on the 386 (either ATT or BSD assembler). */
2973 output_pic_addr_const (file, XEXP (x, 0), code);
2977 if (GET_MODE (x) == VOIDmode)
2979 /* We can use %d if the number is <32 bits and positive. */
2980 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2981 fprintf (file, "0x%lx%08lx",
2982 (unsigned long) CONST_DOUBLE_HIGH (x),
2983 (unsigned long) CONST_DOUBLE_LOW (x));
2985 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2988 /* We can't handle floating point constants;
2989 PRINT_OPERAND must handle them. */
2990 output_operand_lossage ("floating constant misused");
2994 /* Some assemblers need integer constants to appear first. */
2995 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2997 output_pic_addr_const (file, XEXP (x, 0), code);
2999 output_pic_addr_const (file, XEXP (x, 1), code);
3001 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3003 output_pic_addr_const (file, XEXP (x, 1), code);
3005 output_pic_addr_const (file, XEXP (x, 0), code);
3012 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
3013 output_pic_addr_const (file, XEXP (x, 0), code);
3015 output_pic_addr_const (file, XEXP (x, 1), code);
3016 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
3020 if (XVECLEN (x, 0) != 1)
3022 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
3023 switch (XINT (x, 1))
3026 fputs ("@GOT", file);
3029 fputs ("@GOTOFF", file);
3032 fputs ("@PLT", file);
3035 output_operand_lossage ("invalid UNSPEC as operand");
3041 output_operand_lossage ("invalid expression as operand");
3045 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
3046 We need to handle our special PIC relocations. */
3049 i386_dwarf_output_addr_const (file, x)
3053 fprintf (file, "%s", INT_ASM_OP);
3055 output_pic_addr_const (file, x, '\0');
3057 output_addr_const (file, x);
3061 /* In the name of slightly smaller debug output, and to cater to
3062 general assembler losage, recognize PIC+GOTOFF and turn it back
3063 into a direct symbol reference. */
3066 i386_simplify_dwarf_addr (orig_x)
3071 if (GET_CODE (x) != PLUS
3072 || GET_CODE (XEXP (x, 0)) != REG
3073 || GET_CODE (XEXP (x, 1)) != CONST)
3076 x = XEXP (XEXP (x, 1), 0);
3077 if (GET_CODE (x) == UNSPEC
3078 && XINT (x, 1) == 7)
3079 return XVECEXP (x, 0, 0);
3081 if (GET_CODE (x) == PLUS
3082 && GET_CODE (XEXP (x, 0)) == UNSPEC
3083 && GET_CODE (XEXP (x, 1)) == CONST_INT
3084 && XINT (XEXP (x, 0), 1) == 7)
3085 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3091 put_condition_code (code, mode, reverse, fp, file)
3093 enum machine_mode mode;
3100 code = reverse_condition (code);
3111 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
3116 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
3117 Those same assemblers have the same but opposite losage on cmov. */
3120 suffix = fp ? "nbe" : "a";
3123 if (mode == CCNOmode || mode == CCGOCmode)
3125 else if (mode == CCmode || mode == CCGCmode)
3136 if (mode == CCNOmode || mode == CCGOCmode)
3138 else if (mode == CCmode || mode == CCGCmode)
3147 suffix = fp ? "nb" : "ae";
3150 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
3168 fputs (suffix, file);
3172 print_reg (x, code, file)
3177 if (REGNO (x) == ARG_POINTER_REGNUM
3178 || REGNO (x) == FRAME_POINTER_REGNUM
3179 || REGNO (x) == FLAGS_REG
3180 || REGNO (x) == FPSR_REG)
3183 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3188 else if (code == 'b')
3190 else if (code == 'k')
3192 else if (code == 'y')
3194 else if (code == 'h')
3196 else if (code == 'm' || MMX_REG_P (x))
3199 code = GET_MODE_SIZE (GET_MODE (x));
3204 fputs (hi_reg_name[REGNO (x)], file);
3207 if (STACK_TOP_P (x))
3209 fputs ("st(0)", file);
3221 fputs (hi_reg_name[REGNO (x)], file);
3224 fputs (qi_reg_name[REGNO (x)], file);
3227 fputs (qi_high_reg_name[REGNO (x)], file);
3235 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3236 C -- print opcode suffix for set/cmov insn.
3237 c -- like C, but print reversed condition
3238 R -- print the prefix for register names.
3239 z -- print the opcode suffix for the size of the current operand.
3240 * -- print a star (in certain assembler syntax)
3241 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3242 s -- print a shift double count, followed by the assemblers argument
3244 b -- print the QImode name of the register for the indicated operand.
3245 %b0 would print %al if operands[0] is reg 0.
3246 w -- likewise, print the HImode name of the register.
3247 k -- likewise, print the SImode name of the register.
3248 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3249 y -- print "st(0)" instead of "st" as a register.
3250 m -- print "st(n)" as an mmx register. */
3253 print_operand (file, x, code)
3263 if (ASSEMBLER_DIALECT == 0)
3268 if (ASSEMBLER_DIALECT == 0)
3273 if (ASSEMBLER_DIALECT == 0)
3278 if (ASSEMBLER_DIALECT == 0)
3283 if (ASSEMBLER_DIALECT == 0)
3288 if (ASSEMBLER_DIALECT == 0)
3293 if (ASSEMBLER_DIALECT == 0)
3298 /* 387 opcodes don't get size suffixes if the operands are
3301 if (STACK_REG_P (x))
3304 /* Intel syntax has no truck with instruction suffixes. */
3305 if (ASSEMBLER_DIALECT != 0)
3308 /* this is the size of op from size of operand */
3309 switch (GET_MODE_SIZE (GET_MODE (x)))
3312 #ifdef HAVE_GAS_FILDS_FISTS
3318 if (GET_MODE (x) == SFmode)
3332 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3334 #ifdef GAS_MNEMONICS
3360 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3362 PRINT_OPERAND (file, x, 0);
3368 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3371 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3374 /* Like above, but reverse condition */
3376 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3379 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3385 sprintf (str, "invalid operand code `%c'", code);
3386 output_operand_lossage (str);
3391 if (GET_CODE (x) == REG)
3393 PRINT_REG (x, code, file);
3396 else if (GET_CODE (x) == MEM)
3398 /* No `byte ptr' prefix for call instructions. */
3399 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3402 switch (GET_MODE_SIZE (GET_MODE (x)))
3404 case 1: size = "BYTE"; break;
3405 case 2: size = "WORD"; break;
3406 case 4: size = "DWORD"; break;
3407 case 8: size = "QWORD"; break;
3408 case 12: size = "XWORD"; break;
3409 case 16: size = "XMMWORD"; break;
3414 fputs (" PTR ", file);
3418 if (flag_pic && CONSTANT_ADDRESS_P (x))
3419 output_pic_addr_const (file, x, code);
3424 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3429 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3430 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3432 if (ASSEMBLER_DIALECT == 0)
3434 fprintf (file, "0x%lx", l);
3437 /* These float cases don't actually occur as immediate operands. */
3438 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3443 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3444 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3445 fprintf (file, "%s", dstr);
3448 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
3453 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3454 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3455 fprintf (file, "%s", dstr);
3461 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3463 if (ASSEMBLER_DIALECT == 0)
3466 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3467 || GET_CODE (x) == LABEL_REF)
3469 if (ASSEMBLER_DIALECT == 0)
3472 fputs ("OFFSET FLAT:", file);
3475 if (GET_CODE (x) == CONST_INT)
3476 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3478 output_pic_addr_const (file, x, code);
3480 output_addr_const (file, x);
3484 /* Print a memory operand whose address is ADDR. */
3487 print_operand_address (file, addr)
3491 struct ix86_address parts;
3492 rtx base, index, disp;
3495 if (! ix86_decompose_address (addr, &parts))
3499 index = parts.index;
3501 scale = parts.scale;
3503 if (!base && !index)
3505 /* Displacement only requires special attention. */
3507 if (GET_CODE (disp) == CONST_INT)
3509 if (ASSEMBLER_DIALECT != 0)
3510 fputs ("ds:", file);
3511 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3514 output_pic_addr_const (file, addr, 0);
3516 output_addr_const (file, addr);
3520 if (ASSEMBLER_DIALECT == 0)
3525 output_pic_addr_const (file, disp, 0);
3526 else if (GET_CODE (disp) == LABEL_REF)
3527 output_asm_label (disp);
3529 output_addr_const (file, disp);
3534 PRINT_REG (base, 0, file);
3538 PRINT_REG (index, 0, file);
3540 fprintf (file, ",%d", scale);
3546 rtx offset = NULL_RTX;
3550 /* Pull out the offset of a symbol; print any symbol itself. */
3551 if (GET_CODE (disp) == CONST
3552 && GET_CODE (XEXP (disp, 0)) == PLUS
3553 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3555 offset = XEXP (XEXP (disp, 0), 1);
3556 disp = gen_rtx_CONST (VOIDmode,
3557 XEXP (XEXP (disp, 0), 0));
3561 output_pic_addr_const (file, disp, 0);
3562 else if (GET_CODE (disp) == LABEL_REF)
3563 output_asm_label (disp);
3564 else if (GET_CODE (disp) == CONST_INT)
3567 output_addr_const (file, disp);
3573 PRINT_REG (base, 0, file);
3576 if (INTVAL (offset) >= 0)
3578 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3582 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3589 PRINT_REG (index, 0, file);
3591 fprintf (file, "*%d", scale);
3598 /* Split one or more DImode RTL references into pairs of SImode
3599 references. The RTL can be REG, offsettable MEM, integer constant, or
3600 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3601 split and "num" is its length. lo_half and hi_half are output arrays
3602 that parallel "operands". */
3605 split_di (operands, num, lo_half, hi_half)
3608 rtx lo_half[], hi_half[];
3612 rtx op = operands[num];
3613 if (CONSTANT_P (op))
3614 split_double (op, &lo_half[num], &hi_half[num]);
3615 else if (! reload_completed)
3617 lo_half[num] = gen_lowpart (SImode, op);
3618 hi_half[num] = gen_highpart (SImode, op);
3620 else if (GET_CODE (op) == REG)
3622 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3623 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3625 else if (offsettable_memref_p (op))
3627 rtx lo_addr = XEXP (op, 0);
3628 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3629 lo_half[num] = change_address (op, SImode, lo_addr);
3630 hi_half[num] = change_address (op, SImode, hi_addr);
3637 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3638 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3639 is the expression of the binary operation. The output may either be
3640 emitted here, or returned to the caller, like all output_* functions.
3642 There is no guarantee that the operands are the same mode, as they
3643 might be within FLOAT or FLOAT_EXTEND expressions. */
3645 #ifndef SYSV386_COMPAT
3646 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3647 wants to fix the assemblers because that causes incompatibility
3648 with gcc. No-one wants to fix gcc because that causes
3649 incompatibility with assemblers... You can use the option of
3650 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3651 #define SYSV386_COMPAT 1
3655 output_387_binary_op (insn, operands)
3659 static char buf[30];
3662 #ifdef ENABLE_CHECKING
3663 /* Even if we do not want to check the inputs, this documents input
3664 constraints. Which helps in understanding the following code. */
3665 if (STACK_REG_P (operands[0])
3666 && ((REG_P (operands[1])
3667 && REGNO (operands[0]) == REGNO (operands[1])
3668 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3669 || (REG_P (operands[2])
3670 && REGNO (operands[0]) == REGNO (operands[2])
3671 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3672 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3678 switch (GET_CODE (operands[3]))
3681 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3682 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3689 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3690 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3697 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3698 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3705 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3706 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3718 switch (GET_CODE (operands[3]))
3722 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3724 rtx temp = operands[2];
3725 operands[2] = operands[1];
3729 /* know operands[0] == operands[1]. */
3731 if (GET_CODE (operands[2]) == MEM)
3737 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3739 if (STACK_TOP_P (operands[0]))
3740 /* How is it that we are storing to a dead operand[2]?
3741 Well, presumably operands[1] is dead too. We can't
3742 store the result to st(0) as st(0) gets popped on this
3743 instruction. Instead store to operands[2] (which I
3744 think has to be st(1)). st(1) will be popped later.
3745 gcc <= 2.8.1 didn't have this check and generated
3746 assembly code that the Unixware assembler rejected. */
3747 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3749 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3753 if (STACK_TOP_P (operands[0]))
3754 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3756 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3761 if (GET_CODE (operands[1]) == MEM)
3767 if (GET_CODE (operands[2]) == MEM)
3773 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3776 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3777 derived assemblers, confusingly reverse the direction of
3778 the operation for fsub{r} and fdiv{r} when the
3779 destination register is not st(0). The Intel assembler
3780 doesn't have this brain damage. Read !SYSV386_COMPAT to
3781 figure out what the hardware really does. */
3782 if (STACK_TOP_P (operands[0]))
3783 p = "{p\t%0, %2|rp\t%2, %0}";
3785 p = "{rp\t%2, %0|p\t%0, %2}";
3787 if (STACK_TOP_P (operands[0]))
3788 /* As above for fmul/fadd, we can't store to st(0). */
3789 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3791 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3796 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3799 if (STACK_TOP_P (operands[0]))
3800 p = "{rp\t%0, %1|p\t%1, %0}";
3802 p = "{p\t%1, %0|rp\t%0, %1}";
3804 if (STACK_TOP_P (operands[0]))
3805 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3807 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3812 if (STACK_TOP_P (operands[0]))
3814 if (STACK_TOP_P (operands[1]))
3815 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3817 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3820 else if (STACK_TOP_P (operands[1]))
3823 p = "{\t%1, %0|r\t%0, %1}";
3825 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3831 p = "{r\t%2, %0|\t%0, %2}";
3833 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3846 /* Output code for INSN to convert a float to a signed int. OPERANDS
3847 are the insn operands. The output may be [HSD]Imode and the input
3848 operand may be [SDX]Fmode. */
3851 output_fix_trunc (insn, operands)
3855 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3856 int dimode_p = GET_MODE (operands[0]) == DImode;
3859 /* Jump through a hoop or two for DImode, since the hardware has no
3860 non-popping instruction. We used to do this a different way, but
3861 that was somewhat fragile and broke with post-reload splitters. */
3862 if (dimode_p && !stack_top_dies)
3863 output_asm_insn ("fld\t%y1", operands);
3865 if (! STACK_TOP_P (operands[1]))
3868 xops[0] = GEN_INT (12);
3869 xops[1] = adj_offsettable_operand (operands[2], 1);
3870 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3872 xops[2] = operands[0];
3873 if (GET_CODE (operands[0]) != MEM)
3874 xops[2] = operands[3];
3876 output_asm_insn ("fnstcw\t%2", operands);
3877 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3878 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3879 output_asm_insn ("fldcw\t%2", operands);
3880 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3882 if (stack_top_dies || dimode_p)
3883 output_asm_insn ("fistp%z2\t%2", xops);
3885 output_asm_insn ("fist%z2\t%2", xops);
3887 output_asm_insn ("fldcw\t%2", operands);
3889 if (GET_CODE (operands[0]) != MEM)
3893 split_di (operands+0, 1, xops+0, xops+1);
3894 split_di (operands+3, 1, xops+2, xops+3);
3895 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3896 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3898 else if (GET_MODE (operands[0]) == SImode)
3899 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3901 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3907 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3908 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3909 when fucom should be used. */
3912 output_fp_compare (insn, operands, eflags_p, unordered_p)
3915 int eflags_p, unordered_p;
3918 rtx cmp_op0 = operands[0];
3919 rtx cmp_op1 = operands[1];
3924 cmp_op1 = operands[2];
3927 if (! STACK_TOP_P (cmp_op0))
3930 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3932 if (STACK_REG_P (cmp_op1)
3934 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3935 && REGNO (cmp_op1) != FIRST_STACK_REG)
3937 /* If both the top of the 387 stack dies, and the other operand
3938 is also a stack register that dies, then this must be a
3939 `fcompp' float compare */
3943 /* There is no double popping fcomi variant. Fortunately,
3944 eflags is immune from the fstp's cc clobbering. */
3946 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3948 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3956 return "fucompp\n\tfnstsw\t%0";
3958 return "fcompp\n\tfnstsw\t%0";
3971 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3973 static const char * const alt[24] =
3985 "fcomi\t{%y1, %0|%0, %y1}",
3986 "fcomip\t{%y1, %0|%0, %y1}",
3987 "fucomi\t{%y1, %0|%0, %y1}",
3988 "fucomip\t{%y1, %0|%0, %y1}",
3995 "fcom%z2\t%y2\n\tfnstsw\t%0",
3996 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3997 "fucom%z2\t%y2\n\tfnstsw\t%0",
3998 "fucomp%z2\t%y2\n\tfnstsw\t%0",
4000 "ficom%z2\t%y2\n\tfnstsw\t%0",
4001 "ficomp%z2\t%y2\n\tfnstsw\t%0",
4009 mask = eflags_p << 3;
4010 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
4011 mask |= unordered_p << 1;
4012 mask |= stack_top_dies;
4024 /* Output assembler code to FILE to initialize basic-block profiling.
4026 If profile_block_flag == 2
4028 Output code to call the subroutine `__bb_init_trace_func'
4029 and pass two parameters to it. The first parameter is
4030 the address of a block allocated in the object module.
4031 The second parameter is the number of the first basic block
4034 The name of the block is a local symbol made with this statement:
4036 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4038 Of course, since you are writing the definition of
4039 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4040 can take a short cut in the definition of this macro and use the
4041 name that you know will result.
4043 The number of the first basic block of the function is
4044 passed to the macro in BLOCK_OR_LABEL.
4046 If described in a virtual assembler language the code to be
4050 parameter2 <- BLOCK_OR_LABEL
4051 call __bb_init_trace_func
4053 else if profile_block_flag != 0
4055 Output code to call the subroutine `__bb_init_func'
4056 and pass one single parameter to it, which is the same
4057 as the first parameter to `__bb_init_trace_func'.
4059 The first word of this parameter is a flag which will be nonzero if
4060 the object module has already been initialized. So test this word
4061 first, and do not call `__bb_init_func' if the flag is nonzero.
4062 Note: When profile_block_flag == 2 the test need not be done
4063 but `__bb_init_trace_func' *must* be called.
4065 BLOCK_OR_LABEL may be used to generate a label number as a
4066 branch destination in case `__bb_init_func' will not be called.
4068 If described in a virtual assembler language the code to be
4079 ix86_output_function_block_profiler (file, block_or_label)
4083 static int num_func = 0;
4085 char block_table[80], false_label[80];
4087 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4089 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4090 xops[5] = stack_pointer_rtx;
4091 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4093 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4095 switch (profile_block_flag)
4098 xops[2] = GEN_INT (block_or_label);
4099 xops[3] = gen_rtx_MEM (Pmode,
4100 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4101 xops[6] = GEN_INT (8);
4103 output_asm_insn ("push{l}\t%2", xops);
4105 output_asm_insn ("push{l}\t%1", xops);
4108 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4109 output_asm_insn ("push{l}\t%7", xops);
4111 output_asm_insn ("call\t%P3", xops);
4112 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4116 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4118 xops[0] = const0_rtx;
4119 xops[2] = gen_rtx_MEM (Pmode,
4120 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4121 xops[3] = gen_rtx_MEM (Pmode,
4122 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4123 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4124 xops[6] = GEN_INT (4);
4126 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4128 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4129 output_asm_insn ("jne\t%2", xops);
4132 output_asm_insn ("push{l}\t%1", xops);
4135 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a2}", xops);
4136 output_asm_insn ("push{l}\t%7", xops);
4138 output_asm_insn ("call\t%P3", xops);
4139 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4140 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4146 /* Output assembler code to FILE to increment a counter associated
4147 with basic block number BLOCKNO.
4149 If profile_block_flag == 2
4151 Output code to initialize the global structure `__bb' and
4152 call the function `__bb_trace_func' which will increment the
4155 `__bb' consists of two words. In the first word the number
4156 of the basic block has to be stored. In the second word
4157 the address of a block allocated in the object module
4160 The basic block number is given by BLOCKNO.
4162 The address of the block is given by the label created with
4164 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4166 by FUNCTION_BLOCK_PROFILER.
4168 Of course, since you are writing the definition of
4169 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4170 can take a short cut in the definition of this macro and use the
4171 name that you know will result.
4173 If described in a virtual assembler language the code to be
4176 move BLOCKNO -> (__bb)
4177 move LPBX0 -> (__bb+4)
4178 call __bb_trace_func
4180 Note that function `__bb_trace_func' must not change the
4181 machine state, especially the flag register. To grant
4182 this, you must output code to save and restore registers
4183 either in this macro or in the macros MACHINE_STATE_SAVE
4184 and MACHINE_STATE_RESTORE. The last two macros will be
4185 used in the function `__bb_trace_func', so you must make
4186 sure that the function prologue does not change any
4187 register prior to saving it with MACHINE_STATE_SAVE.
4189 else if profile_block_flag != 0
4191 Output code to increment the counter directly.
4192 Basic blocks are numbered separately from zero within each
4193 compiled object module. The count associated with block number
4194 BLOCKNO is at index BLOCKNO in an array of words; the name of
4195 this array is a local symbol made with this statement:
4197 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4199 Of course, since you are writing the definition of
4200 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4201 can take a short cut in the definition of this macro and use the
4202 name that you know will result.
4204 If described in a virtual assembler language the code to be
4207 inc (LPBX2+4*BLOCKNO)
4211 ix86_output_block_profiler (file, blockno)
4212 FILE *file ATTRIBUTE_UNUSED;
4215 rtx xops[8], cnt_rtx;
4217 char *block_table = counts;
4219 switch (profile_block_flag)
4222 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4224 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4225 xops[2] = GEN_INT (blockno);
4226 xops[3] = gen_rtx_MEM (Pmode,
4227 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4228 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4229 xops[5] = plus_constant (xops[4], 4);
4230 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4231 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4233 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4235 output_asm_insn ("pushf", xops);
4236 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4239 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4240 output_asm_insn ("push{l}\t%7", xops);
4241 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4242 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4243 output_asm_insn ("pop{l}\t%7", xops);
4246 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4247 output_asm_insn ("call\t%P3", xops);
4248 output_asm_insn ("popf", xops);
4253 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4254 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4255 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4258 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4261 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4263 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4264 output_asm_insn ("inc{l}\t%0", xops);
4271 ix86_expand_move (mode, operands)
4272 enum machine_mode mode;
4275 int strict = (reload_in_progress || reload_completed);
4278 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4280 /* Emit insns to move operands[1] into operands[0]. */
4282 if (GET_CODE (operands[0]) == MEM)
4283 operands[1] = force_reg (Pmode, operands[1]);
4286 rtx temp = operands[0];
4287 if (GET_CODE (temp) != REG)
4288 temp = gen_reg_rtx (Pmode);
4289 temp = legitimize_pic_address (operands[1], temp);
4290 if (temp == operands[0])
4297 if (GET_CODE (operands[0]) == MEM
4298 && (GET_MODE (operands[0]) == QImode
4299 || !push_operand (operands[0], mode))
4300 && GET_CODE (operands[1]) == MEM)
4301 operands[1] = force_reg (mode, operands[1]);
4303 if (push_operand (operands[0], mode)
4304 && ! general_no_elim_operand (operands[1], mode))
4305 operands[1] = copy_to_mode_reg (mode, operands[1]);
4307 if (FLOAT_MODE_P (mode))
4309 /* If we are loading a floating point constant to a register,
4310 force the value to memory now, since we'll get better code
4311 out the back end. */
4315 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4316 && register_operand (operands[0], mode))
4317 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4321 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4326 /* Attempt to expand a binary operator. Make the expansion closer to the
4327 actual machine, then just general_operand, which will allow 3 separate
4328 memory references (one output, two input) in a single insn. */
4331 ix86_expand_binary_operator (code, mode, operands)
4333 enum machine_mode mode;
4336 int matching_memory;
4337 rtx src1, src2, dst, op, clob;
4343 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4344 if (GET_RTX_CLASS (code) == 'c'
4345 && (rtx_equal_p (dst, src2)
4346 || immediate_operand (src1, mode)))
4353 /* If the destination is memory, and we do not have matching source
4354 operands, do things in registers. */
4355 matching_memory = 0;
4356 if (GET_CODE (dst) == MEM)
4358 if (rtx_equal_p (dst, src1))
4359 matching_memory = 1;
4360 else if (GET_RTX_CLASS (code) == 'c'
4361 && rtx_equal_p (dst, src2))
4362 matching_memory = 2;
4364 dst = gen_reg_rtx (mode);
4367 /* Both source operands cannot be in memory. */
4368 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4370 if (matching_memory != 2)
4371 src2 = force_reg (mode, src2);
4373 src1 = force_reg (mode, src1);
4376 /* If the operation is not commutable, source 1 cannot be a constant
4377 or non-matching memory. */
4378 if ((CONSTANT_P (src1)
4379 || (!matching_memory && GET_CODE (src1) == MEM))
4380 && GET_RTX_CLASS (code) != 'c')
4381 src1 = force_reg (mode, src1);
4383 /* If optimizing, copy to regs to improve CSE */
4384 if (optimize && ! no_new_pseudos)
4386 if (GET_CODE (dst) == MEM)
4387 dst = gen_reg_rtx (mode);
4388 if (GET_CODE (src1) == MEM)
4389 src1 = force_reg (mode, src1);
4390 if (GET_CODE (src2) == MEM)
4391 src2 = force_reg (mode, src2);
4394 /* Emit the instruction. */
4396 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4397 if (reload_in_progress)
4399 /* Reload doesn't know about the flags register, and doesn't know that
4400 it doesn't want to clobber it. We can only do this with PLUS. */
4407 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4408 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4411 /* Fix up the destination if needed. */
4412 if (dst != operands[0])
4413 emit_move_insn (operands[0], dst);
4416 /* Return TRUE or FALSE depending on whether the binary operator meets the
4417 appropriate constraints. */
4420 ix86_binary_operator_ok (code, mode, operands)
4422 enum machine_mode mode ATTRIBUTE_UNUSED;
4425 /* Both source operands cannot be in memory. */
4426 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4428 /* If the operation is not commutable, source 1 cannot be a constant. */
4429 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4431 /* If the destination is memory, we must have a matching source operand. */
4432 if (GET_CODE (operands[0]) == MEM
4433 && ! (rtx_equal_p (operands[0], operands[1])
4434 || (GET_RTX_CLASS (code) == 'c'
4435 && rtx_equal_p (operands[0], operands[2]))))
4437 /* If the operation is not commutable and the source 1 is memory, we must
4438 have a matching destionation. */
4439 if (GET_CODE (operands[1]) == MEM
4440 && GET_RTX_CLASS (code) != 'c'
4441 && ! rtx_equal_p (operands[0], operands[1]))
4446 /* Attempt to expand a unary operator. Make the expansion closer to the
4447 actual machine, then just general_operand, which will allow 2 separate
4448 memory references (one output, one input) in a single insn. */
4451 ix86_expand_unary_operator (code, mode, operands)
4453 enum machine_mode mode;
4456 int matching_memory;
4457 rtx src, dst, op, clob;
4462 /* If the destination is memory, and we do not have matching source
4463 operands, do things in registers. */
4464 matching_memory = 0;
4465 if (GET_CODE (dst) == MEM)
4467 if (rtx_equal_p (dst, src))
4468 matching_memory = 1;
4470 dst = gen_reg_rtx (mode);
4473 /* When source operand is memory, destination must match. */
4474 if (!matching_memory && GET_CODE (src) == MEM)
4475 src = force_reg (mode, src);
4477 /* If optimizing, copy to regs to improve CSE */
4478 if (optimize && ! no_new_pseudos)
4480 if (GET_CODE (dst) == MEM)
4481 dst = gen_reg_rtx (mode);
4482 if (GET_CODE (src) == MEM)
4483 src = force_reg (mode, src);
4486 /* Emit the instruction. */
4488 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4489 if (reload_in_progress || code == NOT)
4491 /* Reload doesn't know about the flags register, and doesn't know that
4492 it doesn't want to clobber it. */
4499 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4500 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4503 /* Fix up the destination if needed. */
4504 if (dst != operands[0])
4505 emit_move_insn (operands[0], dst);
4508 /* Return TRUE or FALSE depending on whether the unary operator meets the
4509 appropriate constraints. */
4512 ix86_unary_operator_ok (code, mode, operands)
4513 enum rtx_code code ATTRIBUTE_UNUSED;
4514 enum machine_mode mode ATTRIBUTE_UNUSED;
4515 rtx operands[2] ATTRIBUTE_UNUSED;
4517 /* If one of operands is memory, source and destination must match. */
4518 if ((GET_CODE (operands[0]) == MEM
4519 || GET_CODE (operands[1]) == MEM)
4520 && ! rtx_equal_p (operands[0], operands[1]))
4525 /* Return TRUE or FALSE depending on whether the first SET in INSN
4526 has source and destination with matching CC modes, and that the
4527 CC mode is at least as constrained as REQ_MODE. */
4530 ix86_match_ccmode (insn, req_mode)
4532 enum machine_mode req_mode;
4535 enum machine_mode set_mode;
4537 set = PATTERN (insn);
4538 if (GET_CODE (set) == PARALLEL)
4539 set = XVECEXP (set, 0, 0);
4540 if (GET_CODE (set) != SET)
4542 if (GET_CODE (SET_SRC (set)) != COMPARE)
4545 set_mode = GET_MODE (SET_DEST (set));
4549 if (req_mode != CCNOmode
4550 && (req_mode != CCmode
4551 || XEXP (SET_SRC (set), 1) != const0_rtx))
4555 if (req_mode == CCGCmode)
4559 if (req_mode == CCGOCmode || req_mode == CCNOmode)
4563 if (req_mode == CCZmode)
4573 return (GET_MODE (SET_SRC (set)) == set_mode);
4576 /* Produce an unsigned comparison for a given signed comparison. */
4578 static enum rtx_code
4579 unsigned_comparison (code)
4611 /* Generate insn patterns to do an integer compare of OPERANDS. */
4614 ix86_expand_int_compare (code, op0, op1)
4618 enum machine_mode cmpmode;
4621 cmpmode = SELECT_CC_MODE (code, op0, op1);
4622 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4624 /* This is very simple, but making the interface the same as in the
4625 FP case makes the rest of the code easier. */
4626 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4627 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4629 /* Return the test that should be put into the flags user, i.e.
4630 the bcc, scc, or cmov instruction. */
4631 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4634 /* Figure out whether to use ordered or unordered fp comparisons.
4635 Return the appropriate mode to use. */
4638 ix86_fp_compare_mode (code)
4646 /* When not doing IEEE compliant compares, fault on NaNs. */
4647 unordered = (TARGET_IEEE_FP != 0);
4650 case LT: case LE: case GT: case GE:
4654 case UNORDERED: case ORDERED:
4655 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4663 /* ??? If we knew whether invalid-operand exceptions were masked,
4664 we could rely on fcom to raise an exception and take care of
4665 NaNs. But we don't. We could know this from c99 math pragmas. */
4669 return unordered ? CCFPUmode : CCFPmode;
4673 ix86_cc_mode (code, op0, op1)
4677 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4678 return ix86_fp_compare_mode (code);
4681 /* Only zero flag is needed. */
4683 case NE: /* ZF!=0 */
4685 /* Codes needing carry flag. */
4686 case GEU: /* CF=0 */
4687 case GTU: /* CF=0 & ZF=0 */
4688 case LTU: /* CF=1 */
4689 case LEU: /* CF=1 | ZF=1 */
4691 /* Codes possibly doable only with sign flag when
4692 comparing against zero. */
4693 case GE: /* SF=OF or SF=0 */
4694 case LT: /* SF<>OF or SF=0 */
4695 if (op1 == const0_rtx)
4698 /* For other cases Carry flag is not required. */
4700 /* Codes doable only with sign flag when comparing
4701 against zero, but we miss jump instruction for it
4702 so we need to use relational tests agains overflow
4703 that thus needs to be zero. */
4704 case GT: /* ZF=0 & SF=OF */
4705 case LE: /* ZF=1 | SF<>OF */
4706 if (op1 == const0_rtx)
4715 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4718 ix86_use_fcomi_compare (code)
4721 return (TARGET_CMOVE
4722 && (code == ORDERED || code == UNORDERED
4723 /* All other unordered compares require checking
4724 multiple sets of bits. */
4725 || ix86_fp_compare_mode (code) == CCFPmode));
4728 /* Swap, force into registers, or otherwise massage the two operands
4729 to a fp comparison. The operands are updated in place; the new
4730 comparsion code is returned. */
4732 static enum rtx_code
4733 ix86_prepare_fp_compare_args (code, pop0, pop1)
4737 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4738 rtx op0 = *pop0, op1 = *pop1;
4739 enum machine_mode op_mode = GET_MODE (op0);
4741 /* All of the unordered compare instructions only work on registers.
4742 The same is true of the XFmode compare instructions. The same is
4743 true of the fcomi compare instructions. */
4745 if (fpcmp_mode == CCFPUmode
4746 || op_mode == XFmode
4747 || ix86_use_fcomi_compare (code))
4749 op0 = force_reg (op_mode, op0);
4750 op1 = force_reg (op_mode, op1);
4754 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4755 things around if they appear profitable, otherwise force op0
4758 if (standard_80387_constant_p (op0) == 0
4759 || (GET_CODE (op0) == MEM
4760 && ! (standard_80387_constant_p (op1) == 0
4761 || GET_CODE (op1) == MEM)))
4764 tmp = op0, op0 = op1, op1 = tmp;
4765 code = swap_condition (code);
4768 if (GET_CODE (op0) != REG)
4769 op0 = force_reg (op_mode, op0);
4771 if (CONSTANT_P (op1))
4773 if (standard_80387_constant_p (op1))
4774 op1 = force_reg (op_mode, op1);
4776 op1 = validize_mem (force_const_mem (op_mode, op1));
4785 /* Generate insn patterns to do a floating point compare of OPERANDS. */
4788 ix86_expand_fp_compare (code, op0, op1, scratch)
4790 rtx op0, op1, scratch;
4792 enum machine_mode fpcmp_mode, intcmp_mode;
4795 fpcmp_mode = ix86_fp_compare_mode (code);
4796 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4798 /* %%% fcomi is probably always faster, even when dealing with memory,
4799 since compare-and-branch would be three insns instead of four. */
4800 if (ix86_use_fcomi_compare (code))
4802 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4803 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4806 /* The FP codes work out to act like unsigned. */
4807 code = unsigned_comparison (code);
4808 intcmp_mode = CCmode;
4812 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4815 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4816 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4817 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
4819 if (fpcmp_mode == CCFPmode
4821 || code == UNORDERED)
4823 /* We have two options here -- use sahf, or testing bits of ah
4824 directly. On PPRO, they are equivalent, sahf being one byte
4825 smaller. On Pentium, sahf is non-pairable while test is UV
4828 if (TARGET_USE_SAHF || optimize_size)
4831 emit_insn (gen_x86_sahf_1 (scratch));
4833 /* The FP codes work out to act like unsigned. */
4834 code = unsigned_comparison (code);
4835 intcmp_mode = CCmode;
4840 * The numbers below correspond to the bits of the FPSW in AH.
4841 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
4863 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4864 faster in all cases to just fall back on sahf. */
4891 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
4892 intcmp_mode = CCNOmode;
4897 /* In the unordered case, we have to check C2 for NaN's, which
4898 doesn't happen to work out to anything nice combination-wise.
4899 So do some bit twiddling on the value we've got in AH to come
4900 up with an appropriate set of condition codes. */
4902 intcmp_mode = CCNOmode;
4906 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4910 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4911 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
4912 intcmp_mode = CCmode;
4916 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
4920 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4921 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4922 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4923 intcmp_mode = CCmode;
4927 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4928 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4929 intcmp_mode = CCmode;
4933 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4934 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4939 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4943 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4947 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4951 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4952 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4956 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4957 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4958 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4959 intcmp_mode = CCmode;
4963 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4967 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
4971 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4981 /* Return the test that should be put into the flags user, i.e.
4982 the bcc, scc, or cmov instruction. */
4983 return gen_rtx_fmt_ee (code, VOIDmode,
4984 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4989 ix86_expand_compare (code)
4993 op0 = ix86_compare_op0;
4994 op1 = ix86_compare_op1;
4996 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4997 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
4999 ret = ix86_expand_int_compare (code, op0, op1);
5005 ix86_expand_branch (code, label)
5011 switch (GET_MODE (ix86_compare_op0))
5016 tmp = ix86_expand_compare (code);
5017 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5018 gen_rtx_LABEL_REF (VOIDmode, label),
5020 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5026 /* Don't expand the comparison early, so that we get better code
5027 when jump or whoever decides to reverse the comparison. */
5032 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
5035 tmp = gen_rtx_fmt_ee (code, VOIDmode,
5036 ix86_compare_op0, ix86_compare_op1);
5037 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5038 gen_rtx_LABEL_REF (VOIDmode, label),
5040 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
5042 use_fcomi = ix86_use_fcomi_compare (code);
5043 vec = rtvec_alloc (3 + !use_fcomi);
5044 RTVEC_ELT (vec, 0) = tmp;
5046 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
5048 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
5051 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
5053 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
5058 /* Expand DImode branch into multiple compare+branch. */
5060 rtx lo[2], hi[2], label2;
5061 enum rtx_code code1, code2, code3;
5063 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
5065 tmp = ix86_compare_op0;
5066 ix86_compare_op0 = ix86_compare_op1;
5067 ix86_compare_op1 = tmp;
5068 code = swap_condition (code);
5070 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
5071 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
5073 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
5074 avoid two branches. This costs one extra insn, so disable when
5075 optimizing for size. */
5077 if ((code == EQ || code == NE)
5079 || hi[1] == const0_rtx || lo[1] == const0_rtx))
5084 if (hi[1] != const0_rtx)
5085 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
5086 NULL_RTX, 0, OPTAB_WIDEN);
5089 if (lo[1] != const0_rtx)
5090 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
5091 NULL_RTX, 0, OPTAB_WIDEN);
5093 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
5094 NULL_RTX, 0, OPTAB_WIDEN);
5096 ix86_compare_op0 = tmp;
5097 ix86_compare_op1 = const0_rtx;
5098 ix86_expand_branch (code, label);
5102 /* Otherwise, if we are doing less-than or greater-or-equal-than,
5103 op1 is a constant and the low word is zero, then we can just
5104 examine the high word. */
5106 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
5109 case LT: case LTU: case GE: case GEU:
5110 ix86_compare_op0 = hi[0];
5111 ix86_compare_op1 = hi[1];
5112 ix86_expand_branch (code, label);
5118 /* Otherwise, we need two or three jumps. */
5120 label2 = gen_label_rtx ();
5123 code2 = swap_condition (code);
5124 code3 = unsigned_condition (code);
5128 case LT: case GT: case LTU: case GTU:
5131 case LE: code1 = LT; code2 = GT; break;
5132 case GE: code1 = GT; code2 = LT; break;
5133 case LEU: code1 = LTU; code2 = GTU; break;
5134 case GEU: code1 = GTU; code2 = LTU; break;
5136 case EQ: code1 = NIL; code2 = NE; break;
5137 case NE: code2 = NIL; break;
5145 * if (hi(a) < hi(b)) goto true;
5146 * if (hi(a) > hi(b)) goto false;
5147 * if (lo(a) < lo(b)) goto true;
5151 ix86_compare_op0 = hi[0];
5152 ix86_compare_op1 = hi[1];
5155 ix86_expand_branch (code1, label);
5157 ix86_expand_branch (code2, label2);
5159 ix86_compare_op0 = lo[0];
5160 ix86_compare_op1 = lo[1];
5161 ix86_expand_branch (code3, label);
5164 emit_label (label2);
5174 ix86_expand_setcc (code, dest)
5181 if (GET_MODE (ix86_compare_op0) == DImode)
5182 return 0; /* FAIL */
5184 /* Three modes of generation:
5185 0 -- destination does not overlap compare sources:
5186 clear dest first, emit strict_low_part setcc.
5187 1 -- destination does overlap compare sources:
5188 emit subreg setcc, zero extend.
5189 2 -- destination is in QImode:
5195 if (GET_MODE (dest) == QImode)
5197 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5198 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5202 emit_move_insn (dest, const0_rtx);
5204 ret = ix86_expand_compare (code);
5205 PUT_MODE (ret, QImode);
5210 tmp = gen_lowpart (QImode, dest);
5211 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5215 if (!cse_not_expected)
5216 tmp = gen_reg_rtx (QImode);
5218 tmp = gen_lowpart (QImode, dest);
5221 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5227 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5228 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5229 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5230 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5234 return 1; /* DONE */
5238 ix86_expand_int_movcc (operands)
5241 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5242 rtx compare_seq, compare_op;
5244 /* When the compare code is not LTU or GEU, we can not use sbbl case.
5245 In case comparsion is done with immediate, we can convert it to LTU or
5246 GEU by altering the integer. */
5248 if ((code == LEU || code == GTU)
5249 && GET_CODE (ix86_compare_op1) == CONST_INT
5250 && GET_MODE (operands[0]) != HImode
5251 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5252 && GET_CODE (operands[2]) == CONST_INT
5253 && GET_CODE (operands[3]) == CONST_INT)
5259 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5263 compare_op = ix86_expand_compare (code);
5264 compare_seq = gen_sequence ();
5267 compare_code = GET_CODE (compare_op);
5269 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5270 HImode insns, we'd be swallowed in word prefix ops. */
5272 if (GET_MODE (operands[0]) != HImode
5273 && GET_CODE (operands[2]) == CONST_INT
5274 && GET_CODE (operands[3]) == CONST_INT)
5276 rtx out = operands[0];
5277 HOST_WIDE_INT ct = INTVAL (operands[2]);
5278 HOST_WIDE_INT cf = INTVAL (operands[3]);
5281 if (compare_code == LTU || compare_code == GEU)
5284 /* Detect overlap between destination and compare sources. */
5287 /* To simplify rest of code, restrict to the GEU case. */
5288 if (compare_code == LTU)
5293 compare_code = reverse_condition (compare_code);
5294 code = reverse_condition (code);
5298 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5299 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5300 tmp = gen_reg_rtx (SImode);
5302 emit_insn (compare_seq);
5303 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5315 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5326 emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
5328 else if (diff == -1 && ct)
5338 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5340 emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
5347 * andl cf - ct, dest
5352 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5354 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5358 emit_move_insn (out, tmp);
5360 return 1; /* DONE */
5367 tmp = ct, ct = cf, cf = tmp;
5369 compare_code = reverse_condition (compare_code);
5370 code = reverse_condition (code);
5372 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5373 || diff == 3 || diff == 5 || diff == 9)
5379 * lea cf(dest*(ct-cf)),dest
5383 * This also catches the degenerate setcc-only case.
5389 out = emit_store_flag (out, code, ix86_compare_op0,
5390 ix86_compare_op1, VOIDmode, 0, 1);
5397 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5401 tmp = gen_rtx_PLUS (SImode, tmp, out);
5407 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5413 emit_move_insn (out, tmp);
5418 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5419 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5421 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5422 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5426 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5428 if (out != operands[0])
5429 emit_move_insn (operands[0], out);
5431 return 1; /* DONE */
5435 * General case: Jumpful:
5436 * xorl dest,dest cmpl op1, op2
5437 * cmpl op1, op2 movl ct, dest
5439 * decl dest movl cf, dest
5440 * andl (cf-ct),dest 1:
5445 * This is reasonably steep, but branch mispredict costs are
5446 * high on modern cpus, so consider failing only if optimizing
5449 * %%% Parameterize branch_cost on the tuning architecture, then
5450 * use that. The 80386 couldn't care less about mispredicts.
5453 if (!optimize_size && !TARGET_CMOVE)
5459 compare_code = reverse_condition (compare_code);
5460 code = reverse_condition (code);
5463 out = emit_store_flag (out, code, ix86_compare_op0,
5464 ix86_compare_op1, VOIDmode, 0, 1);
5466 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5467 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5469 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5470 if (out != operands[0])
5471 emit_move_insn (operands[0], out);
5473 return 1; /* DONE */
5479 /* Try a few things more with specific constants and a variable. */
5482 rtx var, orig_out, out, tmp;
5485 return 0; /* FAIL */
5487 /* If one of the two operands is an interesting constant, load a
5488 constant with the above and mask it in with a logical operation. */
5490 if (GET_CODE (operands[2]) == CONST_INT)
5493 if (INTVAL (operands[2]) == 0)
5494 operands[3] = constm1_rtx, op = and_optab;
5495 else if (INTVAL (operands[2]) == -1)
5496 operands[3] = const0_rtx, op = ior_optab;
5498 return 0; /* FAIL */
5500 else if (GET_CODE (operands[3]) == CONST_INT)
5503 if (INTVAL (operands[3]) == 0)
5504 operands[2] = constm1_rtx, op = and_optab;
5505 else if (INTVAL (operands[3]) == -1)
5506 operands[2] = const0_rtx, op = ior_optab;
5508 return 0; /* FAIL */
5511 return 0; /* FAIL */
5513 orig_out = operands[0];
5514 tmp = gen_reg_rtx (GET_MODE (orig_out));
5517 /* Recurse to get the constant loaded. */
5518 if (ix86_expand_int_movcc (operands) == 0)
5519 return 0; /* FAIL */
5521 /* Mask in the interesting variable. */
5522 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5524 if (out != orig_out)
5525 emit_move_insn (orig_out, out);
5527 return 1; /* DONE */
5531 * For comparison with above,
5541 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5542 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5543 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5544 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5546 emit_insn (compare_seq);
5547 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5548 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5549 compare_op, operands[2],
5552 return 1; /* DONE */
5556 ix86_expand_fp_movcc (operands)
5560 enum machine_mode mode;
5563 /* The floating point conditional move instructions don't directly
5564 support conditions resulting from a signed integer comparison. */
5566 code = GET_CODE (operands[1]);
5573 tmp = gen_reg_rtx (QImode);
5574 ix86_expand_setcc (code, tmp);
5576 ix86_compare_op0 = tmp;
5577 ix86_compare_op1 = const0_rtx;
5584 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5585 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5586 gen_rtx_COMPARE (mode,
5588 ix86_compare_op1)));
5589 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5590 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5591 gen_rtx_fmt_ee (code, VOIDmode,
5592 gen_rtx_REG (mode, FLAGS_REG),
5600 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5601 works for floating pointer parameters and nonoffsetable memories.
5602 For pushes, it returns just stack offsets; the values will be saved
5603 in the right order. Maximally three parts are generated. */
5606 ix86_split_to_parts (operand, parts, mode)
5609 enum machine_mode mode;
5611 int size = GET_MODE_SIZE (mode) / 4;
5613 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5615 if (size < 2 || size > 3)
5618 /* Optimize constant pool reference to immediates. This is used by fp moves,
5619 that force all constants to memory to allow combining. */
5621 if (GET_CODE (operand) == MEM
5622 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5623 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5624 operand = get_pool_constant (XEXP (operand, 0));
5626 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5628 /* The only non-offsetable memories we handle are pushes. */
5629 if (! push_operand (operand, VOIDmode))
5632 PUT_MODE (operand, SImode);
5633 parts[0] = parts[1] = parts[2] = operand;
5638 split_di (&operand, 1, &parts[0], &parts[1]);
5641 if (REG_P (operand))
5643 if (!reload_completed)
5645 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5646 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5648 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5650 else if (offsettable_memref_p (operand))
5652 PUT_MODE (operand, SImode);
5654 parts[1] = adj_offsettable_operand (operand, 4);
5656 parts[2] = adj_offsettable_operand (operand, 8);
5658 else if (GET_CODE (operand) == CONST_DOUBLE)
5663 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5667 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5668 parts[2] = GEN_INT (l[2]);
5671 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5676 parts[1] = GEN_INT (l[1]);
5677 parts[0] = GEN_INT (l[0]);
5687 /* Emit insns to perform a move or push of DI, DF, and XF values.
5688 Return false when normal moves are needed; true when all required
5689 insns have been emitted. Operands 2-4 contain the input values
5690 int the correct order; operands 5-7 contain the output values. */
5693 ix86_split_long_move (operands1)
5698 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5702 /* Make our own copy to avoid clobbering the operands. */
5703 operands[0] = copy_rtx (operands1[0]);
5704 operands[1] = copy_rtx (operands1[1]);
5706 if (size < 2 || size > 3)
5709 /* The only non-offsettable memory we handle is push. */
5710 if (push_operand (operands[0], VOIDmode))
5712 else if (GET_CODE (operands[0]) == MEM
5713 && ! offsettable_memref_p (operands[0]))
5716 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5717 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5719 /* When emitting push, take care for source operands on the stack. */
5720 if (push && GET_CODE (operands[1]) == MEM
5721 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5724 part[1][1] = part[1][2];
5725 part[1][0] = part[1][1];
5728 /* We need to do copy in the right order in case an address register
5729 of the source overlaps the destination. */
5730 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5732 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5734 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5737 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5740 /* Collision in the middle part can be handled by reordering. */
5741 if (collisions == 1 && size == 3
5742 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5745 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5746 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5749 /* If there are more collisions, we can't handle it by reordering.
5750 Do an lea to the last part and use only one colliding move. */
5751 else if (collisions > 1)
5754 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5755 XEXP (part[1][0], 0)));
5756 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5757 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5759 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5766 emit_insn (gen_push (part[1][2]));
5767 emit_insn (gen_push (part[1][1]));
5768 emit_insn (gen_push (part[1][0]));
5772 /* Choose correct order to not overwrite the source before it is copied. */
5773 if ((REG_P (part[0][0])
5774 && REG_P (part[1][1])
5775 && (REGNO (part[0][0]) == REGNO (part[1][1])
5777 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5779 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5783 operands1[2] = part[0][2];
5784 operands1[3] = part[0][1];
5785 operands1[4] = part[0][0];
5786 operands1[5] = part[1][2];
5787 operands1[6] = part[1][1];
5788 operands1[7] = part[1][0];
5792 operands1[2] = part[0][1];
5793 operands1[3] = part[0][0];
5794 operands1[5] = part[1][1];
5795 operands1[6] = part[1][0];
5802 operands1[2] = part[0][0];
5803 operands1[3] = part[0][1];
5804 operands1[4] = part[0][2];
5805 operands1[5] = part[1][0];
5806 operands1[6] = part[1][1];
5807 operands1[7] = part[1][2];
5811 operands1[2] = part[0][0];
5812 operands1[3] = part[0][1];
5813 operands1[5] = part[1][0];
5814 operands1[6] = part[1][1];
5822 ix86_split_ashldi (operands, scratch)
5823 rtx *operands, scratch;
5825 rtx low[2], high[2];
5828 if (GET_CODE (operands[2]) == CONST_INT)
5830 split_di (operands, 2, low, high);
5831 count = INTVAL (operands[2]) & 63;
5835 emit_move_insn (high[0], low[1]);
5836 emit_move_insn (low[0], const0_rtx);
5839 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5843 if (!rtx_equal_p (operands[0], operands[1]))
5844 emit_move_insn (operands[0], operands[1]);
5845 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5846 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5851 if (!rtx_equal_p (operands[0], operands[1]))
5852 emit_move_insn (operands[0], operands[1]);
5854 split_di (operands, 1, low, high);
5856 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5857 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5859 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5861 if (! no_new_pseudos)
5862 scratch = force_reg (SImode, const0_rtx);
5864 emit_move_insn (scratch, const0_rtx);
5866 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5870 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5875 ix86_split_ashrdi (operands, scratch)
5876 rtx *operands, scratch;
5878 rtx low[2], high[2];
5881 if (GET_CODE (operands[2]) == CONST_INT)
5883 split_di (operands, 2, low, high);
5884 count = INTVAL (operands[2]) & 63;
5888 emit_move_insn (low[0], high[1]);
5890 if (! reload_completed)
5891 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5894 emit_move_insn (high[0], low[0]);
5895 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5899 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5903 if (!rtx_equal_p (operands[0], operands[1]))
5904 emit_move_insn (operands[0], operands[1]);
5905 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5906 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5911 if (!rtx_equal_p (operands[0], operands[1]))
5912 emit_move_insn (operands[0], operands[1]);
5914 split_di (operands, 1, low, high);
5916 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5917 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5919 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5921 if (! no_new_pseudos)
5922 scratch = gen_reg_rtx (SImode);
5923 emit_move_insn (scratch, high[0]);
5924 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5925 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5929 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
5934 ix86_split_lshrdi (operands, scratch)
5935 rtx *operands, scratch;
5937 rtx low[2], high[2];
5940 if (GET_CODE (operands[2]) == CONST_INT)
5942 split_di (operands, 2, low, high);
5943 count = INTVAL (operands[2]) & 63;
5947 emit_move_insn (low[0], high[1]);
5948 emit_move_insn (high[0], const0_rtx);
5951 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5955 if (!rtx_equal_p (operands[0], operands[1]))
5956 emit_move_insn (operands[0], operands[1]);
5957 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5958 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5963 if (!rtx_equal_p (operands[0], operands[1]))
5964 emit_move_insn (operands[0], operands[1]);
5966 split_di (operands, 1, low, high);
5968 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5969 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5971 /* Heh. By reversing the arguments, we can reuse this pattern. */
5972 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5974 if (! no_new_pseudos)
5975 scratch = force_reg (SImode, const0_rtx);
5977 emit_move_insn (scratch, const0_rtx);
5979 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5983 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5987 /* Expand the appropriate insns for doing strlen if not just doing
5990 out = result, initialized with the start address
5991 align_rtx = alignment of the address.
5992 scratch = scratch register, initialized with the startaddress when
5993 not aligned, otherwise undefined
5995 This is just the body. It needs the initialisations mentioned above and
5996 some address computing at the end. These things are done in i386.md. */
5999 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
6000 rtx out, align_rtx, scratch;
6004 rtx align_2_label = NULL_RTX;
6005 rtx align_3_label = NULL_RTX;
6006 rtx align_4_label = gen_label_rtx ();
6007 rtx end_0_label = gen_label_rtx ();
6009 rtx tmpreg = gen_reg_rtx (SImode);
6012 if (GET_CODE (align_rtx) == CONST_INT)
6013 align = INTVAL (align_rtx);
6015 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
6017 /* Is there a known alignment and is it less than 4? */
6020 /* Is there a known alignment and is it not 2? */
6023 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
6024 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
6026 /* Leave just the 3 lower bits. */
6027 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
6028 NULL_RTX, 0, OPTAB_WIDEN);
6030 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6031 SImode, 1, 0, align_4_label);
6032 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
6033 SImode, 1, 0, align_2_label);
6034 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
6035 SImode, 1, 0, align_3_label);
6039 /* Since the alignment is 2, we have to check 2 or 0 bytes;
6040 check if is aligned to 4 - byte. */
6042 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
6043 NULL_RTX, 0, OPTAB_WIDEN);
6045 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
6046 SImode, 1, 0, align_4_label);
6049 mem = gen_rtx_MEM (QImode, out);
6051 /* Now compare the bytes. */
6053 /* Compare the first n unaligned byte on a byte per byte basis. */
6054 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6055 QImode, 1, 0, end_0_label);
6057 /* Increment the address. */
6058 emit_insn (gen_addsi3 (out, out, const1_rtx));
6060 /* Not needed with an alignment of 2 */
6063 emit_label (align_2_label);
6065 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6066 QImode, 1, 0, end_0_label);
6068 emit_insn (gen_addsi3 (out, out, const1_rtx));
6070 emit_label (align_3_label);
6073 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
6074 QImode, 1, 0, end_0_label);
6076 emit_insn (gen_addsi3 (out, out, const1_rtx));
6079 /* Generate loop to check 4 bytes at a time. It is not a good idea to
6080 align this loop. It gives only huge programs, but does not help to
6082 emit_label (align_4_label);
6084 mem = gen_rtx_MEM (SImode, out);
6085 emit_move_insn (scratch, mem);
6086 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
6088 /* This formula yields a nonzero result iff one of the bytes is zero.
6089 This saves three branches inside loop and many cycles. */
6091 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
6092 emit_insn (gen_one_cmplsi2 (scratch, scratch));
6093 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
6094 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
6095 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
6096 SImode, 1, 0, align_4_label);
6100 rtx reg = gen_reg_rtx (SImode);
6101 emit_move_insn (reg, tmpreg);
6102 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6104 /* If zero is not in the first two bytes, move two bytes forward. */
6105 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6106 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6107 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6108 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6109 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6112 /* Emit lea manually to avoid clobbering of flags. */
6113 emit_insn (gen_rtx_SET (SImode, reg,
6114 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6116 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6117 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6118 emit_insn (gen_rtx_SET (VOIDmode, out,
6119 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6126 rtx end_2_label = gen_label_rtx ();
6127 /* Is zero in the first two bytes? */
6129 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6130 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6131 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6132 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6133 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6135 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6136 JUMP_LABEL (tmp) = end_2_label;
6138 /* Not in the first two. Move two bytes forward. */
6139 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6140 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6142 emit_label (end_2_label);
6146 /* Avoid branch in fixing the byte. */
6147 tmpreg = gen_lowpart (QImode, tmpreg);
6148 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6149 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6151 emit_label (end_0_label);
6154 /* Clear stack slot assignments remembered from previous functions.
6155 This is called from INIT_EXPANDERS once before RTL is emitted for each
6159 ix86_init_machine_status (p)
6162 enum machine_mode mode;
6165 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
6167 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6168 mode = (enum machine_mode) ((int) mode + 1))
6169 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6170 ix86_stack_locals[(int) mode][n] = NULL_RTX;
6173 /* Mark machine specific bits of P for GC. */
6175 ix86_mark_machine_status (p)
6178 enum machine_mode mode;
6181 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6182 mode = (enum machine_mode) ((int) mode + 1))
6183 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6184 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6187 /* Return a MEM corresponding to a stack slot with mode MODE.
6188 Allocate a new slot if necessary.
6190 The RTL for a function can have several slots available: N is
6191 which slot to use. */
6194 assign_386_stack_local (mode, n)
6195 enum machine_mode mode;
6198 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6201 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6202 ix86_stack_locals[(int) mode][n]
6203 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6205 return ix86_stack_locals[(int) mode][n];
6208 /* Calculate the length of the memory address in the instruction
6209 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6212 memory_address_length (addr)
6215 struct ix86_address parts;
6216 rtx base, index, disp;
6219 if (GET_CODE (addr) == PRE_DEC
6220 || GET_CODE (addr) == POST_INC)
6223 if (! ix86_decompose_address (addr, &parts))
6227 index = parts.index;
6231 /* Register Indirect. */
6232 if (base && !index && !disp)
6234 /* Special cases: ebp and esp need the two-byte modrm form. */
6235 if (addr == stack_pointer_rtx
6236 || addr == arg_pointer_rtx
6237 || addr == frame_pointer_rtx
6238 || addr == hard_frame_pointer_rtx)
6242 /* Direct Addressing. */
6243 else if (disp && !base && !index)
6248 /* Find the length of the displacement constant. */
6251 if (GET_CODE (disp) == CONST_INT
6252 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6258 /* An index requires the two-byte modrm form. */
6266 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
6267 expect that insn have 8bit immediate alternative. */
6269 ix86_attr_length_immediate_default (insn, shortform)
6275 extract_insn_cached (insn);
6276 for (i = recog_data.n_operands - 1; i >= 0; --i)
6277 if (CONSTANT_P (recog_data.operand[i]))
6282 && GET_CODE (recog_data.operand[i]) == CONST_INT
6283 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6287 switch (get_attr_mode (insn))
6299 fatal_insn ("Unknown insn mode", insn);
6305 /* Compute default value for "length_address" attribute. */
6307 ix86_attr_length_address_default (insn)
6311 extract_insn_cached (insn);
6312 for (i = recog_data.n_operands - 1; i >= 0; --i)
6313 if (GET_CODE (recog_data.operand[i]) == MEM)
6315 return memory_address_length (XEXP (recog_data.operand[i], 0));
6321 /* Return the maximum number of instructions a cpu can issue. */
6328 case PROCESSOR_PENTIUM:
6332 case PROCESSOR_PENTIUMPRO:
6340 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6341 by DEP_INSN and nothing set by DEP_INSN. */
6344 ix86_flags_dependant (insn, dep_insn, insn_type)
6346 enum attr_type insn_type;
6350 /* Simplify the test for uninteresting insns. */
6351 if (insn_type != TYPE_SETCC
6352 && insn_type != TYPE_ICMOV
6353 && insn_type != TYPE_FCMOV
6354 && insn_type != TYPE_IBR)
6357 if ((set = single_set (dep_insn)) != 0)
6359 set = SET_DEST (set);
6362 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6363 && XVECLEN (PATTERN (dep_insn), 0) == 2
6364 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6365 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6367 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6368 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6373 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6376 /* This test is true if the dependant insn reads the flags but
6377 not any other potentially set register. */
6378 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6381 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6387 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6388 address with operands set by DEP_INSN. */
6391 ix86_agi_dependant (insn, dep_insn, insn_type)
6393 enum attr_type insn_type;
6397 if (insn_type == TYPE_LEA)
6399 addr = PATTERN (insn);
6400 if (GET_CODE (addr) == SET)
6402 else if (GET_CODE (addr) == PARALLEL
6403 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6404 addr = XVECEXP (addr, 0, 0);
6407 addr = SET_SRC (addr);
6412 extract_insn_cached (insn);
6413 for (i = recog_data.n_operands - 1; i >= 0; --i)
6414 if (GET_CODE (recog_data.operand[i]) == MEM)
6416 addr = XEXP (recog_data.operand[i], 0);
6423 return modified_in_p (addr, dep_insn);
6427 ix86_adjust_cost (insn, link, dep_insn, cost)
6428 rtx insn, link, dep_insn;
6431 enum attr_type insn_type, dep_insn_type;
6432 enum attr_memory memory;
6434 int dep_insn_code_number;
6436 /* Anti and output depenancies have zero cost on all CPUs. */
6437 if (REG_NOTE_KIND (link) != 0)
6440 dep_insn_code_number = recog_memoized (dep_insn);
6442 /* If we can't recognize the insns, we can't really do anything. */
6443 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6446 insn_type = get_attr_type (insn);
6447 dep_insn_type = get_attr_type (dep_insn);
6449 /* Prologue and epilogue allocators can have a false dependency on ebp.
6450 This results in one cycle extra stall on Pentium prologue scheduling,
6451 so handle this important case manually. */
6452 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6453 && dep_insn_type == TYPE_ALU
6454 && !reg_mentioned_p (stack_pointer_rtx, insn))
6459 case PROCESSOR_PENTIUM:
6460 /* Address Generation Interlock adds a cycle of latency. */
6461 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6464 /* ??? Compares pair with jump/setcc. */
6465 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6468 /* Floating point stores require value to be ready one cycle ealier. */
6469 if (insn_type == TYPE_FMOV
6470 && get_attr_memory (insn) == MEMORY_STORE
6471 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6475 case PROCESSOR_PENTIUMPRO:
6476 /* Since we can't represent delayed latencies of load+operation,
6477 increase the cost here for non-imov insns. */
6478 if (dep_insn_type != TYPE_IMOV
6479 && dep_insn_type != TYPE_FMOV
6480 && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6481 || memory == MEMORY_BOTH))
6484 /* INT->FP conversion is expensive. */
6485 if (get_attr_fp_int_src (dep_insn))
6488 /* There is one cycle extra latency between an FP op and a store. */
6489 if (insn_type == TYPE_FMOV
6490 && (set = single_set (dep_insn)) != NULL_RTX
6491 && (set2 = single_set (insn)) != NULL_RTX
6492 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6493 && GET_CODE (SET_DEST (set2)) == MEM)
6498 /* The esp dependency is resolved before the instruction is really
6500 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6501 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6504 /* Since we can't represent delayed latencies of load+operation,
6505 increase the cost here for non-imov insns. */
6506 if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
6507 || memory == MEMORY_BOTH)
6508 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6510 /* INT->FP conversion is expensive. */
6511 if (get_attr_fp_int_src (dep_insn))
6515 case PROCESSOR_ATHLON:
6516 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6517 || memory == MEMORY_BOTH)
6519 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6534 struct ppro_sched_data
6537 int issued_this_cycle;
6542 ix86_safe_length (insn)
6545 if (recog_memoized (insn) >= 0)
6546 return get_attr_length(insn);
6552 ix86_safe_length_prefix (insn)
6555 if (recog_memoized (insn) >= 0)
6556 return get_attr_length(insn);
6561 static enum attr_memory
6562 ix86_safe_memory (insn)
6565 if (recog_memoized (insn) >= 0)
6566 return get_attr_memory(insn);
6568 return MEMORY_UNKNOWN;
6571 static enum attr_pent_pair
6572 ix86_safe_pent_pair (insn)
6575 if (recog_memoized (insn) >= 0)
6576 return get_attr_pent_pair(insn);
6578 return PENT_PAIR_NP;
6581 static enum attr_ppro_uops
6582 ix86_safe_ppro_uops (insn)
6585 if (recog_memoized (insn) >= 0)
6586 return get_attr_ppro_uops (insn);
6588 return PPRO_UOPS_MANY;
6592 ix86_dump_ppro_packet (dump)
6595 if (ix86_sched_data.ppro.decode[0])
6597 fprintf (dump, "PPRO packet: %d",
6598 INSN_UID (ix86_sched_data.ppro.decode[0]));
6599 if (ix86_sched_data.ppro.decode[1])
6600 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6601 if (ix86_sched_data.ppro.decode[2])
6602 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6607 /* We're beginning a new block. Initialize data structures as necessary. */
6610 ix86_sched_init (dump, sched_verbose)
6611 FILE *dump ATTRIBUTE_UNUSED;
6612 int sched_verbose ATTRIBUTE_UNUSED;
6614 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6617 /* Shift INSN to SLOT, and shift everything else down. */
6620 ix86_reorder_insn (insnp, slot)
6627 insnp[0] = insnp[1];
6628 while (++insnp != slot);
6633 /* Find an instruction with given pairability and minimal amount of cycles
6634 lost by the fact that the CPU waits for both pipelines to finish before
6635 reading next instructions. Also take care that both instructions together
6636 can not exceed 7 bytes. */
6639 ix86_pent_find_pair (e_ready, ready, type, first)
6642 enum attr_pent_pair type;
6645 int mincycles, cycles;
6646 enum attr_pent_pair tmp;
6647 enum attr_memory memory;
6648 rtx *insnp, *bestinsnp = NULL;
6650 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6653 memory = ix86_safe_memory (first);
6654 cycles = result_ready_cost (first);
6655 mincycles = INT_MAX;
6657 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6658 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6659 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6661 enum attr_memory second_memory;
6662 int secondcycles, currentcycles;
6664 second_memory = ix86_safe_memory (*insnp);
6665 secondcycles = result_ready_cost (*insnp);
6666 currentcycles = abs (cycles - secondcycles);
6668 if (secondcycles >= 1 && cycles >= 1)
6670 /* Two read/modify/write instructions together takes two
6672 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6675 /* Read modify/write instruction followed by read/modify
6676 takes one cycle longer. */
6677 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6678 && tmp != PENT_PAIR_UV
6679 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6682 if (currentcycles < mincycles)
6683 bestinsnp = insnp, mincycles = currentcycles;
6689 /* Subroutines of ix86_sched_reorder. */
6692 ix86_sched_reorder_pentium (ready, e_ready)
6696 enum attr_pent_pair pair1, pair2;
6699 /* This wouldn't be necessary if Haifa knew that static insn ordering
6700 is important to which pipe an insn is issued to. So we have to make
6701 some minor rearrangements. */
6703 pair1 = ix86_safe_pent_pair (*e_ready);
6705 /* If the first insn is non-pairable, let it be. */
6706 if (pair1 == PENT_PAIR_NP)
6709 pair2 = PENT_PAIR_NP;
6712 /* If the first insn is UV or PV pairable, search for a PU
6714 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6716 insnp = ix86_pent_find_pair (e_ready-1, ready,
6717 PENT_PAIR_PU, *e_ready);
6719 pair2 = PENT_PAIR_PU;
6722 /* If the first insn is PU or UV pairable, search for a PV
6724 if (pair2 == PENT_PAIR_NP
6725 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6727 insnp = ix86_pent_find_pair (e_ready-1, ready,
6728 PENT_PAIR_PV, *e_ready);
6730 pair2 = PENT_PAIR_PV;
6733 /* If the first insn is pairable, search for a UV
6735 if (pair2 == PENT_PAIR_NP)
6737 insnp = ix86_pent_find_pair (e_ready-1, ready,
6738 PENT_PAIR_UV, *e_ready);
6740 pair2 = PENT_PAIR_UV;
6743 if (pair2 == PENT_PAIR_NP)
6746 /* Found something! Decide if we need to swap the order. */
6747 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6748 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6749 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6750 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6751 ix86_reorder_insn (insnp, e_ready);
6753 ix86_reorder_insn (insnp, e_ready - 1);
6757 ix86_sched_reorder_ppro (ready, e_ready)
6762 enum attr_ppro_uops cur_uops;
6763 int issued_this_cycle;
6767 /* At this point .ppro.decode contains the state of the three
6768 decoders from last "cycle". That is, those insns that were
6769 actually independent. But here we're scheduling for the
6770 decoder, and we may find things that are decodable in the
6773 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
6774 issued_this_cycle = 0;
6777 cur_uops = ix86_safe_ppro_uops (*insnp);
6779 /* If the decoders are empty, and we've a complex insn at the
6780 head of the priority queue, let it issue without complaint. */
6781 if (decode[0] == NULL)
6783 if (cur_uops == PPRO_UOPS_MANY)
6789 /* Otherwise, search for a 2-4 uop unsn to issue. */
6790 while (cur_uops != PPRO_UOPS_FEW)
6794 cur_uops = ix86_safe_ppro_uops (*--insnp);
6797 /* If so, move it to the head of the line. */
6798 if (cur_uops == PPRO_UOPS_FEW)
6799 ix86_reorder_insn (insnp, e_ready);
6801 /* Issue the head of the queue. */
6802 issued_this_cycle = 1;
6803 decode[0] = *e_ready--;
6806 /* Look for simple insns to fill in the other two slots. */
6807 for (i = 1; i < 3; ++i)
6808 if (decode[i] == NULL)
6810 if (ready >= e_ready)
6814 cur_uops = ix86_safe_ppro_uops (*insnp);
6815 while (cur_uops != PPRO_UOPS_ONE)
6819 cur_uops = ix86_safe_ppro_uops (*--insnp);
6822 /* Found one. Move it to the head of the queue and issue it. */
6823 if (cur_uops == PPRO_UOPS_ONE)
6825 ix86_reorder_insn (insnp, e_ready);
6826 decode[i] = *e_ready--;
6827 issued_this_cycle++;
6831 /* ??? Didn't find one. Ideally, here we would do a lazy split
6832 of 2-uop insns, issue one and queue the other. */
6836 if (issued_this_cycle == 0)
6837 issued_this_cycle = 1;
6838 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6841 /* We are about to being issuing insns for this clock cycle.
6842 Override the default sort algorithm to better slot instructions. */
6844 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6845 FILE *dump ATTRIBUTE_UNUSED;
6846 int sched_verbose ATTRIBUTE_UNUSED;
6849 int clock_var ATTRIBUTE_UNUSED;
6851 rtx *e_ready = ready + n_ready - 1;
6861 case PROCESSOR_PENTIUM:
6862 ix86_sched_reorder_pentium (ready, e_ready);
6865 case PROCESSOR_PENTIUMPRO:
6866 ix86_sched_reorder_ppro (ready, e_ready);
6871 return ix86_issue_rate ();
6874 /* We are about to issue INSN. Return the number of insns left on the
6875 ready queue that can be issued this cycle. */
6878 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6888 return can_issue_more - 1;
6890 case PROCESSOR_PENTIUMPRO:
6892 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6894 if (uops == PPRO_UOPS_MANY)
6897 ix86_dump_ppro_packet (dump);
6898 ix86_sched_data.ppro.decode[0] = insn;
6899 ix86_sched_data.ppro.decode[1] = NULL;
6900 ix86_sched_data.ppro.decode[2] = NULL;
6902 ix86_dump_ppro_packet (dump);
6903 ix86_sched_data.ppro.decode[0] = NULL;
6905 else if (uops == PPRO_UOPS_FEW)
6908 ix86_dump_ppro_packet (dump);
6909 ix86_sched_data.ppro.decode[0] = insn;
6910 ix86_sched_data.ppro.decode[1] = NULL;
6911 ix86_sched_data.ppro.decode[2] = NULL;
6915 for (i = 0; i < 3; ++i)
6916 if (ix86_sched_data.ppro.decode[i] == NULL)
6918 ix86_sched_data.ppro.decode[i] = insn;
6926 ix86_dump_ppro_packet (dump);
6927 ix86_sched_data.ppro.decode[0] = NULL;
6928 ix86_sched_data.ppro.decode[1] = NULL;
6929 ix86_sched_data.ppro.decode[2] = NULL;
6933 return --ix86_sched_data.ppro.issued_this_cycle;
6937 /* Compute the alignment given to a constant that is being placed in memory.
6938 EXP is the constant and ALIGN is the alignment that the object would
6940 The value of this function is used instead of that alignment to align
6944 ix86_constant_alignment (exp, align)
6948 if (TREE_CODE (exp) == REAL_CST)
6950 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
6952 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
6955 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
6962 /* Compute the alignment for a static variable.
6963 TYPE is the data type, and ALIGN is the alignment that
6964 the object would ordinarily have. The value of this function is used
6965 instead of that alignment to align the object. */
6968 ix86_data_alignment (type, align)
6972 if (AGGREGATE_TYPE_P (type)
6974 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6975 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
6976 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
6979 if (TREE_CODE (type) == ARRAY_TYPE)
6981 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
6983 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
6986 else if (TREE_CODE (type) == COMPLEX_TYPE)
6989 if (TYPE_MODE (type) == DCmode && align < 64)
6991 if (TYPE_MODE (type) == XCmode && align < 128)
6994 else if ((TREE_CODE (type) == RECORD_TYPE
6995 || TREE_CODE (type) == UNION_TYPE
6996 || TREE_CODE (type) == QUAL_UNION_TYPE)
6997 && TYPE_FIELDS (type))
6999 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7001 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7004 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7005 || TREE_CODE (type) == INTEGER_TYPE)
7007 if (TYPE_MODE (type) == DFmode && align < 64)
7009 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7016 /* Compute the alignment for a local variable.
7017 TYPE is the data type, and ALIGN is the alignment that
7018 the object would ordinarily have. The value of this macro is used
7019 instead of that alignment to align the object. */
7022 ix86_local_alignment (type, align)
7026 if (TREE_CODE (type) == ARRAY_TYPE)
7028 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
7030 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
7033 else if (TREE_CODE (type) == COMPLEX_TYPE)
7035 if (TYPE_MODE (type) == DCmode && align < 64)
7037 if (TYPE_MODE (type) == XCmode && align < 128)
7040 else if ((TREE_CODE (type) == RECORD_TYPE
7041 || TREE_CODE (type) == UNION_TYPE
7042 || TREE_CODE (type) == QUAL_UNION_TYPE)
7043 && TYPE_FIELDS (type))
7045 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
7047 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
7050 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
7051 || TREE_CODE (type) == INTEGER_TYPE)
7054 if (TYPE_MODE (type) == DFmode && align < 64)
7056 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
7062 #define def_builtin(NAME, TYPE, CODE) \
7063 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
7064 struct builtin_description
7066 enum insn_code icode;
7068 enum ix86_builtins code;
7069 enum rtx_code comparison;
7073 static struct builtin_description bdesc_comi[] =
7075 { CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
7076 { CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
7077 { CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
7078 { CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
7079 { CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
7080 { CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
7081 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
7082 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
7083 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
7084 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
7085 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
7086 { CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
7089 static struct builtin_description bdesc_2arg[] =
7092 { CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
7093 { CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
7094 { CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
7095 { CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
7096 { CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
7097 { CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
7098 { CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
7099 { CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
7101 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
7102 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
7103 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
7104 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
7105 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
7106 { CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
7107 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
7108 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
7109 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
7110 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
7111 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
7112 { CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
7113 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
7114 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
7115 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
7116 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
7117 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
7118 { CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
7119 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
7120 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
7121 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
7122 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
7123 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
7124 { CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
7126 { CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
7127 { CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
7128 { CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
7129 { CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
7131 { CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
7132 { CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
7133 { CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
7134 { CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
7136 { CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
7137 { CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
7138 { CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
7139 { CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
7140 { CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
7143 { CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
7144 { CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
7145 { CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
7146 { CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
7147 { CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
7148 { CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
7150 { CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
7151 { CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
7152 { CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
7153 { CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
7154 { CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
7155 { CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
7156 { CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
7157 { CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
7159 { CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
7160 { CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
7161 { CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
7163 { CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
7164 { CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
7165 { CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
7166 { CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
7168 { CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
7169 { CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
7171 { CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
7172 { CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
7173 { CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
7174 { CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
7175 { CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
7176 { CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
7178 { CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
7179 { CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
7180 { CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
7181 { CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
7183 { CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
7184 { CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
7185 { CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
7186 { CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
7187 { CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
7188 { CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
7191 { CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
7192 { CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
7193 { CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
7195 { CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
7196 { CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
7198 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
7199 { CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
7200 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
7201 { CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
7202 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
7203 { CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
7205 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
7206 { CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
7207 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
7208 { CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
7209 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
7210 { CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
7212 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
7213 { CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
7214 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
7215 { CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
7217 { CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
7218 { CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
7222 static struct builtin_description bdesc_1arg[] =
7224 { CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
7225 { CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
7227 { CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
7228 { CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
7229 { CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
7231 { CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
7232 { CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
7233 { CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
7234 { CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
7238 /* Expand all the target specific builtins. This is not called if TARGET_MMX
7239 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
7242 ix86_init_builtins ()
7244 struct builtin_description * d;
7246 tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE);
7248 tree pchar_type_node = build_pointer_type (char_type_node);
7249 tree pfloat_type_node = build_pointer_type (float_type_node);
7250 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
7251 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
7254 tree int_ftype_v4sf_v4sf
7255 = build_function_type (integer_type_node,
7256 tree_cons (NULL_TREE, V4SF_type_node,
7257 tree_cons (NULL_TREE,
7260 tree v4si_ftype_v4sf_v4sf
7261 = build_function_type (V4SI_type_node,
7262 tree_cons (NULL_TREE, V4SF_type_node,
7263 tree_cons (NULL_TREE,
7266 /* MMX/SSE/integer conversions. */
7267 tree int_ftype_v4sf_int
7268 = build_function_type (integer_type_node,
7269 tree_cons (NULL_TREE, V4SF_type_node,
7270 tree_cons (NULL_TREE,
7274 = build_function_type (integer_type_node,
7275 tree_cons (NULL_TREE, V4SF_type_node,
7278 = build_function_type (integer_type_node,
7279 tree_cons (NULL_TREE, V8QI_type_node,
7282 = build_function_type (integer_type_node,
7283 tree_cons (NULL_TREE, V2SI_type_node,
7286 = build_function_type (V2SI_type_node,
7287 tree_cons (NULL_TREE, integer_type_node,
7289 tree v4sf_ftype_v4sf_int
7290 = build_function_type (integer_type_node,
7291 tree_cons (NULL_TREE, V4SF_type_node,
7292 tree_cons (NULL_TREE, integer_type_node,
7294 tree v4sf_ftype_v4sf_v2si
7295 = build_function_type (V4SF_type_node,
7296 tree_cons (NULL_TREE, V4SF_type_node,
7297 tree_cons (NULL_TREE, V2SI_type_node,
7299 tree int_ftype_v4hi_int
7300 = build_function_type (integer_type_node,
7301 tree_cons (NULL_TREE, V4HI_type_node,
7302 tree_cons (NULL_TREE, integer_type_node,
7304 tree v4hi_ftype_v4hi_int_int
7305 = build_function_type (integer_type_node,
7306 tree_cons (NULL_TREE, V4HI_type_node,
7307 tree_cons (NULL_TREE, integer_type_node,
7308 tree_cons (NULL_TREE,
7311 /* Miscellaneous. */
7312 tree v8qi_ftype_v4hi_v4hi
7313 = build_function_type (V8QI_type_node,
7314 tree_cons (NULL_TREE, V4HI_type_node,
7315 tree_cons (NULL_TREE, V4HI_type_node,
7317 tree v4hi_ftype_v2si_v2si
7318 = build_function_type (V4HI_type_node,
7319 tree_cons (NULL_TREE, V2SI_type_node,
7320 tree_cons (NULL_TREE, V2SI_type_node,
7322 tree v4sf_ftype_v4sf_v4sf_int
7323 = build_function_type (V4SF_type_node,
7324 tree_cons (NULL_TREE, V4SF_type_node,
7325 tree_cons (NULL_TREE, V4SF_type_node,
7326 tree_cons (NULL_TREE,
7329 tree v4hi_ftype_v8qi_v8qi
7330 = build_function_type (V4HI_type_node,
7331 tree_cons (NULL_TREE, V8QI_type_node,
7332 tree_cons (NULL_TREE, V8QI_type_node,
7334 tree v2si_ftype_v4hi_v4hi
7335 = build_function_type (V2SI_type_node,
7336 tree_cons (NULL_TREE, V4HI_type_node,
7337 tree_cons (NULL_TREE, V4HI_type_node,
7339 tree v4hi_ftype_v4hi_int
7340 = build_function_type (V4HI_type_node,
7341 tree_cons (NULL_TREE, V4HI_type_node,
7342 tree_cons (NULL_TREE, integer_type_node,
7344 tree di_ftype_di_int
7345 = build_function_type (long_long_unsigned_type_node,
7346 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7347 tree_cons (NULL_TREE, integer_type_node,
7349 tree v8qi_ftype_v8qi_di
7350 = build_function_type (V8QI_type_node,
7351 tree_cons (NULL_TREE, V8QI_type_node,
7352 tree_cons (NULL_TREE,
7353 long_long_integer_type_node,
7355 tree v4hi_ftype_v4hi_di
7356 = build_function_type (V4HI_type_node,
7357 tree_cons (NULL_TREE, V4HI_type_node,
7358 tree_cons (NULL_TREE,
7359 long_long_integer_type_node,
7361 tree v2si_ftype_v2si_di
7362 = build_function_type (V2SI_type_node,
7363 tree_cons (NULL_TREE, V2SI_type_node,
7364 tree_cons (NULL_TREE,
7365 long_long_integer_type_node,
7367 tree void_ftype_void
7368 = build_function_type (void_type_node, endlink);
7369 tree void_ftype_pchar_int
7370 = build_function_type (void_type_node,
7371 tree_cons (NULL_TREE, pchar_type_node,
7372 tree_cons (NULL_TREE, integer_type_node,
7374 tree void_ftype_unsigned
7375 = build_function_type (void_type_node,
7376 tree_cons (NULL_TREE, unsigned_type_node,
7378 tree unsigned_ftype_void
7379 = build_function_type (unsigned_type_node, endlink);
7381 = build_function_type (long_long_unsigned_type_node, endlink);
7383 = build_function_type (intTI_type_node, endlink);
7384 tree v2si_ftype_v4sf
7385 = build_function_type (V2SI_type_node,
7386 tree_cons (NULL_TREE, V4SF_type_node,
7389 tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
7390 tree_cons (NULL_TREE, V8QI_type_node,
7391 tree_cons (NULL_TREE,
7394 tree void_ftype_v8qi_v8qi_pchar
7395 = build_function_type (void_type_node, maskmovq_args);
7396 tree v4sf_ftype_pfloat
7397 = build_function_type (V4SF_type_node,
7398 tree_cons (NULL_TREE, pfloat_type_node,
7400 tree v4sf_ftype_float
7401 = build_function_type (V4SF_type_node,
7402 tree_cons (NULL_TREE, float_type_node,
7404 tree v4sf_ftype_float_float_float_float
7405 = build_function_type (V4SF_type_node,
7406 tree_cons (NULL_TREE, float_type_node,
7407 tree_cons (NULL_TREE, float_type_node,
7408 tree_cons (NULL_TREE,
7410 tree_cons (NULL_TREE,
7413 /* @@@ the type is bogus */
7414 tree v4sf_ftype_v4sf_pv2si
7415 = build_function_type (V4SF_type_node,
7416 tree_cons (NULL_TREE, V4SF_type_node,
7417 tree_cons (NULL_TREE, pv2si_type_node,
7419 tree v4sf_ftype_pv2si_v4sf
7420 = build_function_type (V4SF_type_node,
7421 tree_cons (NULL_TREE, V4SF_type_node,
7422 tree_cons (NULL_TREE, pv2si_type_node,
7424 tree void_ftype_pfloat_v4sf
7425 = build_function_type (void_type_node,
7426 tree_cons (NULL_TREE, pfloat_type_node,
7427 tree_cons (NULL_TREE, V4SF_type_node,
7429 tree void_ftype_pdi_di
7430 = build_function_type (void_type_node,
7431 tree_cons (NULL_TREE, pdi_type_node,
7432 tree_cons (NULL_TREE,
7433 long_long_unsigned_type_node,
7435 /* Normal vector unops. */
7436 tree v4sf_ftype_v4sf
7437 = build_function_type (V4SF_type_node,
7438 tree_cons (NULL_TREE, V4SF_type_node,
7441 /* Normal vector binops. */
7442 tree v4sf_ftype_v4sf_v4sf
7443 = build_function_type (V4SF_type_node,
7444 tree_cons (NULL_TREE, V4SF_type_node,
7445 tree_cons (NULL_TREE, V4SF_type_node,
7447 tree v8qi_ftype_v8qi_v8qi
7448 = build_function_type (V8QI_type_node,
7449 tree_cons (NULL_TREE, V8QI_type_node,
7450 tree_cons (NULL_TREE, V8QI_type_node,
7452 tree v4hi_ftype_v4hi_v4hi
7453 = build_function_type (V4HI_type_node,
7454 tree_cons (NULL_TREE, V4HI_type_node,
7455 tree_cons (NULL_TREE, V4HI_type_node,
7457 tree v2si_ftype_v2si_v2si
7458 = build_function_type (V2SI_type_node,
7459 tree_cons (NULL_TREE, V2SI_type_node,
7460 tree_cons (NULL_TREE, V2SI_type_node,
7463 = build_function_type (intTI_type_node,
7464 tree_cons (NULL_TREE, intTI_type_node,
7465 tree_cons (NULL_TREE, intTI_type_node,
7468 = build_function_type (long_long_unsigned_type_node,
7469 tree_cons (NULL_TREE, long_long_unsigned_type_node,
7470 tree_cons (NULL_TREE,
7471 long_long_unsigned_type_node,
7474 /* Add all builtins that are more or less simple operations on two
7476 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
7478 /* Use one of the operands; the target can have a different mode for
7479 mask-generating compares. */
7480 enum machine_mode mode;
7485 mode = insn_data[d->icode].operand[1].mode;
7487 if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
7493 type = v4sf_ftype_v4sf_v4sf;
7496 type = v8qi_ftype_v8qi_v8qi;
7499 type = v4hi_ftype_v4hi_v4hi;
7502 type = v2si_ftype_v2si_v2si;
7505 type = ti_ftype_ti_ti;
7508 type = di_ftype_di_di;
7515 /* Override for comparisons. */
7516 if (d->icode == CODE_FOR_maskcmpv4sf3
7517 || d->icode == CODE_FOR_maskncmpv4sf3
7518 || d->icode == CODE_FOR_vmmaskcmpv4sf3
7519 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
7520 type = v4si_ftype_v4sf_v4sf;
7522 def_builtin (d->name, type, d->code);
7525 /* Add the remaining MMX insns with somewhat more complicated types. */
7526 def_builtin ("__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
7527 def_builtin ("__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
7528 def_builtin ("__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
7529 def_builtin ("__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
7530 def_builtin ("__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
7531 def_builtin ("__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
7532 def_builtin ("__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
7533 def_builtin ("__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
7534 def_builtin ("__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
7536 def_builtin ("__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
7537 def_builtin ("__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
7538 def_builtin ("__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
7540 def_builtin ("__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
7541 def_builtin ("__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
7543 def_builtin ("__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
7544 def_builtin ("__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
7546 /* Everything beyond this point is SSE only. */
7550 /* comi/ucomi insns. */
7551 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
7552 def_builtin (d->name, int_ftype_v4sf_v4sf, d->code);
7554 def_builtin ("__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
7555 def_builtin ("__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
7556 def_builtin ("__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
7558 def_builtin ("__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
7559 def_builtin ("__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
7560 def_builtin ("__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
7561 def_builtin ("__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
7562 def_builtin ("__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
7563 def_builtin ("__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
7565 def_builtin ("__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
7566 def_builtin ("__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
7568 def_builtin ("__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
7570 def_builtin ("__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
7571 def_builtin ("__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
7572 def_builtin ("__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
7573 def_builtin ("__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
7574 def_builtin ("__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
7575 def_builtin ("__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
7577 def_builtin ("__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
7578 def_builtin ("__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
7579 def_builtin ("__builtin_ia32_storehps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
7580 def_builtin ("__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
7582 def_builtin ("__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
7583 def_builtin ("__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
7584 def_builtin ("__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
7585 def_builtin ("__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
7587 def_builtin ("__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
7588 def_builtin ("__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
7590 def_builtin ("__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
7592 def_builtin ("__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
7593 def_builtin ("__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
7594 def_builtin ("__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
7595 def_builtin ("__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
7596 def_builtin ("__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
7597 def_builtin ("__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
7599 def_builtin ("__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
7601 /* Composite intrinsics. */
7602 def_builtin ("__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
7603 def_builtin ("__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
7604 def_builtin ("__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
7605 def_builtin ("__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
7606 def_builtin ("__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
7607 def_builtin ("__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
7608 def_builtin ("__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
7611 /* Errors in the source file can cause expand_expr to return const0_rtx
7612 where we expect a vector. To avoid crashing, use one of the vector
7613 clear instructions. */
7615 safe_vector_operand (x, mode)
7617 enum machine_mode mode;
7619 if (x != const0_rtx)
7621 x = gen_reg_rtx (mode);
7623 if (VALID_MMX_REG_MODE (mode))
7624 emit_insn (gen_mmx_clrdi (mode == DImode ? x
7625 : gen_rtx_SUBREG (DImode, x, 0)));
7627 emit_insn (gen_sse_clrti (mode == TImode ? x
7628 : gen_rtx_SUBREG (TImode, x, 0)));
7632 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
7635 ix86_expand_binop_builtin (icode, arglist, target)
7636 enum insn_code icode;
7641 tree arg0 = TREE_VALUE (arglist);
7642 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7643 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7644 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7645 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7646 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7647 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
7649 if (VECTOR_MODE_P (mode0))
7650 op0 = safe_vector_operand (op0, mode0);
7651 if (VECTOR_MODE_P (mode1))
7652 op1 = safe_vector_operand (op1, mode1);
7655 || GET_MODE (target) != tmode
7656 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7657 target = gen_reg_rtx (tmode);
7659 /* In case the insn wants input operands in modes different from
7660 the result, abort. */
7661 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
7664 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7665 op0 = copy_to_mode_reg (mode0, op0);
7666 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7667 op1 = copy_to_mode_reg (mode1, op1);
7669 pat = GEN_FCN (icode) (target, op0, op1);
7676 /* Subroutine of ix86_expand_builtin to take care of stores. */
7679 ix86_expand_store_builtin (icode, arglist, shuffle)
7680 enum insn_code icode;
7685 tree arg0 = TREE_VALUE (arglist);
7686 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7687 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7688 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7689 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
7690 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
7692 if (VECTOR_MODE_P (mode1))
7693 op1 = safe_vector_operand (op1, mode1);
7695 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7696 if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
7697 op1 = copy_to_mode_reg (mode1, op1);
7699 emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
7700 pat = GEN_FCN (icode) (op0, op1);
7706 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
7709 ix86_expand_unop_builtin (icode, arglist, target, do_load)
7710 enum insn_code icode;
7716 tree arg0 = TREE_VALUE (arglist);
7717 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7718 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7719 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7722 || GET_MODE (target) != tmode
7723 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7724 target = gen_reg_rtx (tmode);
7726 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
7729 if (VECTOR_MODE_P (mode0))
7730 op0 = safe_vector_operand (op0, mode0);
7732 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7733 op0 = copy_to_mode_reg (mode0, op0);
7736 pat = GEN_FCN (icode) (target, op0);
7743 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
7744 sqrtss, rsqrtss, rcpss. */
7747 ix86_expand_unop1_builtin (icode, arglist, target)
7748 enum insn_code icode;
7753 tree arg0 = TREE_VALUE (arglist);
7754 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7755 enum machine_mode tmode = insn_data[icode].operand[0].mode;
7756 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
7759 || GET_MODE (target) != tmode
7760 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7761 target = gen_reg_rtx (tmode);
7763 if (VECTOR_MODE_P (mode0))
7764 op0 = safe_vector_operand (op0, mode0);
7766 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7767 op0 = copy_to_mode_reg (mode0, op0);
7769 pat = GEN_FCN (icode) (target, op0, op0);
7776 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
7779 ix86_expand_sse_compare (d, arglist, target)
7780 struct builtin_description *d;
7785 tree arg0 = TREE_VALUE (arglist);
7786 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7787 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7788 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7790 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
7791 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
7792 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
7793 enum rtx_code comparison = d->comparison;
7795 if (VECTOR_MODE_P (mode0))
7796 op0 = safe_vector_operand (op0, mode0);
7797 if (VECTOR_MODE_P (mode1))
7798 op1 = safe_vector_operand (op1, mode1);
7800 /* Swap operands if we have a comparison that isn't available in
7804 target = gen_reg_rtx (tmode);
7805 emit_move_insn (target, op1);
7808 comparison = swap_condition (comparison);
7811 || GET_MODE (target) != tmode
7812 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
7813 target = gen_reg_rtx (tmode);
7815 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
7816 op0 = copy_to_mode_reg (mode0, op0);
7817 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
7818 op1 = copy_to_mode_reg (mode1, op1);
7820 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7821 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
7828 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
7831 ix86_expand_sse_comi (d, arglist, target)
7832 struct builtin_description *d;
7837 tree arg0 = TREE_VALUE (arglist);
7838 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7839 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7840 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7842 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
7843 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
7844 enum rtx_code comparison = d->comparison;
7846 if (VECTOR_MODE_P (mode0))
7847 op0 = safe_vector_operand (op0, mode0);
7848 if (VECTOR_MODE_P (mode1))
7849 op1 = safe_vector_operand (op1, mode1);
7851 /* Swap operands if we have a comparison that isn't available in
7858 comparison = swap_condition (comparison);
7861 target = gen_reg_rtx (SImode);
7862 emit_move_insn (target, const0_rtx);
7863 target = gen_rtx_SUBREG (QImode, target, 0);
7865 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
7866 op0 = copy_to_mode_reg (mode0, op0);
7867 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
7868 op1 = copy_to_mode_reg (mode1, op1);
7870 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
7871 pat = GEN_FCN (d->icode) (op0, op1, op2);
7875 emit_insn (gen_setcc_2 (target, op2));
7880 /* Expand an expression EXP that calls a built-in function,
7881 with result going to TARGET if that's convenient
7882 (and in mode MODE if that's convenient).
7883 SUBTARGET may be used as the target for computing one of EXP's operands.
7884 IGNORE is nonzero if the value is to be ignored. */
7887 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
7890 rtx subtarget ATTRIBUTE_UNUSED;
7891 enum machine_mode mode ATTRIBUTE_UNUSED;
7892 int ignore ATTRIBUTE_UNUSED;
7894 struct builtin_description *d;
7896 enum insn_code icode;
7897 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
7898 tree arglist = TREE_OPERAND (exp, 1);
7899 tree arg0, arg1, arg2, arg3;
7900 rtx op0, op1, op2, pat;
7901 enum machine_mode tmode, mode0, mode1, mode2;
7902 int fcode = DECL_FUNCTION_CODE (fndecl);
7906 case IX86_BUILTIN_EMMS:
7907 emit_insn (gen_emms ());
7910 case IX86_BUILTIN_SFENCE:
7911 emit_insn (gen_sfence ());
7914 case IX86_BUILTIN_M_FROM_INT:
7915 target = gen_reg_rtx (DImode);
7916 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7917 emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
7920 case IX86_BUILTIN_M_TO_INT:
7921 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
7922 op0 = copy_to_mode_reg (DImode, op0);
7923 target = gen_reg_rtx (SImode);
7924 emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
7927 case IX86_BUILTIN_PEXTRW:
7928 icode = CODE_FOR_mmx_pextrw;
7929 arg0 = TREE_VALUE (arglist);
7930 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7931 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7932 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7933 tmode = insn_data[icode].operand[0].mode;
7934 mode0 = insn_data[icode].operand[1].mode;
7935 mode1 = insn_data[icode].operand[2].mode;
7937 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7938 op0 = copy_to_mode_reg (mode0, op0);
7939 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7941 /* @@@ better error message */
7942 error ("selector must be an immediate");
7946 || GET_MODE (target) != tmode
7947 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7948 target = gen_reg_rtx (tmode);
7949 pat = GEN_FCN (icode) (target, op0, op1);
7955 case IX86_BUILTIN_PINSRW:
7956 icode = CODE_FOR_mmx_pinsrw;
7957 arg0 = TREE_VALUE (arglist);
7958 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7959 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7960 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7961 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7962 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
7963 tmode = insn_data[icode].operand[0].mode;
7964 mode0 = insn_data[icode].operand[1].mode;
7965 mode1 = insn_data[icode].operand[2].mode;
7966 mode2 = insn_data[icode].operand[3].mode;
7968 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
7969 op0 = copy_to_mode_reg (mode0, op0);
7970 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
7971 op1 = copy_to_mode_reg (mode1, op1);
7972 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
7974 /* @@@ better error message */
7975 error ("selector must be an immediate");
7979 || GET_MODE (target) != tmode
7980 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
7981 target = gen_reg_rtx (tmode);
7982 pat = GEN_FCN (icode) (target, op0, op1, op2);
7988 case IX86_BUILTIN_MASKMOVQ:
7989 icode = CODE_FOR_mmx_maskmovq;
7990 /* Note the arg order is different from the operand order. */
7991 arg1 = TREE_VALUE (arglist);
7992 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
7993 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7994 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
7995 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
7996 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
7997 mode0 = insn_data[icode].operand[0].mode;
7998 mode1 = insn_data[icode].operand[1].mode;
7999 mode2 = insn_data[icode].operand[2].mode;
8001 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8002 op0 = copy_to_mode_reg (mode0, op0);
8003 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
8004 op1 = copy_to_mode_reg (mode1, op1);
8005 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
8006 op2 = copy_to_mode_reg (mode2, op2);
8007 pat = GEN_FCN (icode) (op0, op1, op2);
8013 case IX86_BUILTIN_SQRTSS:
8014 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
8015 case IX86_BUILTIN_RSQRTSS:
8016 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
8017 case IX86_BUILTIN_RCPSS:
8018 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
8020 case IX86_BUILTIN_LOADAPS:
8021 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
8023 case IX86_BUILTIN_LOADUPS:
8024 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
8026 case IX86_BUILTIN_STOREAPS:
8027 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
8028 case IX86_BUILTIN_STOREUPS:
8029 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
8031 case IX86_BUILTIN_LOADSS:
8032 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
8034 case IX86_BUILTIN_STORESS:
8035 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
8037 case IX86_BUILTIN_LOADHPS:
8038 case IX86_BUILTIN_LOADLPS:
8039 icode = (fcode == IX86_BUILTIN_LOADHPS
8040 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8041 arg0 = TREE_VALUE (arglist);
8042 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8043 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8044 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8045 tmode = insn_data[icode].operand[0].mode;
8046 mode0 = insn_data[icode].operand[1].mode;
8047 mode1 = insn_data[icode].operand[2].mode;
8049 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8050 op0 = copy_to_mode_reg (mode0, op0);
8051 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
8053 || GET_MODE (target) != tmode
8054 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8055 target = gen_reg_rtx (tmode);
8056 pat = GEN_FCN (icode) (target, op0, op1);
8062 case IX86_BUILTIN_STOREHPS:
8063 case IX86_BUILTIN_STORELPS:
8064 icode = (fcode == IX86_BUILTIN_STOREHPS
8065 ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
8066 arg0 = TREE_VALUE (arglist);
8067 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8068 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8069 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8070 mode0 = insn_data[icode].operand[1].mode;
8071 mode1 = insn_data[icode].operand[2].mode;
8073 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8074 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8075 op1 = copy_to_mode_reg (mode1, op1);
8077 pat = GEN_FCN (icode) (op0, op0, op1);
8083 case IX86_BUILTIN_MOVNTPS:
8084 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
8085 case IX86_BUILTIN_MOVNTQ:
8086 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
8088 case IX86_BUILTIN_LDMXCSR:
8089 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
8090 target = assign_386_stack_local (SImode, 0);
8091 emit_move_insn (target, op0);
8092 emit_insn (gen_ldmxcsr (target));
8095 case IX86_BUILTIN_STMXCSR:
8096 target = assign_386_stack_local (SImode, 0);
8097 emit_insn (gen_stmxcsr (target));
8098 return copy_to_mode_reg (SImode, target);
8100 case IX86_BUILTIN_PREFETCH:
8101 icode = CODE_FOR_prefetch;
8102 arg0 = TREE_VALUE (arglist);
8103 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8104 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8105 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8106 mode0 = insn_data[icode].operand[1].mode;
8107 mode1 = insn_data[icode].operand[2].mode;
8109 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
8110 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8112 /* @@@ better error message */
8113 error ("selector must be an immediate");
8117 pat = GEN_FCN (icode) (op0, op1);
8123 case IX86_BUILTIN_SHUFPS:
8124 icode = CODE_FOR_sse_shufps;
8125 arg0 = TREE_VALUE (arglist);
8126 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8127 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8128 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8129 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8130 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
8131 tmode = insn_data[icode].operand[0].mode;
8132 mode0 = insn_data[icode].operand[1].mode;
8133 mode1 = insn_data[icode].operand[2].mode;
8134 mode2 = insn_data[icode].operand[3].mode;
8136 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8137 op0 = copy_to_mode_reg (mode0, op0);
8138 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
8139 op1 = copy_to_mode_reg (mode1, op1);
8140 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
8142 /* @@@ better error message */
8143 error ("mask must be an immediate");
8147 || GET_MODE (target) != tmode
8148 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8149 target = gen_reg_rtx (tmode);
8150 pat = GEN_FCN (icode) (target, op0, op1, op2);
8156 case IX86_BUILTIN_PSHUFW:
8157 icode = CODE_FOR_mmx_pshufw;
8158 arg0 = TREE_VALUE (arglist);
8159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
8161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
8162 tmode = insn_data[icode].operand[0].mode;
8163 mode0 = insn_data[icode].operand[2].mode;
8164 mode1 = insn_data[icode].operand[3].mode;
8166 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
8167 op0 = copy_to_mode_reg (mode0, op0);
8168 if (! (*insn_data[icode].operand[3].predicate) (op1, mode1))
8170 /* @@@ better error message */
8171 error ("mask must be an immediate");
8175 || GET_MODE (target) != tmode
8176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8177 target = gen_reg_rtx (tmode);
8178 pat = GEN_FCN (icode) (target, target, op0, op1);
8184 /* Composite intrinsics. */
8185 case IX86_BUILTIN_SETPS1:
8186 target = assign_386_stack_local (SFmode, 0);
8187 arg0 = TREE_VALUE (arglist);
8188 emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
8189 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8190 op0 = gen_reg_rtx (V4SFmode);
8191 emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
8192 XEXP (target, 0))));
8193 emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
8196 case IX86_BUILTIN_SETPS:
8197 target = assign_386_stack_local (V4SFmode, 0);
8198 op0 = change_address (target, SFmode, XEXP (target, 0));
8199 arg0 = TREE_VALUE (arglist);
8200 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8201 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8202 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
8203 emit_move_insn (op0,
8204 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
8205 emit_move_insn (adj_offsettable_operand (op0, 4),
8206 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
8207 emit_move_insn (adj_offsettable_operand (op0, 8),
8208 expand_expr (arg2, NULL_RTX, VOIDmode, 0));
8209 emit_move_insn (adj_offsettable_operand (op0, 12),
8210 expand_expr (arg3, NULL_RTX, VOIDmode, 0));
8211 op0 = gen_reg_rtx (V4SFmode);
8212 emit_insn (gen_sse_movaps (op0, target));
8215 case IX86_BUILTIN_CLRPS:
8216 target = gen_reg_rtx (TImode);
8217 emit_insn (gen_sse_clrti (target));
8220 case IX86_BUILTIN_LOADRPS:
8221 target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
8222 gen_reg_rtx (V4SFmode), 1);
8223 emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
8226 case IX86_BUILTIN_LOADPS1:
8227 target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
8228 gen_reg_rtx (V4SFmode), 1);
8229 emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
8232 case IX86_BUILTIN_STOREPS1:
8233 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
8234 case IX86_BUILTIN_STORERPS:
8235 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
8237 case IX86_BUILTIN_MMX_ZERO:
8238 target = gen_reg_rtx (DImode);
8239 emit_insn (gen_mmx_clrdi (target));
8246 for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
8247 if (d->code == fcode)
8249 /* Compares are treated specially. */
8250 if (d->icode == CODE_FOR_maskcmpv4sf3
8251 || d->icode == CODE_FOR_vmmaskcmpv4sf3
8252 || d->icode == CODE_FOR_maskncmpv4sf3
8253 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
8254 return ix86_expand_sse_compare (d, arglist, target);
8256 return ix86_expand_binop_builtin (d->icode, arglist, target);
8259 for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
8260 if (d->code == fcode)
8261 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
8263 for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
8264 if (d->code == fcode)
8265 return ix86_expand_sse_comi (d, arglist, target);
8268 /* @@@ Should really do something sensible here. */